new

4fd0e316 · Bayan Alkhuzaei CS2023 · 377dc2ee · 4fd0e316 · 4fd0e316 · 4fd0e316
Commit 4fd0e316 authored 1 year ago by Bayan Alkhuzaei CS2023
--- a/assi-3/assignment-3a.py
+++ b/assi-3/assignment-3a.py
+import random   # built-in library for generating random numbers
+import nltk     # library for working with human language 
+from nltk.tokenize import word_tokenize     # A function from NLTK for breaking words down
+from nltk import FreqDist   # A class from NLTK for representing the frequency distribution of a set.
+nltk.download('punkt')      # models used by word_tokenize to tokenize words.
+
+
+def calc_word_probs(text):
+    """Return the relative frequency of every token in ``text``.
+
+    The text is split into tokens with NLTK's ``word_tokenize``; each
+    distinct token is then mapped to its count divided by the total number
+    of tokens.
+
+    Args:
+        text: The raw text to analyse.
+
+    Returns:
+        A dict whose keys are the distinct tokens and whose values are the
+        share of all tokens that each one accounts for. The dict is empty
+        when the text yields no tokens.
+    """
+    tokens = word_tokenize(text)
+    token_total = len(tokens)
+
+    probs = {}
+    # FreqDist.items() yields (token, count) pairs; dividing each count by
+    # the overall token total turns it into a probability.
+    for token, occurrences in FreqDist(tokens).items():
+        probs[token] = occurrences / token_total
+    return probs
+
+
+def generate_word(probs):
+    """Draw one word at random, weighted by its probability.
+
+    Args:
+        probs: Mapping of word -> probability; the probabilities are used
+            as sampling weights.
+
+    Returns:
+        A single key of ``probs``.
+    """
+    support = list(probs)                          # candidate words, in dict order
+    weights = [probs[word] for word in support]    # weight of each candidate, same order
+
+    # random.choices returns a list of k samples; exactly one is requested.
+    (generated_word,) = random.choices(support, weights=weights, k=1)
+    return generated_word
+
+if __name__ == '__main__':
+    # Load the whole corpus from republic.txt into memory as one string.
+    with open('republic.txt', 'r', encoding='utf-8') as corpus_file:
+        input_text = corpus_file.read()
+
+    # Turn the corpus into per-token probabilities, then sample one token
+    # according to those probabilities.
+    word_probs = calc_word_probs(input_text)
+    generated_word = generate_word(word_probs)
+
+    # Echo the full corpus first, followed by the sampled token.
+    print("Input text:", input_text)
+    print("Generated word:", generated_word)
\ No newline at end of file
--- a/assi-3/assignment-3b.py
+++ b/assi-3/assignment-3b.py
+import random   # library for generating random numbers
+import nltk     # library for working with human language 
+from nltk.tokenize import word_tokenize     # A function from NLTK for breaking words down
+from nltk import ConditionalFreqDist   # A class from NLTK for representing the conditional frequency distribution of a set.
+nltk.download('punkt')      # models used by word_tokenize to tokenize words.
+
+def probs_model(text):
+    """Build a bigram model of ``text``.
+
+    Args:
+        text: The raw text to analyse.
+
+    Returns:
+        A ``ConditionalFreqDist`` built from every pair of consecutive
+        tokens: for each token it holds the counts of the tokens that
+        directly follow it. This is the representation of the conditional
+        probabilities used by the rest of the script.
+    """
+    tokens = word_tokenize(text)
+    consecutive_pairs = list(nltk.bigrams(tokens))
+    return ConditionalFreqDist(consecutive_pairs)
+
+
+def generate_next_word(model, initial_word):
+    """Sample a successor of ``initial_word`` from a bigram model.
+
+    Args:
+        model: Mapping of word -> {next word: count}, as returned by
+            ``probs_model``.
+        initial_word: The word whose successor is wanted.
+
+    Returns:
+        One of the recorded successors, drawn with probability proportional
+        to how often it followed ``initial_word``; ``None`` when the word
+        has no recorded successors.
+    """
+    next_words = model[initial_word]
+    if not next_words:
+        return None  # in case there are no next words
+
+    candidates = list(next_words.keys())
+    counts = list(next_words.values())
+    # Weight the draw by the bigram counts: without weights every distinct
+    # successor would be equally likely, however often it was observed.
+    next_word = random.choices(candidates, weights=counts, k=1)[0]
+    return next_word
+
+if __name__ == '__main__':
+    # The corpus is a short story embedded directly in the script; no file is read here.
+    input_text = "In the sweet town of Candyland, there lived a marshmallow named Mallow. Mallow had a unique passion ? a love for Alan Turing's work on computers and artificial intelligence. Instead of bouncing with other candies, Mallow spent its days reading Turing's papers and dreaming of marshmallow-powered machines. Mallow's friends couldn't quite understand its fascination, but they embraced Mallow's uniqueness. One day, Mallow surprised everyone by creating a tiny marshmallow computer that could solve candy puzzles. The town marveled at Mallow's ingenuity, and Mallow's love for Turing's work became a source of inspiration for Candyland. And so, Mallow, the marshmallow with a Turing twist, continued to blend sweetness with technology, making Candyland a tastier and smarter place."
+
+    # Seed word whose successor is sampled; the result is None when the
+    # seed has no recorded successor in the corpus.
+    initial_word = "Alan"
+
+    model = probs_model(input_text)
+    generated_next_word = generate_next_word(model, initial_word)
+
+    # Show the corpus, then the seed word together with the sampled successor.
+    print("Input text:", input_text)
+    print(f"Given seed word: '{initial_word}', Generated next word: {generated_next_word}")
--- a/assi-3/assignment-3c.py
+++ b/assi-3/assignment-3c.py
+import random   # library for generating random numbers
+import nltk     # library for working with human language 
+from nltk.tokenize import word_tokenize     # A function from NLTK for breaking words down
+from nltk import ConditionalFreqDist   # A class from NLTK for representing the conditional frequency distribution of a set.
+nltk.download('punkt')      # models used by word_tokenize to tokenize words.
+
+def probs_model(text):
+    """Build a bigram model of ``text``.
+
+    Args:
+        text: The raw text to analyse.
+
+    Returns:
+        A ``ConditionalFreqDist`` built from every pair of consecutive
+        tokens: for each token it holds the counts of the tokens that
+        directly follow it. This is the representation of the conditional
+        probabilities used by the rest of the script.
+    """
+    tokens = word_tokenize(text)
+    consecutive_pairs = list(nltk.bigrams(tokens))
+    return ConditionalFreqDist(consecutive_pairs)
+
+
+def generate_sentence(model, initial_word, length):
+    """Generate a space-separated word sequence by walking a bigram model.
+
+    Starting from ``initial_word``, each following word is sampled from the
+    successors of the previous word, with probability proportional to how
+    often that successor was observed.
+
+    Args:
+        model: Mapping of word -> {next word: count}, as returned by
+            ``probs_model``.
+        initial_word: The seed word; it is always the first word of the
+            result.
+        length: Maximum number of words in the result, seed included.
+
+    Returns:
+        The generated words joined by single spaces. The result is shorter
+        than ``length`` when a word with no recorded successors is reached.
+    """
+    sentence = [initial_word]
+
+    # The seed already occupies one slot, so at most length - 1 words are added.
+    for _ in range(length - 1):
+        next_words = model[sentence[-1]]
+        if not next_words:
+            break  # If there are no next words, end the sentence
+        candidates = list(next_words.keys())
+        counts = list(next_words.values())
+        # Weight the draw by the bigram counts: an unweighted choice would
+        # treat every distinct successor as equally likely.
+        sentence.append(random.choices(candidates, weights=counts, k=1)[0])
+
+    return ' '.join(sentence)  # combine the generated words into a single string
+
+if __name__ == '__main__':
+    # The corpus is a short story embedded directly in the script.
+    input_text = "In the sweet town of Candyland, there lived a marshmallow named Mallow. Mallow had a unique passion ? a love for Alan Turing's work on computers and artificial intelligence. Instead of bouncing with other candies, Mallow spent its days reading Turing's papers and dreaming of marshmallow-powered machines. Mallow's friends couldn't quite understand its fascination, but they embraced Mallow's uniqueness. One day, Mallow surprised everyone by creating a tiny marshmallow computer that could solve candy puzzles. The town marveled at Mallow's ingenuity, and Mallow's love for Turing's work became a source of inspiration for Candyland. And so, Mallow, the marshmallow with a Turing twist, continued to blend sweetness with technology, making Candyland a tastier and smarter place."
+
+    # Generation settings: the word to start from and the maximum number of
+    # words (seed included) in the generated sentence.
+    seed_word = "Alan"
+    sentence_length = 15
+
+    model = probs_model(input_text)
+    generated_sentence = generate_sentence(model, seed_word, sentence_length)
+
+    # Show the corpus, then the seed word together with the generated sentence.
+    print("Input text:", input_text)
+    print(f"Given seed word: '{seed_word}', Generated sentence: {generated_sentence}")
--- a/assi-3/republic.txt
+++ b/assi-3/republic.txt