Upload New File

eae85ee8 · Bayan Alkhuzaei CS2023 · ec4339f6 · eae85ee8
Commit eae85ee8 authored 1 year ago by Bayan Alkhuzaei CS2023
--- a/assignment-3a.py
+++ b/assignment-3a.py
+import random   # standard-library module for pseudo-random selection (used by generate_word)
+import nltk     # Natural Language Toolkit: library for working with human-language text
+from nltk.tokenize import word_tokenize     # NLTK function that splits a text into word tokens
+from nltk import FreqDist   # NLTK class that records how often each item of a collection occurs (a frequency distribution)
+nltk.download('punkt')      # fetch the 'punkt' models used by word_tokenize; this call sits at module level, so it runs every time the file is loaded
+# Compute the relative frequency (empirical probability) of every token in `text`.
+# Returns a dict that maps each distinct token to (its count / total number of tokens).
+def calc_word_probs(text):          # `text` is the raw input string to analyse
+    words = word_tokenize(text)     # split the text into a sequence of tokens
+    word_counts = FreqDist(words)   # count how many times each distinct token occurs
+    total_words = len(words)        # total number of tokens, duplicates included
+    # Divide every count by the total so that the values sum to 1.
+    # If no tokens were produced, the comprehension has nothing to iterate over,
+    # so no division by zero happens and an empty dict is returned.
+    probs = {word: count / total_words for word, count in word_counts.items()}
+    return probs
+# The dictionary comprehension in calc_word_probs builds a Python dictionary ({})
+# in which each distinct word (token) of the text is a key
+# and the corresponding value is the probability of that word occurring in the given text.
+# It iterates over each item of the word_counts frequency distribution.
+# Each item is a (word, count) tuple, which is unpacked into the two variables word and count.
+# The word is used as the key, and the value is calculated by dividing
+# the count of that word by the total number of words in the text.
+# Draw one word at random from `probs`, weighting each word by its associated value.
+# `probs` is a dict mapping word -> weight, e.g. the result of calc_word_probs.
+# NOTE: an empty `probs` gives random.choices an empty population, which raises IndexError.
+def generate_word(probs):
+    possible_outcomes = list(probs.keys())  # the candidate words (the support of the distribution)
+    likely_usage = list(probs.values())     # the matching weights, in the same order as the keys
+    # random.choices returns a list of samples (one sample by default), so [0] extracts the single drawn word.
+    generated_word = random.choices(possible_outcomes, likely_usage)[0]
+    return generated_word
+# Script entry point: the body runs only when the file is executed directly, not when it is imported.
+if __name__ == '__main__':
+    # Read the whole input file into memory as UTF-8 text; the relative path is resolved against the current working directory.
+    with open('republic.txt', 'r', encoding='utf-8') as file:
+        input_text = file.read()
+    word_probs= calc_word_probs(input_text)     # word -> probability mapping for the input text
+    generated_word = generate_word(word_probs)  # one word sampled according to those probabilities
+    print("Input text:", input_text)            # echoes the entire file contents to stdout
+    print("Generated word:", generated_word)
+# Summary of the main block above:
+# 1. Read the content of republic.txt and store it in the variable input_text.
+# 2. Calculate the word probabilities using the calc_word_probs function.
+# 3. Generate a random word based on the computed probabilities using the generate_word function.
+# 4. Print both the input text and the generated word.
\ No newline at end of file