"""Pick a random word from a text, weighted by how often each word occurs."""

import random  # built-in library for generating random numbers

import nltk  # library for working with human language
from nltk import FreqDist  # class representing a frequency distribution
from nltk.tokenize import word_tokenize  # splits a text into word tokens

# Fetch the models that word_tokenize uses to tokenize words.
# NOTE(review): newer NLTK releases may look for 'punkt_tab' rather than
# 'punkt' -- confirm against the installed NLTK version.
nltk.download('punkt')


def calc_word_probs(text):
    """Return the probability of each word (token) occurring in *text*.

    The text is tokenized with ``word_tokenize``, the occurrences of each
    token are counted with ``FreqDist``, and every count is divided by the
    total number of tokens in the text.

    Args:
        text: The text to analyse.

    Returns:
        A dictionary in which each distinct token is a key and the value is
        that token's count divided by the total token count. The dictionary
        is empty when tokenization yields no tokens.
    """
    words = word_tokenize(text)
    word_counts = FreqDist(words)
    total_words = len(words)
    # Each (word, count) item becomes word -> relative frequency.
    return {word: count / total_words for word, count in word_counts.items()}


def generate_word(probs):
    """Randomly choose one word, weighted by its probability.

    Args:
        probs: Mapping of word -> probability, such as the result of
            ``calc_word_probs``.

    Returns:
        One key of *probs*, drawn with ``random.choices`` using the mapping's
        values as weights.

    Raises:
        IndexError: If *probs* is empty; ``random.choices`` cannot draw from
            an empty population (IndexError in CPython).
    """
    possible_outcomes = list(probs)  # the words (support)
    likely_usage = list(probs.values())  # their probabilities (weights)
    # random.choices returns a list of k=1 picks; unwrap the single element.
    return random.choices(possible_outcomes, weights=likely_usage)[0]


if __name__ == '__main__':
    # Read the content of republic.txt and store it in input_text.
    with open('republic.txt', 'r', encoding='utf-8') as file:
        input_text = file.read()

    # Calculate the word probabilities, then generate a random word from them.
    word_probs = calc_word_probs(input_text)
    generated_word = generate_word(word_probs)

    # Print both the input text and the generated word.
    print("Input text:", input_text)
    print("Generated word:", generated_word)