Skip to content
Snippets Groups Projects
Commit eae85ee8 authored by Bayan Alkhuzaei CS2023's avatar Bayan Alkhuzaei CS2023
Browse files

Upload New File

parent ec4339f6
No related branches found
No related tags found
No related merge requests found
import random # built-in library for generating random numbers
import nltk # library for working with human language
from nltk.tokenize import word_tokenize # A function from NLTK for breaking words down
from nltk import FreqDist # A class from NLTK for representing the frequency distribution of a set.
nltk.download('punkt') # models used by word_tokenize to tokenize words.
def calc_word_probs(text):
    """Estimate how likely each token is to occur in *text*.

    The text is split into tokens with NLTK's ``word_tokenize``; each
    distinct token's relative frequency (its count divided by the total
    number of tokens) is used as its probability.

    Args:
        text: The raw text to analyse.

    Returns:
        A dict mapping each distinct token to its relative frequency.
        When the tokenizer yields no tokens the dict is empty.
    """
    words = word_tokenize(text)
    # FreqDist maps each distinct token to the number of times it occurs.
    word_counts = FreqDist(words)
    total_words = len(words)
    # No division by zero is possible: with zero tokens there are no
    # items to iterate over, so the comprehension simply yields {}.
    probs = {word: count / total_words for word, count in word_counts.items()}
    return probs
# The dictionary comprehension in calc_word_probs builds a Python dictionary
# in which each distinct word in the text is a key
# and the corresponding value is the probability of that word occurring in the given text.
# It iterates over each item in the word_counts dictionary.
# Each item is a (word, count) tuple, which is unpacked into the variables word and count.
# The word is used as the key, and the value is calculated by dividing
# the count of that word by the total number of words in the text.
def generate_word(probs):
    """Draw one word at random, weighted by its probability.

    Args:
        probs: Mapping from word to its weight (for example the relative
            frequencies returned by ``calc_word_probs``).

    Returns:
        One key of *probs*, chosen with likelihood proportional to its
        weight.

    Raises:
        IndexError: If *probs* is empty, since there is nothing to choose.
    """
    if not probs:
        # random.choices on an empty population fails with an opaque
        # "list index out of range"; keep the exception type but say why.
        raise IndexError("cannot generate a word from an empty probability table")
    possible_outcomes = list(probs)       # the support: candidate words
    likely_usage = list(probs.values())   # the weights, in the same order
    # random.choices returns a list of k samples; take the single draw.
    return random.choices(possible_outcomes, weights=likely_usage, k=1)[0]
if __name__ == '__main__':
    # Read the whole corpus up front; the file is closed as soon as the
    # `with` block ends, before any processing starts.
    with open('republic.txt', 'r', encoding='utf-8') as file:
        input_text = file.read()

    # Build the word -> probability table, then sample one word from it.
    word_probs = calc_word_probs(input_text)
    generated_word = generate_word(word_probs)

    print("Input text:", input_text)
    print("Generated word:", generated_word)
# The script above does the following when run directly:
# 1. Reads the content of republic.txt and stores it in the variable input_text.
# 2. Calculates the word probabilities using the calc_word_probs function.
# 3. Generates a random word based on the computed probabilities using the generate_word function.
# 4. Prints both the input text and the generated word.
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment