Skip to content
Snippets Groups Projects
Commit 4fd0e316 authored by Bayan Alkhuzaei CS2023's avatar Bayan Alkhuzaei CS2023
Browse files

new

parent 377dc2ee
No related branches found
No related tags found
No related merge requests found
import random # built-in library for generating random numbers
import nltk # library for working with human language
from nltk.tokenize import word_tokenize # A function from NLTK for breaking words down
from nltk import FreqDist # A class from NLTK for representing the frequency distribution of a set.
nltk.download('punkt') # models used by word_tokenize to tokenize words.
def calc_word_probs(text):
    """Estimate the unigram probability of every token in *text*.

    The text is split into tokens with NLTK's ``word_tokenize``; each distinct
    token's occurrence count is then divided by the total number of tokens.

    Args:
        text: The raw text to analyse.

    Returns:
        A dict whose keys are the distinct tokens and whose values are their
        relative frequencies (count / total tokens). If tokenization yields
        no tokens, the dict is empty.
    """
    tokens = word_tokenize(text)
    token_total = len(tokens)
    occurrences = FreqDist(tokens)

    probabilities = {}
    # Each (token, count) pair becomes one entry: token -> count / total.
    for token, count in occurrences.items():
        probabilities[token] = count / token_total
    return probabilities
def generate_word(probs):
    """Draw a single word at random, weighted by its probability.

    Args:
        probs: Mapping from word to weight (e.g. the output of
            ``calc_word_probs``).

    Returns:
        One key of *probs*, chosen by ``random.choices`` using the mapping's
        values as weights.
    """
    # Keys form the support, values the matching weights (same order).
    support, weights = list(probs), list(probs.values())
    (picked,) = random.choices(support, weights=weights, k=1)
    return picked
if __name__ == '__main__':
    # Load the whole corpus from republic.txt into memory.
    with open('republic.txt', mode='r', encoding='utf-8') as corpus:
        input_text = corpus.read()

    # Build the word -> probability table, then sample one word from it.
    word_probs = calc_word_probs(input_text)
    generated_word = generate_word(word_probs)

    # Show the source text followed by the sampled word.
    print("Input text:", input_text)
    print("Generated word:", generated_word)
\ No newline at end of file
import random # library for generating random numbers
import nltk # library for working with human language
from nltk.tokenize import word_tokenize # A function from NLTK for breaking words down
from nltk import ConditionalFreqDist # A class from NLTK for representing the conditional frequency distribution of a set.
nltk.download('punkt') # models used by word_tokenize to tokenize words.
def probs_model(text):
    """Build a bigram model of *text*.

    The text is tokenized with ``word_tokenize`` and every pair of consecutive
    tokens is counted in an NLTK ``ConditionalFreqDist``: the first token of
    each pair is the condition, the second is the sample.

    Args:
        text: The raw text to model.

    Returns:
        A ``ConditionalFreqDist`` built from the (token, next token) pairs.
    """
    tokens = word_tokenize(text)
    return ConditionalFreqDist(nltk.bigrams(tokens))
def generate_next_word(model, initial_word):
    """Sample the word that follows *initial_word* according to the model.

    The follower is drawn with probability proportional to how often it was
    counted after *initial_word*, so frequent continuations are favoured over
    rare ones (a uniform pick over the distinct followers would ignore the
    counts stored in the model).

    Args:
        model: Mapping from a word to a mapping of follower -> count, such as
            the ``ConditionalFreqDist`` returned by ``probs_model``.
        initial_word: The word whose successor should be generated.

    Returns:
        The sampled follower, or ``None`` when the model has no followers
        recorded for *initial_word*.
    """
    followers = model[initial_word]
    if not followers:
        return None  # nothing was ever observed after this word

    candidates = list(followers)
    # Use the observed counts as weights; random.choices normalises them.
    counts = [followers[candidate] for candidate in candidates]
    return random.choices(candidates, weights=counts, k=1)[0]
if __name__ == '__main__':
    # The sample corpus is defined inline (one piece per sentence).
    input_text = (
        "In the sweet town of Candyland, there lived a marshmallow named Mallow. "
        "Mallow had a unique passion ? a love for Alan Turing's work on computers and artificial intelligence. "
        "Instead of bouncing with other candies, Mallow spent its days reading Turing's papers and dreaming of marshmallow-powered machines. "
        "Mallow's friends couldn't quite understand its fascination, but they embraced Mallow's uniqueness. "
        "One day, Mallow surprised everyone by creating a tiny marshmallow computer that could solve candy puzzles. "
        "The town marveled at Mallow's ingenuity, and Mallow's love for Turing's work became a source of inspiration for Candyland. "
        "And so, Mallow, the marshmallow with a Turing twist, continued to blend sweetness with technology, making Candyland a tastier and smarter place."
    )

    # Count the bigrams of the corpus.
    model = probs_model(input_text)

    # Pick a seed word and ask the model for the word that follows it.
    initial_word = "Alan"
    generated_next_word = generate_next_word(model, initial_word)

    # Report the corpus, the seed and the generated follower.
    print("Input text:", input_text)
    print(f"Given seed word: '{initial_word}', Generated next word: {generated_next_word}")
import random # library for generating random numbers
import nltk # library for working with human language
from nltk.tokenize import word_tokenize # A function from NLTK for breaking words down
from nltk import ConditionalFreqDist # A class from NLTK for representing the conditional frequency distribution of a set.
nltk.download('punkt') # models used by word_tokenize to tokenize words.
def probs_model(text):
    """Return the conditional frequency distribution of word bigrams in *text*.

    Args:
        text: The raw text to model.

    Returns:
        A ``ConditionalFreqDist`` built from the consecutive-token pairs that
        ``nltk.bigrams`` produces over the ``word_tokenize`` output; each
        pair's first token is the condition and its second the sample.
    """
    tokens = word_tokenize(text)
    consecutive_pairs = nltk.bigrams(tokens)
    return ConditionalFreqDist(consecutive_pairs)
def generate_sentence(model, initial_word, length):
    """Generate a word sequence by repeatedly sampling from the bigram model.

    Starting from *initial_word*, each following word is drawn with
    probability proportional to how often it was counted after the previous
    word, so the output follows the model's frequencies instead of treating
    all distinct followers as equally likely.

    Args:
        model: Mapping from a word to a mapping of follower -> count, such as
            the ``ConditionalFreqDist`` returned by ``probs_model``.
        initial_word: The seed word; always the first word of the result.
        length: Maximum number of words in the result, seed included.

    Returns:
        The generated words joined by single spaces. The result is shorter
        than *length* if a word with no recorded followers is reached.
    """
    words = [initial_word]
    for _ in range(length - 1):
        followers = model[words[-1]]
        if not followers:
            break  # dead end: nothing was ever observed after this word
        candidates = list(followers)
        # Use the observed counts as weights; random.choices normalises them.
        counts = [followers[candidate] for candidate in candidates]
        words.append(random.choices(candidates, weights=counts, k=1)[0])
    return ' '.join(words)
if __name__ == '__main__':
    # The sample corpus is defined inline (one piece per sentence).
    input_text = (
        "In the sweet town of Candyland, there lived a marshmallow named Mallow. "
        "Mallow had a unique passion ? a love for Alan Turing's work on computers and artificial intelligence. "
        "Instead of bouncing with other candies, Mallow spent its days reading Turing's papers and dreaming of marshmallow-powered machines. "
        "Mallow's friends couldn't quite understand its fascination, but they embraced Mallow's uniqueness. "
        "One day, Mallow surprised everyone by creating a tiny marshmallow computer that could solve candy puzzles. "
        "The town marveled at Mallow's ingenuity, and Mallow's love for Turing's work became a source of inspiration for Candyland. "
        "And so, Mallow, the marshmallow with a Turing twist, continued to blend sweetness with technology, making Candyland a tastier and smarter place."
    )

    # Generation settings: the starting word and the maximum word count.
    seed_word = "Alan"
    sentence_length = 15

    # Count the bigrams of the corpus, then walk the model from the seed.
    model = probs_model(input_text)
    generated_sentence = generate_sentence(model, seed_word, sentence_length)

    # Report the corpus, the seed and the generated sentence.
    print("Input text:", input_text)
    print(f"Given seed word: '{seed_word}', Generated sentence: {generated_sentence}")
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment