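# NOTE: the next four lines are page-navigation residue from the web view this
# file was copied from; they are commented out so the file parses as Python.
# Skip to content
# Snippets Groups Projects
# assignment-3b.py 2.37 KiB
# Newer Older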
import random   # library for generating random numbers
import nltk     # library for working with human language 
from nltk.tokenize import word_tokenize     # A function from NLTK for breaking words down
from nltk import ConditionalFreqDist   # A class from NLTK for representing the conditional frequency distribution of a set.
nltk.download('punkt')      # models used by word_tokenize to tokenize words.

def probs_model(text):
    """Build a bigram model from raw text.

    The text is tokenized with NLTK's ``word_tokenize`` and every pair of
    consecutive tokens is counted.

    Args:
        text: The raw input string to learn from.

    Returns:
        A ``ConditionalFreqDist`` keyed by the first token of each bigram;
        each value holds the counts of the tokens that directly followed it.
    """
    tokens = word_tokenize(text)
    # Each bigram is a (condition, sample) pair: (current token, next token).
    return ConditionalFreqDist(nltk.bigrams(tokens))

# probs_model builds the representation of the conditional probabilities:
# for each word, the frequency counts of the words that follow it.

def generate_next_word(model, initial_word):
    """Randomly pick a word to follow ``initial_word``, weighted by frequency.

    Args:
        model: Mapping from a word to a mapping of ``{next_word: count}``
            (for example the ``ConditionalFreqDist`` built by the bigram
            model in this file).
        initial_word: The word whose successor should be sampled.

    Returns:
        One of the observed successors of ``initial_word``, drawn with
        probability proportional to how often it followed that word, or
        ``None`` when the word has no recorded successors.
    """
    # Use .get() rather than indexing: indexing a defaultdict-based model
    # would silently insert an empty entry for every unseen word.
    followers = model.get(initial_word)
    if not followers:
        return None  # unseen word, or a word that never had a successor

    candidates = list(followers)
    # Weight each candidate by its observed count so the draw follows the
    # conditional distribution instead of being uniform over distinct words.
    weights = [followers[word] for word in candidates]
    return random.choices(candidates, weights=weights)[0]

if __name__ == "__main__":
    # Demo corpus, hard-coded here (nothing is read from disk). The literal is
    # split per sentence purely for readability; adjacent string literals are
    # concatenated into the same single string.
    # NOTE(review): the "?" after "unique passion" looks like a mis-encoded
    # dash; it is kept as-is because changing it would alter the tokens.
    corpus = (
        "In the sweet town of Candyland, there lived a marshmallow named Mallow. "
        "Mallow had a unique passion ? a love for Alan Turing's work on computers and artificial intelligence. "
        "Instead of bouncing with other candies, Mallow spent its days reading Turing's papers and dreaming of marshmallow-powered machines. "
        "Mallow's friends couldn't quite understand its fascination, but they embraced Mallow's uniqueness. "
        "One day, Mallow surprised everyone by creating a tiny marshmallow computer that could solve candy puzzles. "
        "The town marveled at Mallow's ingenuity, and Mallow's love for Turing's work became a source of inspiration for Candyland. "
        "And so, Mallow, the marshmallow with a Turing twist, continued to blend sweetness with technology, making Candyland a tastier and smarter place."
    )

    # Learn the bigram counts from the corpus.
    bigram_model = probs_model(corpus)

    # Sample one successor for a fixed seed word.
    seed_word = "Alan"
    following_word = generate_next_word(bigram_model, seed_word)

    # Show the corpus and the sampled continuation.
    print("Input text:", corpus)
    print(f"Given seed word: '{seed_word}', Generated next word: {following_word}")