#!/usr/bin/env python
"""Estimate letter and letter-pair probabilities from a text file."""

# Alphabet covered by the statistics: lowercase ASCII letters plus the space.
letters = 'abcdefghijklmnopqrstuvwxyz '


def file2prob(filename):
    """
    Read a file and return a dictionary of letters and their probabilities.

    Characters are lowercased before counting; anything not in ``letters``
    is ignored and does not contribute to the total.

    Args:
        filename: Path of a UTF-8 encoded text file.

    Returns:
        A dict mapping every character of ``letters`` to its relative
        frequency among the counted characters. If the file contains no
        countable character at all, every probability is 0.0.
    """
    letter_dict = {c: 0 for c in letters}
    letter_total = 0
    with open(filename, encoding="utf-8") as fp:
        for c in fp.read():
            c = c.lower()
            if c not in letter_dict:
                continue
            letter_dict[c] += 1
            letter_total += 1

    # Guard against an empty (or letter-free) file: avoid dividing by zero.
    if letter_total == 0:
        return {c: 0.0 for c in letter_dict}
    return {c: count / letter_total for c, count in letter_dict.items()}


def file2pairs(filename):
    """
    Read a file and return a dictionary of letters and the probabilities
    of following letters. That is, the conditional probability of a letter
    given its predecessor.

    Characters are lowercased before counting; anything not in ``letters``
    is skipped, so the pairs are formed between consecutive *counted*
    characters.

    Args:
        filename: Path of a UTF-8 encoded text file.

    Returns:
        A nested dict ``probs[previous][current]`` covering every pair of
        characters in ``letters``. Rows for a predecessor that was never
        observed contain 0.0 for every follower.
    """
    letter_dict = {c: {a: 0 for a in letters} for c in letters}
    previous = None
    with open(filename, encoding="utf-8") as fp:
        for c in fp.read():
            # Lowercase first so uppercase letters are counted, matching
            # the behaviour of file2prob.
            c = c.lower()
            if c not in letter_dict:
                continue
            if previous is not None:
                letter_dict[previous][c] += 1
            previous = c

    probs = {}
    for c, followers in letter_dict.items():
        # Compute the row total once per predecessor, not once per cell.
        row_total = sum(followers.values())
        probs[c] = {
            d: (followers[d] / row_total if row_total else 0.0)
            for d in letters
        }
    return probs


if __name__ == '__main__':
    import sys

    if len(sys.argv) != 3:
        print("Usage: %s filename letter" % sys.argv[0])
        sys.exit(-1)
    filename = sys.argv[1]
    letter = sys.argv[2]
    probs = file2prob(filename)
    # Report an unsupported letter cleanly instead of a KeyError traceback.
    if letter not in probs:
        print("Unknown letter: %r (expected one of %r)" % (letter, letters),
              file=sys.stderr)
        sys.exit(-1)
    print(probs[letter])