#!/usr/bin/env python3
import sys
from math import log

from letters import file2prob, file2pairs


def entropy(dist):
    """
    Calculate the entropy (in bits) of a probability distribution.

    The distribution is given as a dictionary whose keys are the support
    and whose values are the probabilities.
    """
    return -sum(p * log(p, 2) for p in dist.values() if p > 0)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: %s filename" % sys.argv[0])
        sys.exit(-1)

    filename = sys.argv[1]

    # Single-character probabilities and, for each character c, the
    # distribution of the character that follows c.
    probs = file2prob(filename)
    conds = file2pairs(filename)

    # Joint probabilities of adjacent character pairs:
    # P(c, d) = P(c) * P(d | c).
    cc = [probs[c] * d for c in conds for d in conds[c].values()]
    centropy = -sum(p * log(p, 2) for p in cc if p > 0)

    # Normalize each entropy by the maximum possible entropy for its
    # support size, log2(number of outcomes).
    print(entropy(probs) / log(len(probs), 2),
          centropy / log(len(cc), 2))
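
# Illustrative note (a sketch, not part of the original script): the shapes of
# the inputs below are inferred from how they are used above.  file2prob(filename)
# is assumed to return a flat dict such as {'a': 0.3, 'b': 0.7}, and
# file2pairs(filename) a nested dict such as {'a': {'a': 0.1, 'b': 0.9},
# 'b': {'a': 0.5, 'b': 0.5}}, i.e. the conditional distribution of the next
# character given the current one.  A quick sanity check of entropy() on a
# uniform two-symbol distribution, which should give exactly one bit:
#
#     >>> entropy({'a': 0.5, 'b': 0.5})
#     1.0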