📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""This module contains a code example related to12Think Python, 2nd Edition3by Allen Downey4http://thinkpython2.com56Copyright 2015 Allen Downey78License: http://creativecommons.org/licenses/by/4.0/9"""1011from __future__ import print_function, division121314import sys15import random1617from markov import skip_gutenberg_header, shift181920class Markov:21"""Encapsulates the statistical summary of a text."""2223def __init__(self):24self.suffix_map = {} # map from prefixes to a list of suffixes25self.prefix = () # current tuple of words2627def process_file(self, filename, order=2):28"""Reads a file and performs Markov analysis.2930filename: string31order: integer number of words in the prefix3233Returns: map from prefix to list of possible suffixes.34"""35fp = open(filename)36skip_gutenberg_header(fp)3738for line in fp:39if line.startswith('*** END OF THIS'):40break4142for word in line.rstrip().split():43self.process_word(word, order)4445def process_word(self, word, order=2):46"""Processes each word.4748word: string49order: integer5051During the first few iterations, all we do is store up the words;52after that we start adding entries to the dictionary.53"""54if len(self.prefix) < order:55self.prefix += (word,)56return5758try:59self.suffix_map[self.prefix].append(word)60except KeyError:61# if there is no entry for this prefix, make one62self.suffix_map[self.prefix] = [word]6364self.prefix = shift(self.prefix, word)6566def random_text(self, n=100):67"""Generates random wordsfrom the analyzed text.6869Starts with a random prefix from the dictionary.7071n: number of words to generate72"""73# choose a random prefix (not weighted by frequency)74start = random.choice(list(self.suffix_map.keys()))7576for i in range(n):77suffixes = self.suffix_map.get(start, None)78if suffixes == None:79# if the prefix isn't in map, we got to the end of the80# original text, so we have to start again.81self.random_text(n-i)82return8384# choose a random suffix85word = random.choice(suffixes)86print(word, end=' ')87start = shift(start, word)888990def main(script, filename='158-0.txt', n=100, order=2):91try:92n = int(n)93order = int(order)94except ValueError:95print('Usage: %d filename [# of words] [prefix length]' % script)96else:97markov = Markov()98markov.process_file(filename, order)99markov.random_text(n)100101102if __name__ == '__main__':103main(*sys.argv)104105106107