📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""This module contains a code example related to

Think Python, 2nd Edition
by Allen Downey
http://thinkpython2.com

Copyright 2015 Allen Downey

License: http://creativecommons.org/licenses/by/4.0/
"""

from __future__ import print_function, division

import sys
import string
import random

# global variables
suffix_map = {}        # map from prefixes to a list of suffixes
prefix = ()            # current tuple of words


def process_file(filename, order=2):
    """Reads a file and performs Markov analysis.

    Lines before the '*** START OF THIS' marker are skipped and reading
    stops at the first line that starts with '*** END OF THIS'.  Every
    whitespace-separated word in between is fed to process_word, which
    updates the module-level suffix_map.

    filename: string
    order: integer number of words in the prefix

    returns: map from prefix to list of possible suffixes (the
             module-level suffix_map, updated in place).
    """
    # the with-block guarantees the file is closed, even on error
    with open(filename) as fp:
        skip_gutenberg_header(fp)

        for line in fp:
            if line.startswith('*** END OF THIS'):
                break

            for word in line.rstrip().split():
                process_word(word, order)

    return suffix_map


def skip_gutenberg_header(fp):
    """Reads from fp until it finds the line that ends the header.

    If no such line exists, fp is read to the end.

    fp: open file object
    """
    for line in fp:
        if line.startswith('*** START OF THIS'):
            break


def process_word(word, order=2):
    """Processes each word.

    word: string
    order: integer

    During the first few iterations, all we do is store up the words;
    after that we start adding entries to the dictionary.
    """
    global prefix
    if len(prefix) < order:
        prefix += (word,)
        return

    # create the suffix list on first sight of this prefix, then append
    suffix_map.setdefault(prefix, []).append(word)

    prefix = shift(prefix, word)


def random_text(n=100):
    """Generates random words from the analyzed text.

    Starts with a random prefix from the dictionary.  Each word is
    printed followed by a single space.  Nothing is printed when no
    text has been analyzed yet.

    n: number of words to generate
    """
    if not suffix_map:
        # nothing to sample from; random.choice would raise IndexError
        return

    # the map is not modified while generating, so list the keys once
    prefixes = list(suffix_map)

    # choose a random prefix (not weighted by frequency)
    start = random.choice(prefixes)

    remaining = n
    while remaining > 0:
        suffixes = suffix_map.get(start)
        if suffixes is None:
            # if the start isn't in map, we got to the end of the
            # original text, so we have to start again.  Looping here
            # (instead of recursing) keeps the stack depth constant.
            start = random.choice(prefixes)
            continue

        # choose a random suffix
        word = random.choice(suffixes)
        print(word, end=' ')
        start = shift(start, word)
        remaining -= 1


def shift(t, word):
    """Forms a new tuple by removing the head and adding word to the tail.

    t: tuple of strings
    word: string

    Returns: tuple of strings
    """
    return t[1:] + (word,)


def main(script, filename='158-0.txt', n=100, order=2):
    """Analyzes a file and prints random text based on it.

    script: string, name of the running script (sys.argv[0])
    filename: string, file to analyze
    n: number of words to generate (int or numeric string)
    order: number of words in the prefix (int or numeric string)

    If n or order cannot be converted to an integer, a usage message
    is printed and nothing else happens.
    """
    try:
        n = int(n)
        order = int(order)
    except ValueError:
        # script is a string, so it must be formatted with %s
        print('Usage: %s filename [# of words] [prefix length]' % script)
    else:
        process_file(filename, order)
        random_text(n)
        print()


if __name__ == '__main__':
    main(*sys.argv)