📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""This file contains code for use with "Think Bayes",1by Allen B. Downey, available from greenteapress.com23Copyright 2012 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67import math89import columns10import thinkbayes11import thinkstats12import thinkplot131415USE_SUMMARY_DATA = True1617class Hockey(thinkbayes.Suite):18"""Represents hypotheses about the scoring rate for a team."""1920def __init__(self, name=''):21"""Initializes the Hockey object.2223name: string24"""25if USE_SUMMARY_DATA:26# prior based on each team's average goals scored27mu = 2.828sigma = 0.329else:30# prior based on each pair-wise match-up31mu = 2.832sigma = 0.853334pmf = thinkbayes.MakeGaussianPmf(mu, sigma, 4)35thinkbayes.Suite.__init__(self, pmf, name=name)3637def Likelihood(self, data, hypo):38"""Computes the likelihood of the data under the hypothesis.3940Evaluates the Poisson PMF for lambda and k.4142hypo: goal scoring rate in goals per game43data: goals scored in one period44"""45lam = hypo46k = data47like = thinkbayes.EvalPoissonPmf(k, lam)48return like495051def MakeGoalPmf(suite, high=10):52"""Makes the distribution of goals scored, given distribution of lam.5354suite: distribution of goal-scoring rate55high: upper bound5657returns: Pmf of goals per game58"""59metapmf = thinkbayes.Pmf()6061for lam, prob in suite.Items():62pmf = thinkbayes.MakePoissonPmf(lam, high)63metapmf.Set(pmf, prob)6465mix = thinkbayes.MakeMixture(metapmf, name=suite.name)66return mix676869def MakeGoalTimePmf(suite):70"""Makes the distribution of time til first goal.7172suite: distribution of goal-scoring rate7374returns: Pmf of goals per game75"""76metapmf = thinkbayes.Pmf()7778for lam, prob in suite.Items():79pmf = thinkbayes.MakeExponentialPmf(lam, high=2, n=2001)80metapmf.Set(pmf, prob)8182mix = thinkbayes.MakeMixture(metapmf, name=suite.name)83return mix848586class Game(object):87"""Represents a game.8889Attributes are set in columns.read_csv.90"""91convert = dict()9293def clean(self):94self.goals = self.pd1 + self.pd2 + self.pd3959697def ReadHockeyData(filename='hockey_data.csv'):98"""Read game scores from the data file.99100filename: string101"""102game_list = columns.read_csv(filename, Game)103104# map from gameID to list of two games105games = {}106for game in game_list:107if game.season != 2011:108continue109key = game.game110games.setdefault(key, []).append(game)111112# map from (team1, team2) to (score1, score2)113pairs = {}114for key, pair in games.iteritems():115t1, t2 = pair116key = t1.team, t2.team117entry = t1.total, t2.total118pairs.setdefault(key, []).append(entry)119120ProcessScoresTeamwise(pairs)121ProcessScoresPairwise(pairs)122123124def ProcessScoresPairwise(pairs):125"""Average number of goals for each team against each opponent.126127pairs: map from (team1, team2) to (score1, score2)128"""129# map from (team1, team2) to list of goals scored130goals_scored = {}131for key, entries in pairs.iteritems():132t1, t2 = key133for entry in entries:134g1, g2 = entry135goals_scored.setdefault((t1, t2), []).append(g1)136goals_scored.setdefault((t2, t1), []).append(g2)137138# make a list of average goals scored139lams = []140for key, goals in goals_scored.iteritems():141if len(goals) < 3:142continue143lam = thinkstats.Mean(goals)144lams.append(lam)145146# make the distribution of average goals scored147cdf = thinkbayes.MakeCdfFromList(lams)148thinkplot.Cdf(cdf)149thinkplot.Show()150151mu, var = thinkstats.MeanVar(lams)152print 'mu, sig', mu, math.sqrt(var)153154print 'BOS v VAN', pairs['BOS', 'VAN']155156157def ProcessScoresTeamwise(pairs):158"""Average number of goals for each team.159160pairs: map from (team1, team2) to (score1, score2)161"""162# map from team to list of goals scored163goals_scored = {}164for key, entries in pairs.iteritems():165t1, t2 = key166for entry in entries:167g1, g2 = entry168goals_scored.setdefault(t1, []).append(g1)169goals_scored.setdefault(t2, []).append(g2)170171# make a list of average goals scored172lams = []173for key, goals in goals_scored.iteritems():174lam = thinkstats.Mean(goals)175lams.append(lam)176177# make the distribution of average goals scored178cdf = thinkbayes.MakeCdfFromList(lams)179thinkplot.Cdf(cdf)180thinkplot.Show()181182mu, var = thinkstats.MeanVar(lams)183print 'mu, sig', mu, math.sqrt(var)184185186def main():187#ReadHockeyData()188#return189190formats = ['pdf', 'eps']191192suite1 = Hockey('bruins')193suite2 = Hockey('canucks')194195thinkplot.Clf()196thinkplot.PrePlot(num=2)197thinkplot.Pmf(suite1)198thinkplot.Pmf(suite2)199thinkplot.Save(root='hockey0',200xlabel='Goals per game',201ylabel='Probability',202formats=formats)203204suite1.UpdateSet([0, 2, 8, 4])205suite2.UpdateSet([1, 3, 1, 0])206207thinkplot.Clf()208thinkplot.PrePlot(num=2)209thinkplot.Pmf(suite1)210thinkplot.Pmf(suite2)211thinkplot.Save(root='hockey1',212xlabel='Goals per game',213ylabel='Probability',214formats=formats)215216217goal_dist1 = MakeGoalPmf(suite1)218goal_dist2 = MakeGoalPmf(suite2)219220thinkplot.Clf()221thinkplot.PrePlot(num=2)222thinkplot.Pmf(goal_dist1)223thinkplot.Pmf(goal_dist2)224thinkplot.Save(root='hockey2',225xlabel='Goals',226ylabel='Probability',227formats=formats)228229time_dist1 = MakeGoalTimePmf(suite1)230time_dist2 = MakeGoalTimePmf(suite2)231232print 'MLE bruins', suite1.MaximumLikelihood()233print 'MLE canucks', suite2.MaximumLikelihood()234235thinkplot.Clf()236thinkplot.PrePlot(num=2)237thinkplot.Pmf(time_dist1)238thinkplot.Pmf(time_dist2)239thinkplot.Save(root='hockey3',240xlabel='Games until goal',241ylabel='Probability',242formats=formats)243244diff = goal_dist1 - goal_dist2245p_win = diff.ProbGreater(0)246p_loss = diff.ProbLess(0)247p_tie = diff.Prob(0)248249print p_win, p_loss, p_tie250251p_overtime = thinkbayes.PmfProbLess(time_dist1, time_dist2)252p_adjust = thinkbayes.PmfProbEqual(time_dist1, time_dist2)253p_overtime += p_adjust / 2254print 'p_overtime', p_overtime255256print p_overtime * p_tie257p_win += p_overtime * p_tie258print 'p_win', p_win259260# win the next two261p_series = p_win**2262263# split the next two, win the third264p_series += 2 * p_win * (1-p_win) * p_win265266print 'p_series', p_series267268269if __name__ == '__main__':270main()271272273