Repository for a workshop on Bayesian statistics
"""This file contains code used in "Think Bayes",1by Allen B. Downey, available from greenteapress.com23Copyright 2012 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function, division89import csv1011import thinkbayes12import thinkplot131415def ReadScale(filename='sat_scale.csv', col=2):16"""Reads a CSV file of SAT scales (maps from raw score to standard score).1718Args:19filename: string filename20col: which column to start with (0=Reading, 2=Math, 4=Writing)2122Returns: thinkbayes.Interpolator object23"""24def ParseRange(s):25t = [int(x) for x in s.split('-')]26return 1.0 * sum(t) / len(t)2728fp = open(filename)29reader = csv.reader(fp)30raws = []31scores = []3233for t in reader:34try:35raw = int(t[col])36raws.append(raw)37score = ParseRange(t[col+1])38scores.append(score)39except:40pass4142raws.sort()43scores.sort()44return thinkbayes.Interpolator(raws, scores)454647def ReadRanks(filename='sat_ranks.csv'):48"""Reads a CSV file of SAT scores.4950Args:51filename: string filename5253Returns:54list of (score, freq) pairs55"""56fp = open(filename)57reader = csv.reader(fp)58res = []5960for t in reader:61try:62score = int(t[0])63freq = int(t[1])64res.append((score, freq))65except ValueError:66pass6768return res697071def DivideValues(pmf, denom):72"""Divides the values in a Pmf by denom.7374Returns a new Pmf.75"""76new = thinkbayes.Pmf()77denom = float(denom)78for val, prob in pmf.Items():79x = val / denom80new.Set(x, prob)81return new828384class Exam(object):85"""Encapsulates information about an exam.8687Contains the distribution of scaled scores and an88Interpolator that maps between scaled and raw scores.89"""90def __init__(self):91self.scale = ReadScale()9293scores = ReadRanks()94score_pmf = thinkbayes.MakePmfFromDict(dict(scores))9596self.raw = self.ReverseScale(score_pmf)97self.max_score = max(self.raw.Values())98self.prior = DivideValues(self.raw, denom=self.max_score)99100def Lookup(self, raw):101"""Looks up a raw score and returns a scaled score."""102return self.scale.Lookup(raw)103104def Reverse(self, score):105"""Looks up a scaled score and returns a raw score.106107Since we ignore the penalty, negative scores round up to zero.108"""109raw = self.scale.Reverse(score)110return raw if raw > 0 else 0111112def ReverseScale(self, pmf):113"""Applies the reverse scale to the values of a PMF.114115Args:116pmf: Pmf object117scale: Interpolator object118119Returns:120new Pmf121"""122new = thinkbayes.Pmf()123for val, prob in pmf.Items():124raw = self.Reverse(val)125new.Incr(raw, prob)126return new127128129class Sat(thinkbayes.Suite):130"""Represents the distribution of efficacy for a test-taker."""131132def __init__(self, exam):133thinkbayes.Suite.__init__(self)134135self.exam = exam136137# start with the prior distribution138for x, prob in exam.prior.Items():139self.Set(x, prob)140141def Likelihood(self, data, hypo):142"""Computes the likelihood of a test score, given x."""143x = hypo144score = data145raw = self.exam.Reverse(score)146147yes, no = raw, self.exam.max_score - raw148like = x**yes * (1-x)**no149return like150151152def PmfProbGreater(pmf1, pmf2):153"""Probability that a value from pmf1 is less than a value from pmf2.154155Args:156pmf1: Pmf object157pmf2: Pmf object158159Returns:160float probability161"""162total = 0.0163for x1, p1 in pmf1.Items():164for x2, p2 in pmf2.Items():165# Fill this in!166pass167168return total169170171def main():172exam = Exam()173174alice = Sat(exam)175alice.label = 'alice'176alice.Update(780)177178bob = Sat(exam)179bob.label = 'bob'180bob.Update(760)181182print('Prob Alice is "smarter":', PmfProbGreater(alice, bob))183print('Prob Bob is "smarter":', PmfProbGreater(bob, alice))184185thinkplot.PrePlot(2)186thinkplot.Pdfs([alice, bob])187thinkplot.Show(xlabel='x',188ylabel='Probability',189loc='upper left',190xlim=[0.7, 1.02])191192193if __name__ == '__main__':194main()195196197