Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

Repository for a workshop on Bayesian statistics

1430 views
1
"""This file contains code used in "Think Bayes",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2012 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function, division
9
10
import csv
11
12
import thinkbayes
13
import thinkplot
14
15
16
def ReadScale(filename='sat_scale.csv', col=2):
17
"""Reads a CSV file of SAT scales (maps from raw score to standard score).
18
19
Args:
20
filename: string filename
21
col: which column to start with (0=Reading, 2=Math, 4=Writing)
22
23
Returns: thinkbayes.Interpolator object
24
"""
25
def ParseRange(s):
26
t = [int(x) for x in s.split('-')]
27
return 1.0 * sum(t) / len(t)
28
29
fp = open(filename)
30
reader = csv.reader(fp)
31
raws = []
32
scores = []
33
34
for t in reader:
35
try:
36
raw = int(t[col])
37
raws.append(raw)
38
score = ParseRange(t[col+1])
39
scores.append(score)
40
except:
41
pass
42
43
raws.sort()
44
scores.sort()
45
return thinkbayes.Interpolator(raws, scores)
46
47
48
def ReadRanks(filename='sat_ranks.csv'):
49
"""Reads a CSV file of SAT scores.
50
51
Args:
52
filename: string filename
53
54
Returns:
55
list of (score, freq) pairs
56
"""
57
fp = open(filename)
58
reader = csv.reader(fp)
59
res = []
60
61
for t in reader:
62
try:
63
score = int(t[0])
64
freq = int(t[1])
65
res.append((score, freq))
66
except ValueError:
67
pass
68
69
return res
70
71
72
def DivideValues(pmf, denom):
73
"""Divides the values in a Pmf by denom.
74
75
Returns a new Pmf.
76
"""
77
new = thinkbayes.Pmf()
78
denom = float(denom)
79
for val, prob in pmf.Items():
80
x = val / denom
81
new.Set(x, prob)
82
return new
83
84
85
class Exam(object):
86
"""Encapsulates information about an exam.
87
88
Contains the distribution of scaled scores and an
89
Interpolator that maps between scaled and raw scores.
90
"""
91
def __init__(self):
92
self.scale = ReadScale()
93
94
scores = ReadRanks()
95
score_pmf = thinkbayes.MakePmfFromDict(dict(scores))
96
97
self.raw = self.ReverseScale(score_pmf)
98
self.max_score = max(self.raw.Values())
99
self.prior = DivideValues(self.raw, denom=self.max_score)
100
101
def Lookup(self, raw):
102
"""Looks up a raw score and returns a scaled score."""
103
return self.scale.Lookup(raw)
104
105
def Reverse(self, score):
106
"""Looks up a scaled score and returns a raw score.
107
108
Since we ignore the penalty, negative scores round up to zero.
109
"""
110
raw = self.scale.Reverse(score)
111
return raw if raw > 0 else 0
112
113
def ReverseScale(self, pmf):
114
"""Applies the reverse scale to the values of a PMF.
115
116
Args:
117
pmf: Pmf object
118
scale: Interpolator object
119
120
Returns:
121
new Pmf
122
"""
123
new = thinkbayes.Pmf()
124
for val, prob in pmf.Items():
125
raw = self.Reverse(val)
126
new.Incr(raw, prob)
127
return new
128
129
130
class Sat(thinkbayes.Suite):
131
"""Represents the distribution of efficacy for a test-taker."""
132
133
def __init__(self, exam):
134
thinkbayes.Suite.__init__(self)
135
136
self.exam = exam
137
138
# start with the prior distribution
139
for x, prob in exam.prior.Items():
140
self.Set(x, prob)
141
142
def Likelihood(self, data, hypo):
143
"""Computes the likelihood of a test score, given x."""
144
x = hypo
145
score = data
146
raw = self.exam.Reverse(score)
147
148
yes, no = raw, self.exam.max_score - raw
149
like = x**yes * (1-x)**no
150
return like
151
152
153
def PmfProbGreater(pmf1, pmf2):
154
"""Probability that a value from pmf1 is less than a value from pmf2.
155
156
Args:
157
pmf1: Pmf object
158
pmf2: Pmf object
159
160
Returns:
161
float probability
162
"""
163
total = 0.0
164
for x1, p1 in pmf1.Items():
165
for x2, p2 in pmf2.Items():
166
if x1 > x2:
167
total += p1 * p2
168
169
return total
170
171
172
def main():
173
exam = Exam()
174
175
alice = Sat(exam)
176
alice.label = 'alice'
177
alice.Update(780)
178
179
bob = Sat(exam)
180
bob.label = 'bob'
181
bob.Update(760)
182
183
print('Prob Alice is "smarter":', PmfProbGreater(alice, bob))
184
print('Prob Bob is "smarter":', PmfProbGreater(bob, alice))
185
186
thinkplot.PrePlot(2)
187
thinkplot.Pdfs([alice, bob])
188
thinkplot.Show(xlabel='x',
189
ylabel='Probability',
190
loc='upper left',
191
xlim=[0.7, 1.02])
192
193
194
if __name__ == '__main__':
195
main()
196
197