Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

Repository for a workshop on Bayesian statistics

1430 views
1
"""This file contains code used in "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2013 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function, division
9
10
import thinkbayes
11
import thinkplot
12
13
import numpy
14
15
"""
16
Problem: students sign up to participate in a community service
17
project. Some fraction, q, of the students who sign up actually
18
participate, and of those some fraction, r, report back.
19
20
Given a sample of students who sign up and the number who report
21
back, we can estimate the product q*r, but don't learn much about
22
q and r separately.
23
24
If we can get a smaller sample of students where we know who
25
participated and who reported, we can use that to improve the
26
estimates of q and r.
27
28
And we can use that to compute the posterior distribution of the
29
number of students who participated.
30
31
"""
32
33
class Volunteer(thinkbayes.Suite):
34
35
def Likelihood(self, data, hypo):
36
"""Computes the likelihood of the data under the hypothesis.
37
38
hypo: pair of (q, r)
39
data: one of two possible formats
40
"""
41
if len(data) == 2:
42
return self.Likelihood1(data, hypo)
43
elif len(data) == 3:
44
return self.Likelihood2(data, hypo)
45
else:
46
raise ValueError()
47
48
def Likelihood1(self, data, hypo):
49
"""Computes the likelihood of the data under the hypothesis.
50
51
hypo: pair of (q, r)
52
data: tuple (signed up, reported)
53
"""
54
q, r = hypo
55
p = q * r
56
signed_up, reported = data
57
yes = reported
58
no = signed_up - reported
59
60
like = p**yes * (1-p)**no
61
return like
62
63
def Likelihood2(self, data, hypo):
64
"""Computes the likelihood of the data under the hypothesis.
65
66
hypo: pair of (q, r)
67
data: tuple (signed up, participated, reported)
68
"""
69
q, r = hypo
70
71
signed_up, participated, reported = data
72
73
yes = participated
74
no = signed_up - participated
75
like1 = q**yes * (1-q)**no
76
77
yes = reported
78
no = participated - reported
79
like2 = r**yes * (1-r)**no
80
81
return like1 * like2
82
83
84
def MarginalDistribution(suite, index):
85
"""Extracts the marginal distribution of one parameter.
86
87
suite: Suite
88
index: which parameter
89
90
returns: Pmf
91
"""
92
pmf = thinkbayes.Pmf()
93
for t, prob in suite.Items():
94
pmf.Incr(t[index], prob)
95
return pmf
96
97
98
def MarginalProduct(suite):
99
"""Extracts the distribution of the product of the parameters.
100
101
suite: Suite
102
103
returns: Pmf
104
"""
105
pmf = thinkbayes.Pmf()
106
for (q, r), prob in suite.Items():
107
pmf.Incr(q*r, prob)
108
return pmf
109
110
111
def main():
112
probs = numpy.linspace(0, 1, 101)
113
114
hypos = []
115
for q in probs:
116
for r in probs:
117
hypos.append((q, r))
118
119
suite = Volunteer(hypos)
120
121
# update the Suite with the larger sample of students who
122
# signed up and reported
123
data = 140, 50
124
suite.Update(data)
125
126
# update again with the smaller sample of students who signed
127
# up, participated, and reported
128
data = 5, 3, 1
129
suite.Update(data)
130
131
#p_marginal = MarginalProduct(suite)
132
q_marginal = MarginalDistribution(suite, 0)
133
r_marginal = MarginalDistribution(suite, 1)
134
135
thinkplot.Pmf(q_marginal, label='q')
136
thinkplot.Pmf(r_marginal, label='r')
137
#thinkplot.Pmf(p_marginal)
138
139
thinkplot.Save(root='volunteer1',
140
xlabel='fraction participating/reporting',
141
ylabel='PMF',
142
formats=['png']
143
)
144
145
146
if __name__ == '__main__':
147
main()
148
149