Repository for a workshop on Bayesian statistics
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2013 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function, division89import thinkbayes10import thinkplot1112import numpy1314"""15Problem: students sign up to participate in a community service16project. Some fraction, q, of the students who sign up actually17participate, and of those some fraction, r, report back.1819Given a sample of students who sign up and the number who report20back, we can estimate the product q*r, but don't learn much about21q and r separately.2223If we can get a smaller sample of students where we know who24participated and who reported, we can use that to improve the25estimates of q and r.2627And we can use that to compute the posterior distribution of the28number of students who participated.2930"""3132class Volunteer(thinkbayes.Suite):3334def Likelihood(self, data, hypo):35"""Computes the likelihood of the data under the hypothesis.3637hypo: pair of (q, r)38data: one of two possible formats39"""40if len(data) == 2:41return self.Likelihood1(data, hypo)42elif len(data) == 3:43return self.Likelihood2(data, hypo)44else:45raise ValueError()4647def Likelihood1(self, data, hypo):48"""Computes the likelihood of the data under the hypothesis.4950hypo: pair of (q, r)51data: tuple (signed up, reported)52"""53q, r = hypo54p = q * r55signed_up, reported = data56yes = reported57no = signed_up - reported5859like = p**yes * (1-p)**no60return like6162def Likelihood2(self, data, hypo):63"""Computes the likelihood of the data under the hypothesis.6465hypo: pair of (q, r)66data: tuple (signed up, participated, reported)67"""68q, r = hypo6970signed_up, participated, reported = data7172yes = participated73no = signed_up - participated74like1 = q**yes * (1-q)**no7576yes = reported77no = participated - reported78like2 = r**yes * (1-r)**no7980return like1 * like2818283def MarginalDistribution(suite, index):84"""Extracts the marginal distribution of one parameter.8586suite: Suite87index: which parameter8889returns: Pmf90"""91pmf = thinkbayes.Pmf()92for t, prob in suite.Items():93pmf.Incr(t[index], prob)94return pmf959697def MarginalProduct(suite):98"""Extracts the distribution of the product of the parameters.99100suite: Suite101102returns: Pmf103"""104pmf = thinkbayes.Pmf()105for (q, r), prob in suite.Items():106pmf.Incr(q*r, prob)107return pmf108109110def main():111probs = numpy.linspace(0, 1, 101)112113hypos = []114for q in probs:115for r in probs:116hypos.append((q, r))117118suite = Volunteer(hypos)119120# update the Suite with the larger sample of students who121# signed up and reported122data = 140, 50123suite.Update(data)124125# update again with the smaller sample of students who signed126# up, participated, and reported127data = 5, 3, 1128suite.Update(data)129130#p_marginal = MarginalProduct(suite)131q_marginal = MarginalDistribution(suite, 0)132r_marginal = MarginalDistribution(suite, 1)133134thinkplot.Pmf(q_marginal, label='q')135thinkplot.Pmf(r_marginal, label='r')136#thinkplot.Pmf(p_marginal)137138thinkplot.Save(root='volunteer1',139xlabel='fraction participating/reporting',140ylabel='PMF',141formats=['png']142)143144145if __name__ == '__main__':146main()147148149