📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67import thinkplot8import thinkstats291011def BiasPmf(pmf, name, invert=False):12"""Returns the Pmf with oversampling proportional to value.1314If pmf is the distribution of true values, the result is the15distribution that would be seen if values are oversampled in16proportion to their values; for example, if you ask students17how big their classes are, large classes are oversampled in18proportion to their size.1920If invert=True, computes in inverse operation; for example,21unbiasing a sample collected from students.2223Args:24pmf: Pmf object.25name: string name for the new Pmf.26invert: boolean2728Returns:29Pmf object30"""31new_pmf = pmf.Copy(name=name)3233for x, p in pmf.Items():34if invert:35new_pmf.Mult(x, 1.0/x)36else:37new_pmf.Mult(x, x)3839new_pmf.Normalize()40return new_pmf414243def UnbiasPmf(pmf, name):44"""Returns the Pmf with oversampling proportional to 1/value.4546Args:47pmf: Pmf object.48name: string name for the new Pmf.4950Returns:51Pmf object52"""53return BiasPmf(pmf, name, invert=True)545556def ClassSizes():5758# start with the actual distribution of class sizes from the book59d = {607: 8,6112: 8,6217: 14,6322: 4,6427: 6,6532: 12,6637: 8,6742: 3,6847: 2,69}7071# form the pmf72pmf = thinkstats2.MakePmfFromDict(d, 'actual')73print 'mean', pmf.Mean()74print 'var', pmf.Var()7576# compute the biased pmf77biased_pmf = BiasPmf(pmf, 'observed')78print 'mean', biased_pmf.Mean()79print 'var', biased_pmf.Var()8081# unbias the biased pmf82unbiased_pmf = UnbiasPmf(biased_pmf, 'unbiased')83print 'mean', unbiased_pmf.Mean()84print 'var', unbiased_pmf.Var()8586# plot the Pmfs87thinkplot.Pmfs([pmf, biased_pmf])88thinkplot.Show(xlabel='Class size',89ylabel='PMF')909192def main():93ClassSizes()949596if __name__ == '__main__':97main()9899100