📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2011 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function89import numpy as np10import random11import scipy.stats1213import thinkstats214import thinkplot1516from collections import Counter171819def ParetoCdf(x, alpha, xmin):20"""Evaluates CDF of the Pareto distribution with parameters alpha, xmin."""21if x < xmin:22return 023return 1 - pow(x / xmin, -alpha)242526def ParetoMedian(xmin, alpha):27"""Computes the median of a Pareto distribution."""28return xmin * pow(2, 1/alpha)293031def MakeParetoCdf():32"""Generates a plot of the CDF of height in Pareto World."""33n = 5034max = 1000.035xs = [max*i/n for i in range(n)]3637xmin = 10038alpha = 1.739ps = [ParetoCdf(x, alpha, xmin) for x in xs]40print('Median', ParetoMedian(xmin, alpha))4142pyplot.clf()43pyplot.plot(xs, ps, linewidth=2)44myplot.Save('pareto_world1',45title = 'Pareto CDF',46xlabel = 'height (cm)',47ylabel = 'CDF',48legend=False)495051def MakeFigure(xmin=100, alpha=1.7, mu=150, sigma=25):52"""Makes a figure showing the CDF of height in ParetoWorld.5354Compared to a normal distribution.5556xmin: parameter of the Pareto distribution57alpha: parameter of the Pareto distribution58mu: parameter of the Normal distribution59sigma: parameter of the Normal distribution60"""6162t1 = [xmin * random.paretovariate(alpha) for i in range(10000)]63cdf1 = Cdf.MakeCdfFromList(t1, name='pareto')6465t2 = [random.normalvariate(mu, sigma) for i in range(10000)]66cdf2 = Cdf.MakeCdfFromList(t2, name='normal')6768myplot.Clf()69myplot.Cdfs([cdf1, cdf2])70myplot.Save(root='pareto_world2',71title='Pareto World',72xlabel='height (cm)',73ylabel='CDF')747576def TallestPareto(iters=2, n=10000, xmin=100, alpha=1.7):77"""Find the tallest person in Pareto World.7879iters: how many samples to generate80n: how many in each sample81xmin: parameter of the Pareto distribution82alpha: parameter of the Pareto distribution83"""84tallest = 085for i in range(iters):86t = [xmin * random.paretovariate(alpha) for i in range(n)]87tallest = max(max(t), tallest)88return tallest899091def ParetoSample(alpha, xmin, n):92t = scipy.stats.pareto.rvs(b=alpha, size=n) * xmin93return t.round()949596def main():9798counter = Counter()99for i in range(10000):100sample = ParetoSample(1.7, 0.001, 10000)101counter.update(Counter(sample))102103print(len(counter))104return105106pmf = thinkstats2.Pmf(counter)107print('mean', pmf.Mean())108for x, prob in pmf.Largest(10):109print(x)110111cdf = thinkstats2.Cdf(pmf)112thinkplot.Cdf(cdf, complement=True)113thinkplot.Show(xscale='log',114yscale='log')115return116117MakeFigure()118MakeParetoCdf()119print(TallestPareto(iters=2))120121122if __name__ == "__main__":123main()124125126