📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2010 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67import math8import sys910import irs1112import Pmf13import Cdf141516def PmfMean(pmf):17total = 0.018for val, p in pmf.Items():19total += p * val20return total212223def PmfMoment(pmf, mean=None, exponent=2):24if mean is None:25mean = PmfMean(pmf)2627total = 0.028for val, p in pmf.Items():29total += p * (val - mean)**exponent30return total313233def RelativeMeanDifference(pmf, mean=None):34if mean is None:35mean = PmfMean(pmf)3637diff = Pmf.Pmf()38for v1, p1 in pmf.Items():39for v2, p2 in pmf.Items():40diff.Incr(abs(v1-v2), p1*p2)4142print PmfMean(diff), mean4344return PmfMean(diff) / mean454647def SummarizeData(pmf, cdf):48mean = PmfMean(pmf)49print 'mean:', mean5051median = cdf.Percentile(50)52print 'median:', median5354fraction_below_mean = cdf.Prob(mean)55print 'fraction below mean:', fraction_below_mean5657m2 = PmfMoment(pmf, mean, 2)58m3 = PmfMoment(pmf, mean, 3)5960sigma = math.sqrt(m2)61print 'sigma:', sigma6263g1 = m3 / m2**(3/2)64print 'skewness:', g16566gp = 3 * (mean - median) / sigma67print 'Pearsons skewness:', gp6869gini = RelativeMeanDifference(pmf) / 270print 'gini', gini7172def main(script, *args):73data = irs.ReadIncomeFile()74hist, pmf, cdf = irs.MakeIncomeDist(data)75SummarizeData(pmf, cdf)767778if __name__ == "__main__":79main(*sys.argv)808182