📚 The CoCalc Library - books, templates and other resources
License: OTHER
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2010 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67import math8import random910import thinkstats111213def Cov(xs, ys, mux=None, muy=None):14"""Computes Cov(X, Y).1516Args:17xs: sequence of values18ys: sequence of values19mux: optional float mean of xs20muy: optional float mean of ys2122Returns:23Cov(X, Y)24"""25if mux is None:26mux = thinkstats.Mean(xs)27if muy is None:28muy = thinkstats.Mean(ys)2930total = 0.031for x, y in zip(xs, ys):32total += (x-mux) * (y-muy)3334return total / len(xs)353637def Corr(xs, ys):38"""Computes Corr(X, Y).3940Args:41xs: sequence of values42ys: sequence of values4344Returns:45Corr(X, Y)46"""47xbar, varx = thinkstats.MeanVar(xs)48ybar, vary = thinkstats.MeanVar(ys)4950corr = Cov(xs, ys, xbar, ybar) / math.sqrt(varx * vary)5152return corr535455def SerialCorr(xs):56"""Computes the serial correlation of a sequence."""57return Corr(xs[:-1], xs[1:])585960def SpearmanCorr(xs, ys):61"""Computes Spearman's rank correlation.6263Args:64xs: sequence of values65ys: sequence of values6667Returns:68float Spearman's correlation69"""70xranks = MapToRanks(xs)71yranks = MapToRanks(ys)72return Corr(xranks, yranks)737475def LeastSquares(xs, ys):76"""Computes a linear least squares fit for ys as a function of xs.7778Args:79xs: sequence of values80ys: sequence of values8182Returns:83tuple of (intercept, slope)84"""85xbar, varx = thinkstats.MeanVar(xs)86ybar, vary = thinkstats.MeanVar(ys)8788slope = Cov(xs, ys, xbar, ybar) / varx89inter = ybar - slope * xbar9091return inter, slope929394def FitLine(xs, inter, slope):95"""Returns the fitted line for the range of xs.9697xs: x values used for the fit98slope: estimated slope99inter: estimated intercept100"""101fxs = min(xs), max(xs)102fys = [x * slope + inter for x in fxs]103return fxs, fys104105106def Residuals(xs, ys, inter, slope):107"""Computes residuals for a linear fit with parameters inter and slope.108109Args:110xs: independent variable111ys: dependent variable112inter: float intercept113slope: float slope114115Returns:116list of residuals117"""118res = [y - inter - slope*x for x, y in zip(xs, ys)]119return res120121122def CoefDetermination(ys, res):123"""Computes the coefficient of determination (R^2) for given residuals.124125Args:126ys: dependent variable127res: residuals128129Returns:130float coefficient of determination131"""132ybar, vary = thinkstats.MeanVar(ys)133resbar, varres = thinkstats.MeanVar(res)134return 1 - varres / vary135136137def MapToRanks(t):138"""Returns a list of ranks corresponding to the elements in t.139140Args:141t: sequence of numbers142143Returns:144list of integer ranks, starting at 1145"""146# pair up each value with its index147pairs = enumerate(t)148149# sort by value150sorted_pairs = sorted(pairs, key=lambda pair: pair[1])151152# pair up each pair with its rank153ranked = enumerate(sorted_pairs)154155# sort by index156resorted = sorted(ranked, key=lambda trip: trip[1][0])157158# extract the ranks159ranks = [trip[0]+1 for trip in resorted]160return ranks161162163def CorrelatedGenerator(rho):164"""Generates standard normal variates with correlation.165166rho: target coefficient of correlation167168Returns: iterable169"""170x = random.gauss(0, 1)171yield x172173sigma = math.sqrt(1 - rho**2);174while True:175x = random.gauss(x * rho, sigma)176yield x177178179def CorrelatedNormalGenerator(mu, sigma, rho):180"""Generates normal variates with correlation.181182mu: mean of variate183sigma: standard deviation of variate184rho: target coefficient of correlation185186Returns: iterable187"""188for x in CorrelatedGenerator(rho):189yield x * sigma + mu190191192def main():193pass194195196if __name__ == '__main__':197main()198199200