# 📚 The CoCalc Library - books, templates and other resources
# License: OTHER
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2010 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67import math8import sys9import survey10import thinkstats111213class Respondents(survey.Table):14"""Represents the respondent table."""1516def ReadRecords(self, data_dir='.', n=None):17filename = self.GetFilename()18self.ReadFile(data_dir,19filename,20self.GetFields(),21survey.Respondent,22n)23self.Recode()2425def GetFilename(self):26"""Get the name of the data file.2728This function can be overridden by child classes.2930The BRFSS data is available from thinkstats.com/CDBRFS08.ASC.gz3132"""33return 'CDBRFS08.ASC.gz'3435def GetFields(self):36"""Returns a tuple specifying the fields to extract.3738BRFSS codebook39http://www.cdc.gov/brfss/technical_infodata/surveydata/2008.htm4041The elements of the tuple are field, start, end, case.4243field is the name of the variable44start and end are the indices as specified in the NSFG docs45case is a callable that converts the result to int, float, etc.46"""47return [48('age', 101, 102, int),49('weight2', 119, 122, int),50('wtyrago', 127, 130, int),51('wtkg2', 1254, 1258, int),52('htm3', 1251, 1253, int),53('sex', 143, 143, int),54]5556def Recode(self):57"""Recode variables that need cleaning."""5859def CleanWeight(weight):60if weight in [7777, 9999]:61return 'NA'62elif weight < 1000:63return weight / 2.264elif 9000 < weight < 9999:65return weight - 900066else:67return weight6869for rec in self.records:70# recode wtkg271if rec.wtkg2 in ['NA', 99999]:72rec.wtkg2 = 'NA'73else:74rec.wtkg2 /= 100.07576# recode wtyrago77rec.weight2 = CleanWeight(rec.weight2)78rec.wtyrago = CleanWeight(rec.wtyrago)7980# recode htm381if rec.htm3 == 999:82rec.htm3 = 'NA'8384# recode age85if rec.age in [7, 9]:86rec.age = 'NA'878889def SummarizeHeight(self):90"""Print summary statistics for male and female height."""9192# make a dictionary that maps from 
gender code to list of heights93d = {1:[], 2:[], 'all':[]}94[d[r.sex].append(r.htm3) for r in self.records if r.htm3 != 'NA']95[d['all'].append(r.htm3) for r in self.records if r.htm3 != 'NA']9697print 'Height (cm):'98print 'key n mean var sigma cv'99for key, t in d.iteritems():100mu, var = thinkstats.TrimmedMeanVar(t)101sigma = math.sqrt(var)102cv = sigma / mu103print key, len(t), mu, var, sigma, cv104105return d106107def SummarizeWeight(self):108"""Print summary statistics for male and female weight."""109110# make a dictionary that maps from gender code to list of weights111d = {1:[], 2:[], 'all':[]}112[d[r.sex].append(r.weight2) for r in self.records if r.weight2 != 'NA']113[d['all'].append(r.weight2) for r in self.records if r.weight2 != 'NA']114115print 'Weight (kg):'116print 'key n mean var sigma cv'117for key, t in d.iteritems():118mu, var = thinkstats.TrimmedMeanVar(t)119sigma = math.sqrt(var)120cv = sigma / mu121print key, len(t), mu, var, sigma, cv122123124def SummarizeWeightChange(self):125"""Print the mean reported change in weight in kg."""126127data = [(r.weight2, r.wtyrago) for r in self.records128if r.weight2 != 'NA' and r.wtyrago != 'NA']129130changes = [(curr - prev) for curr, prev in data]131132print 'Mean change', thinkstats.Mean(changes)133134135def main(name, data_dir='.'):136resp = Respondents()137resp.ReadRecords(data_dir)138resp.SummarizeHeight()139resp.SummarizeWeight()140resp.SummarizeWeightChange()141142if __name__ == '__main__':143main(*sys.argv)144145146