# -*- coding: utf-8 -*-
# 📚 The CoCalc Library - books, templates and other resources
# License: OTHER
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2010 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67import sys8import gzip9import os1011class Record(object):12"""Represents a record."""1314class Respondent(Record):15"""Represents a respondent."""1617class Pregnancy(Record):18"""Represents a pregnancy."""1920class Table(object):21"""Represents a table as a list of objects"""2223def __init__(self):24self.records = []2526def __len__(self):27return len(self.records)2829def ReadFile(self, data_dir, filename, fields, constructor, n=None):30"""Reads a compressed data file builds one object per record.3132Args:33data_dir: string directory name34filename: string name of the file to read3536fields: sequence of (name, start, end, case) tuples specifying37the fields to extract3839constructor: what kind of object to create40"""41filename = os.path.join(data_dir, filename)4243if filename.endswith('gz'):44fp = gzip.open(filename)45else:46fp = open(filename)4748for i, line in enumerate(fp):49if i == n:50break51record = self.MakeRecord(line, fields, constructor)52self.AddRecord(record)53fp.close()5455def MakeRecord(self, line, fields, constructor):56"""Scans a line and returns an object with the appropriate fields.5758Args:59line: string line from a data file6061fields: sequence of (name, start, end, cast) tuples specifying62the fields to extract6364constructor: callable that makes an object for the record.6566Returns:67Record with appropriate fields.68"""69obj = constructor()70for (field, start, end, cast) in fields:71try:72s = line[start-1:end]73val = cast(s)74except ValueError:75#print line76#print field, start, end, s77val = 'NA'78setattr(obj, field, val)79return obj8081def AddRecord(self, record):82"""Adds a record to this table.8384Args:85record: an object of one of the record types.86"""87self.records.append(record)8889def ExtendRecords(self, records):90"""Adds records to 
this table.9192Args:93records: a sequence of record object94"""95self.records.extend(records)9697def Recode(self):98"""Child classes can override this to recode values."""99pass100101102class Respondents(Table):103"""Represents the respondent table."""104105def ReadRecords(self, data_dir='.', n=None):106filename = self.GetFilename()107self.ReadFile(data_dir, filename, self.GetFields(), Respondent, n)108self.Recode()109110def GetFilename(self):111return '2002FemResp.dat.gz'112113def GetFields(self):114"""Returns a tuple specifying the fields to extract.115116The elements of the tuple are field, start, end, case.117118field is the name of the variable119start and end are the indices as specified in the NSFG docs120cast is a callable that converts the result to int, float, etc.121"""122return [123('caseid', 1, 12, int),124]125126class Pregnancies(Table):127"""Contains survey data about a Pregnancy."""128129def ReadRecords(self, data_dir='.', n=None):130filename = self.GetFilename()131self.ReadFile(data_dir, filename, self.GetFields(), Pregnancy, n)132self.Recode()133134def GetFilename(self):135return '2002FemPreg.dat.gz'136137def GetFields(self):138"""Gets information about the fields to extract from the survey data.139140Documentation of the fields for Cycle 6 is at141http://nsfg.icpsr.umich.edu/cocoon/WebDocs/NSFG/public/index.htm142143Returns:144sequence of (name, start, end, type) tuples145"""146return [147('caseid', 1, 12, int),148('nbrnaliv', 22, 22, int),149('babysex', 56, 56, int),150('birthwgt_lb', 57, 58, int),151('birthwgt_oz', 59, 60, int),152('prglength', 275, 276, int),153('outcome', 277, 277, int),154('birthord', 278, 279, int),155('agepreg', 284, 287, int),156('finalwgt', 423, 440, float),157]158159def Recode(self):160for rec in self.records:161162# divide mother's age by 100163try:164if rec.agepreg != 'NA':165rec.agepreg /= 100.0166except AttributeError:167pass168169# convert weight at birth from lbs/oz to total ounces170# note: there are some very 
low birthweights171# that are almost certainly errors, but for now I am not172# filtering173try:174if (rec.birthwgt_lb != 'NA' and rec.birthwgt_lb < 20 and175rec.birthwgt_oz != 'NA' and rec.birthwgt_oz <= 16):176rec.totalwgt_oz = rec.birthwgt_lb * 16 + rec.birthwgt_oz177else:178rec.totalwgt_oz = 'NA'179except AttributeError:180pass181182183def main(name, data_dir='.'):184resp = Respondents()185resp.ReadRecords(data_dir)186print 'Number of respondents', len(resp.records)187188preg = Pregnancies()189preg.ReadRecords(data_dir)190print 'Number of pregnancies', len(preg.records)191192193if __name__ == '__main__':194main(*sys.argv)195196197