CoCalc -- sat.py

Repository for a workshop on Bayesian statistics
¹⁴³⁰ views
1
"""This file contains code used in "Think Bayes",
2
by Allen B. Downey, available from greenteapress.com
3

4
Copyright 2012 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7

8
from __future__ import print_function, division
9

10
import csv
11

12
import thinkbayes
13
import thinkplot
14

15

16
def ReadScale(filename='sat_scale.csv', col=2):
17
    """Reads a CSV file of SAT scales (maps from raw score to standard score).
18

19
    Args:
20
      filename: string filename
21
      col: which column to start with (0=Reading, 2=Math, 4=Writing)
22

23
    Returns: thinkbayes.Interpolator object
24
    """
25
    def ParseRange(s):
26
        t = [int(x) for x in s.split('-')]
27
        return 1.0 * sum(t) / len(t)
28

29
    fp = open(filename)
30
    reader = csv.reader(fp)
31
    raws = []
32
    scores = []
33

34
    for t in reader:
35
        try:
36
            raw = int(t[col])
37
            raws.append(raw)
38
            score = ParseRange(t[col+1])
39
            scores.append(score)
40
        except:
41
            pass
42

43
    raws.sort()
44
    scores.sort()
45
    return thinkbayes.Interpolator(raws, scores)
46

47

48
def ReadRanks(filename='sat_ranks.csv'):
49
    """Reads a CSV file of SAT scores.
50

51
    Args:
52
      filename: string filename
53

54
    Returns:
55
      list of (score, freq) pairs
56
    """
57
    fp = open(filename)
58
    reader = csv.reader(fp)
59
    res = []
60

61
    for t in reader:
62
        try:
63
            score = int(t[0])
64
            freq = int(t[1])
65
            res.append((score, freq))
66
        except ValueError:
67
            pass
68

69
    return res
70

71

72
def DivideValues(pmf, denom):
73
    """Divides the values in a Pmf by denom.
74

75
    Returns a new Pmf.
76
    """
77
    new = thinkbayes.Pmf()
78
    denom = float(denom)
79
    for val, prob in pmf.Items():
80
        x = val / denom
81
        new.Set(x, prob)
82
    return new
83

84

85
class Exam(object):
86
    """Encapsulates information about an exam.
87

88
    Contains the distribution of scaled scores and an
89
    Interpolator that maps between scaled and raw scores.
90
    """
91
    def __init__(self):
92
        self.scale = ReadScale()
93

94
        scores = ReadRanks()
95
        score_pmf = thinkbayes.MakePmfFromDict(dict(scores))
96

97
        self.raw = self.ReverseScale(score_pmf)
98
        self.max_score = max(self.raw.Values())
99
        self.prior = DivideValues(self.raw, denom=self.max_score)
100

101
    def Lookup(self, raw):
102
        """Looks up a raw score and returns a scaled score."""
103
        return self.scale.Lookup(raw)
104
        
105
    def Reverse(self, score):
106
        """Looks up a scaled score and returns a raw score.
107

108
        Since we ignore the penalty, negative scores round up to zero.
109
        """
110
        raw = self.scale.Reverse(score)
111
        return raw if raw > 0 else 0
112
        
113
    def ReverseScale(self, pmf):
114
        """Applies the reverse scale to the values of a PMF.
115

116
        Args:
117
            pmf: Pmf object
118
            scale: Interpolator object
119

120
        Returns:
121
            new Pmf
122
        """
123
        new = thinkbayes.Pmf()
124
        for val, prob in pmf.Items():
125
            raw = self.Reverse(val)
126
            new.Incr(raw, prob)
127
        return new
128

129

130
class Sat(thinkbayes.Suite):
131
    """Represents the distribution of efficacy for a test-taker."""
132

133
    def __init__(self, exam):
134
        thinkbayes.Suite.__init__(self)
135

136
        self.exam = exam
137

138
        # start with the prior distribution
139
        for x, prob in exam.prior.Items():
140
            self.Set(x, prob)
141

142
    def Likelihood(self, data, hypo):
143
        """Computes the likelihood of a test score, given x."""
144
        x = hypo
145
        score = data
146
        raw = self.exam.Reverse(score)
147

148
        yes, no = raw, self.exam.max_score - raw
149
        like = x**yes * (1-x)**no
150
        return like
151

152

153
def PmfProbGreater(pmf1, pmf2):
154
    """Probability that a value from pmf1 is less than a value from pmf2.
155

156
    Args:
157
        pmf1: Pmf object
158
        pmf2: Pmf object
159

160
    Returns:
161
        float probability
162
    """
163
    total = 0.0
164
    for x1, p1 in pmf1.Items():
165
        for x2, p2 in pmf2.Items():
166
            # Fill this in!
167
            pass
168

169
    return total
170

171

172
def main():
173
    exam = Exam()
174

175
    alice = Sat(exam)
176
    alice.label = 'alice'
177
    alice.Update(780)
178

179
    bob = Sat(exam)
180
    bob.label = 'bob'
181
    bob.Update(760)
182

183
    print('Prob Alice is "smarter":', PmfProbGreater(alice, bob))
184
    print('Prob Bob is "smarter":', PmfProbGreater(bob, alice))
185

186
    thinkplot.PrePlot(2)
187
    thinkplot.Pdfs([alice, bob])
188
    thinkplot.Show(xlabel='x',
189
                   ylabel='Probability',
190
                   loc='upper left',
191
                   xlim=[0.7, 1.02])
192

193

194
if __name__ == '__main__':
195
    main()
196

197
Product

Resources

Company