CoCalc -- thinkbayes.py

📚 The CoCalc Library - books, templates and other resources
cocalc-examples / think-bayes / code / thinkbayes.py
¹³²⁹²³ views
License: OTHER
1
"""This file contains code for use with "Think Bayes",
2
by Allen B. Downey, available from greenteapress.com
3

4
Copyright 2012 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7

8
"""This file contains class definitions for:
9

10
Hist: represents a histogram (map from values to integer frequencies).
11

12
Pmf: represents a probability mass function (map from values to probs).
13

14
_DictWrapper: private parent class for Hist and Pmf.
15

16
Cdf: represents a discrete cumulative distribution function
17

18
Pdf: represents a continuous probability density function
19

20
"""
21

22
import bisect
23
import copy
24
import logging
25
import math
26
import numpy
27
import random
28

29
import scipy.stats
30
from scipy.special import erf, erfinv, gammaln
31

32
ROOT2 = math.sqrt(2)
33

34
def RandomSeed(x):
35
    """Initialize the random and numpy.random generators.
36

37
    x: int seed
38
    """
39
    random.seed(x)
40
    numpy.random.seed(x)
41
    
42

43
def Odds(p):
44
    """Computes odds for a given probability.
45

46
    Example: p=0.75 means 75 for and 25 against, or 3:1 odds in favor.
47

48
    Note: when p=1, the formula for odds divides by zero, which is
49
    normally undefined.  But I think it is reasonable to define Odds(1)
50
    to be infinity, so that's what this function does.
51

52
    p: float 0-1
53

54
    Returns: float odds
55
    """
56
    if p == 1:
57
        return float('inf')
58
    return p / (1 - p)
59

60

61
def Probability(o):
62
    """Computes the probability corresponding to given odds.
63

64
    Example: o=2 means 2:1 odds in favor, or 2/3 probability
65

66
    o: float odds, strictly positive
67

68
    Returns: float probability
69
    """
70
    return o / (o + 1)
71

72

73
def Probability2(yes, no):
74
    """Computes the probability corresponding to given odds.
75

76
    Example: yes=2, no=1 means 2:1 odds in favor, or 2/3 probability.
77
    
78
    yes, no: int or float odds in favor
79
    """
80
    return float(yes) / (yes + no)
81

82

83
class Interpolator(object):
84
    """Represents a mapping between sorted sequences; performs linear interp.
85

86
    Attributes:
87
        xs: sorted list
88
        ys: sorted list
89
    """
90

91
    def __init__(self, xs, ys):
92
        self.xs = xs
93
        self.ys = ys
94

95
    def Lookup(self, x):
96
        """Looks up x and returns the corresponding value of y."""
97
        return self._Bisect(x, self.xs, self.ys)
98

99
    def Reverse(self, y):
100
        """Looks up y and returns the corresponding value of x."""
101
        return self._Bisect(y, self.ys, self.xs)
102

103
    def _Bisect(self, x, xs, ys):
104
        """Helper function."""
105
        if x <= xs[0]:
106
            return ys[0]
107
        if x >= xs[-1]:
108
            return ys[-1]
109
        i = bisect.bisect(xs, x)
110
        frac = 1.0 * (x - xs[i - 1]) / (xs[i] - xs[i - 1])
111
        y = ys[i - 1] + frac * 1.0 * (ys[i] - ys[i - 1])
112
        return y
113

114

115
class _DictWrapper(object):
116
    """An object that contains a dictionary."""
117

118
    def __init__(self, values=None, name=''):
119
        """Initializes the distribution.
120

121
        hypos: sequence of hypotheses
122
        """
123
        self.name = name
124
        self.d = {}
125

126
        # flag whether the distribution is under a log transform
127
        self.log = False
128

129
        if values is None:
130
            return
131

132
        init_methods = [
133
            self.InitPmf,
134
            self.InitMapping,
135
            self.InitSequence,
136
            self.InitFailure,
137
            ]
138

139
        for method in init_methods:
140
            try:
141
                method(values)
142
                break
143
            except AttributeError:
144
                continue
145

146
        if len(self) > 0:
147
            self.Normalize()
148

149
    def InitSequence(self, values):
150
        """Initializes with a sequence of equally-likely values.
151

152
        values: sequence of values
153
        """
154
        for value in values:
155
            self.Set(value, 1)
156

157
    def InitMapping(self, values):
158
        """Initializes with a map from value to probability.
159

160
        values: map from value to probability
161
        """
162
        for value, prob in values.iteritems():
163
            self.Set(value, prob)
164

165
    def InitPmf(self, values):
166
        """Initializes with a Pmf.
167

168
        values: Pmf object
169
        """
170
        for value, prob in values.Items():
171
            self.Set(value, prob)
172

173
    def InitFailure(self, values):
174
        """Raises an error."""
175
        raise ValueError('None of the initialization methods worked.')
176

177
    def __len__(self):
178
        return len(self.d)
179

180
    def __iter__(self):
181
        return iter(self.d)
182

183
    def iterkeys(self):
184
        return iter(self.d)
185

186
    def __contains__(self, value):
187
        return value in self.d
188

189
    def Copy(self, name=None):
190
        """Returns a copy.
191

192
        Make a shallow copy of d.  If you want a deep copy of d,
193
        use copy.deepcopy on the whole object.
194

195
        Args:
196
            name: string name for the new Hist
197
        """
198
        new = copy.copy(self)
199
        new.d = copy.copy(self.d)
200
        new.name = name if name is not None else self.name
201
        return new
202

203
    def Scale(self, factor):
204
        """Multiplies the values by a factor.
205

206
        factor: what to multiply by
207

208
        Returns: new object
209
        """
210
        new = self.Copy()
211
        new.d.clear()
212

213
        for val, prob in self.Items():
214
            new.Set(val * factor, prob)
215
        return new
216

217
    def Log(self, m=None):
218
        """Log transforms the probabilities.
219
        
220
        Removes values with probability 0.
221

222
        Normalizes so that the largest logprob is 0.
223
        """
224
        if self.log:
225
            raise ValueError("Pmf/Hist already under a log transform")
226
        self.log = True
227

228
        if m is None:
229
            m = self.MaxLike()
230

231
        for x, p in self.d.iteritems():
232
            if p:
233
                self.Set(x, math.log(p / m))
234
            else:
235
                self.Remove(x)
236

237
    def Exp(self, m=None):
238
        """Exponentiates the probabilities.
239

240
        m: how much to shift the ps before exponentiating
241

242
        If m is None, normalizes so that the largest prob is 1.
243
        """
244
        if not self.log:
245
            raise ValueError("Pmf/Hist not under a log transform")
246
        self.log = False
247

248
        if m is None:
249
            m = self.MaxLike()
250

251
        for x, p in self.d.iteritems():
252
            self.Set(x, math.exp(p - m))
253

254
    def GetDict(self):
255
        """Gets the dictionary."""
256
        return self.d
257

258
    def SetDict(self, d):
259
        """Sets the dictionary."""
260
        self.d = d
261

262
    def Values(self):
263
        """Gets an unsorted sequence of values.
264

265
        Note: one source of confusion is that the keys of this
266
        dictionary are the values of the Hist/Pmf, and the
267
        values of the dictionary are frequencies/probabilities.
268
        """
269
        return self.d.keys()
270

271
    def Items(self):
272
        """Gets an unsorted sequence of (value, freq/prob) pairs."""
273
        return self.d.items()
274

275
    def Render(self):
276
        """Generates a sequence of points suitable for plotting.
277

278
        Returns:
279
            tuple of (sorted value sequence, freq/prob sequence)
280
        """
281
        return zip(*sorted(self.Items()))
282

283
    def Print(self):
284
        """Prints the values and freqs/probs in ascending order."""
285
        for val, prob in sorted(self.d.iteritems()):
286
            print val, prob
287

288
    def Set(self, x, y=0):
289
        """Sets the freq/prob associated with the value x.
290

291
        Args:
292
            x: number value
293
            y: number freq or prob
294
        """
295
        self.d[x] = y
296

297
    def Incr(self, x, term=1):
298
        """Increments the freq/prob associated with the value x.
299

300
        Args:
301
            x: number value
302
            term: how much to increment by
303
        """
304
        self.d[x] = self.d.get(x, 0) + term
305

306
    def Mult(self, x, factor):
307
        """Scales the freq/prob associated with the value x.
308

309
        Args:
310
            x: number value
311
            factor: how much to multiply by
312
        """
313
        self.d[x] = self.d.get(x, 0) * factor
314

315
    def Remove(self, x):
316
        """Removes a value.
317

318
        Throws an exception if the value is not there.
319

320
        Args:
321
            x: value to remove
322
        """
323
        del self.d[x]
324

325
    def Total(self):
326
        """Returns the total of the frequencies/probabilities in the map."""
327
        total = sum(self.d.itervalues())
328
        return total
329

330
    def MaxLike(self):
331
        """Returns the largest frequency/probability in the map."""
332
        return max(self.d.itervalues())
333

334

335
class Hist(_DictWrapper):
336
    """Represents a histogram, which is a map from values to frequencies.
337

338
    Values can be any hashable type; frequencies are integer counters.
339
    """
340

341
    def Freq(self, x):
342
        """Gets the frequency associated with the value x.
343

344
        Args:
345
            x: number value
346

347
        Returns:
348
            int frequency
349
        """
350
        return self.d.get(x, 0)
351

352
    def Freqs(self, xs):
353
        """Gets frequencies for a sequence of values."""
354
        return [self.Freq(x) for x in xs]
355

356
    def IsSubset(self, other):
357
        """Checks whether the values in this histogram are a subset of
358
        the values in the given histogram."""
359
        for val, freq in self.Items():
360
            if freq > other.Freq(val):
361
                return False
362
        return True
363

364
    def Subtract(self, other):
365
        """Subtracts the values in the given histogram from this histogram."""
366
        for val, freq in other.Items():
367
            self.Incr(val, -freq)
368

369

370
class Pmf(_DictWrapper):
371
    """Represents a probability mass function.
372
    
373
    Values can be any hashable type; probabilities are floating-point.
374
    Pmfs are not necessarily normalized.
375
    """
376

377
    def Prob(self, x, default=0):
378
        """Gets the probability associated with the value x.
379

380
        Args:
381
            x: number value
382
            default: value to return if the key is not there
383

384
        Returns:
385
            float probability
386
        """
387
        return self.d.get(x, default)
388

389
    def Probs(self, xs):
390
        """Gets probabilities for a sequence of values."""
391
        return [self.Prob(x) for x in xs]
392

393
    def MakeCdf(self, name=None):
394
        """Makes a Cdf."""
395
        return MakeCdfFromPmf(self, name=name)
396

397
    def ProbGreater(self, x):
398
        """Probability that a sample from this Pmf exceeds x.
399

400
        x: number
401

402
        returns: float probability
403
        """
404
        t = [prob for (val, prob) in self.d.iteritems() if val > x]
405
        return sum(t)
406

407
    def ProbLess(self, x):
408
        """Probability that a sample from this Pmf is less than x.
409

410
        x: number
411

412
        returns: float probability
413
        """
414
        t = [prob for (val, prob) in self.d.iteritems() if val < x]
415
        return sum(t)
416

417
    def __lt__(self, obj):
418
        """Less than.
419

420
        obj: number or _DictWrapper
421

422
        returns: float probability
423
        """
424
        if isinstance(obj, _DictWrapper):
425
            return PmfProbLess(self, obj)
426
        else:
427
            return self.ProbLess(obj)
428

429
    def __gt__(self, obj):
430
        """Greater than.
431

432
        obj: number or _DictWrapper
433

434
        returns: float probability
435
        """
436
        if isinstance(obj, _DictWrapper):
437
            return PmfProbGreater(self, obj)
438
        else:
439
            return self.ProbGreater(obj)
440

441
    def __ge__(self, obj):
442
        """Greater than or equal.
443

444
        obj: number or _DictWrapper
445

446
        returns: float probability
447
        """
448
        return 1 - (self < obj)
449

450
    def __le__(self, obj):
451
        """Less than or equal.
452

453
        obj: number or _DictWrapper
454

455
        returns: float probability
456
        """
457
        return 1 - (self > obj)
458

459
    def __eq__(self, obj):
460
        """Equal to.
461

462
        obj: number or _DictWrapper
463

464
        returns: float probability
465
        """
466
        if isinstance(obj, _DictWrapper):
467
            return PmfProbEqual(self, obj)
468
        else:
469
            return self.Prob(obj)
470

471
    def __ne__(self, obj):
472
        """Not equal to.
473

474
        obj: number or _DictWrapper
475

476
        returns: float probability
477
        """
478
        return 1 - (self == obj)
479

480
    def Normalize(self, fraction=1.0):
481
        """Normalizes this PMF so the sum of all probs is fraction.
482

483
        Args:
484
            fraction: what the total should be after normalization
485

486
        Returns: the total probability before normalizing
487
        """
488
        if self.log:
489
            raise ValueError("Pmf is under a log transform")
490

491
        total = self.Total()
492
        if total == 0.0:
493
            raise ValueError('total probability is zero.')
494
            logging.warning('Normalize: total probability is zero.')
495
            return total
496

497
        factor = float(fraction) / total
498
        for x in self.d:
499
            self.d[x] *= factor
500

501
        return total
502

503
    def Random(self):
504
        """Chooses a random element from this PMF.
505

506
        Returns:
507
            float value from the Pmf
508
        """
509
        if len(self.d) == 0:
510
            raise ValueError('Pmf contains no values.')
511

512
        target = random.random()
513
        total = 0.0
514
        for x, p in self.d.iteritems():
515
            total += p
516
            if total >= target:
517
                return x
518

519
        # we shouldn't get here
520
        assert False
521

522
    def Mean(self):
523
        """Computes the mean of a PMF.
524

525
        Returns:
526
            float mean
527
        """
528
        mu = 0.0
529
        for x, p in self.d.iteritems():
530
            mu += p * x
531
        return mu
532

533
    def Var(self, mu=None):
534
        """Computes the variance of a PMF.
535

536
        Args:
537
            mu: the point around which the variance is computed;
538
                if omitted, computes the mean
539

540
        Returns:
541
            float variance
542
        """
543
        if mu is None:
544
            mu = self.Mean()
545

546
        var = 0.0
547
        for x, p in self.d.iteritems():
548
            var += p * (x - mu) ** 2
549
        return var
550

551
    def MaximumLikelihood(self):
552
        """Returns the value with the highest probability.
553

554
        Returns: float probability
555
        """
556
        prob, val = max((prob, val) for val, prob in self.Items())
557
        return val
558

559
    def CredibleInterval(self, percentage=90):
560
        """Computes the central credible interval.
561

562
        If percentage=90, computes the 90% CI.
563

564
        Args:
565
            percentage: float between 0 and 100
566

567
        Returns:
568
            sequence of two floats, low and high
569
        """
570
        cdf = self.MakeCdf()
571
        return cdf.CredibleInterval(percentage)
572

573
    def __add__(self, other):
574
        """Computes the Pmf of the sum of values drawn from self and other.
575

576
        other: another Pmf
577

578
        returns: new Pmf
579
        """
580
        try:
581
            return self.AddPmf(other)
582
        except AttributeError:
583
            return self.AddConstant(other)
584

585
    def AddPmf(self, other):
586
        """Computes the Pmf of the sum of values drawn from self and other.
587

588
        other: another Pmf
589

590
        returns: new Pmf
591
        """
592
        pmf = Pmf()
593
        for v1, p1 in self.Items():
594
            for v2, p2 in other.Items():
595
                pmf.Incr(v1 + v2, p1 * p2)
596
        return pmf
597

598
    def AddConstant(self, other):
599
        """Computes the Pmf of the sum a constant and  values from self.
600

601
        other: a number
602

603
        returns: new Pmf
604
        """
605
        pmf = Pmf()
606
        for v1, p1 in self.Items():
607
            pmf.Set(v1 + other, p1)
608
        return pmf
609

610
    def __sub__(self, other):
611
        """Computes the Pmf of the diff of values drawn from self and other.
612

613
        other: another Pmf
614

615
        returns: new Pmf
616
        """
617
        pmf = Pmf()
618
        for v1, p1 in self.Items():
619
            for v2, p2 in other.Items():
620
                pmf.Incr(v1 - v2, p1 * p2)
621
        return pmf
622

623
    def Max(self, k):
624
        """Computes the CDF of the maximum of k selections from this dist.
625

626
        k: int
627

628
        returns: new Cdf
629
        """
630
        cdf = self.MakeCdf()
631
        cdf.ps = [p ** k for p in cdf.ps]
632
        return cdf
633

634

635
class Joint(Pmf):
636
    """Represents a joint distribution.
637

638
    The values are sequences (usually tuples)
639
    """
640

641
    def Marginal(self, i, name=''):
642
        """Gets the marginal distribution of the indicated variable.
643

644
        i: index of the variable we want
645

646
        Returns: Pmf
647
        """
648
        pmf = Pmf(name=name)
649
        for vs, prob in self.Items():
650
            pmf.Incr(vs[i], prob)
651
        return pmf
652

653
    def Conditional(self, i, j, val, name=''):
654
        """Gets the conditional distribution of the indicated variable.
655

656
        Distribution of vs[i], conditioned on vs[j] = val.
657

658
        i: index of the variable we want
659
        j: which variable is conditioned on
660
        val: the value the jth variable has to have
661

662
        Returns: Pmf
663
        """
664
        pmf = Pmf(name=name)
665
        for vs, prob in self.Items():
666
            if vs[j] != val: continue
667
            pmf.Incr(vs[i], prob)
668

669
        pmf.Normalize()
670
        return pmf
671

672
    def MaxLikeInterval(self, percentage=90):
673
        """Returns the maximum-likelihood credible interval.
674

675
        If percentage=90, computes a 90% CI containing the values
676
        with the highest likelihoods.
677

678
        percentage: float between 0 and 100
679

680
        Returns: list of values from the suite
681
        """
682
        interval = []
683
        total = 0
684

685
        t = [(prob, val) for val, prob in self.Items()]
686
        t.sort(reverse=True)
687

688
        for prob, val in t:
689
            interval.append(val)
690
            total += prob
691
            if total >= percentage / 100.0:
692
                break
693

694
        return interval
695

696

697
def MakeJoint(pmf1, pmf2):
698
    """Joint distribution of values from pmf1 and pmf2.
699

700
    Args:
701
        pmf1: Pmf object
702
        pmf2: Pmf object
703

704
    Returns:
705
        Joint pmf of value pairs
706
    """
707
    joint = Joint()
708
    for v1, p1 in pmf1.Items():
709
        for v2, p2 in pmf2.Items():
710
            joint.Set((v1, v2), p1 * p2)
711
    return joint
712

713

714
def MakeHistFromList(t, name=''):
715
    """Makes a histogram from an unsorted sequence of values.
716

717
    Args:
718
        t: sequence of numbers
719
        name: string name for this histogram
720

721
    Returns:
722
        Hist object
723
    """
724
    hist = Hist(name=name)
725
    [hist.Incr(x) for x in t]
726
    return hist
727

728

729
def MakeHistFromDict(d, name=''):
730
    """Makes a histogram from a map from values to frequencies.
731

732
    Args:
733
        d: dictionary that maps values to frequencies
734
        name: string name for this histogram
735

736
    Returns:
737
        Hist object
738
    """
739
    return Hist(d, name)
740

741

742
def MakePmfFromList(t, name=''):
743
    """Makes a PMF from an unsorted sequence of values.
744

745
    Args:
746
        t: sequence of numbers
747
        name: string name for this PMF
748

749
    Returns:
750
        Pmf object
751
    """
752
    hist = MakeHistFromList(t)
753
    d = hist.GetDict()
754
    pmf = Pmf(d, name)
755
    pmf.Normalize()
756
    return pmf
757

758

759
def MakePmfFromDict(d, name=''):
760
    """Makes a PMF from a map from values to probabilities.
761

762
    Args:
763
        d: dictionary that maps values to probabilities
764
        name: string name for this PMF
765

766
    Returns:
767
        Pmf object
768
    """
769
    pmf = Pmf(d, name)
770
    pmf.Normalize()
771
    return pmf
772

773

774
def MakePmfFromItems(t, name=''):
775
    """Makes a PMF from a sequence of value-probability pairs
776

777
    Args:
778
        t: sequence of value-probability pairs
779
        name: string name for this PMF
780

781
    Returns:
782
        Pmf object
783
    """
784
    pmf = Pmf(dict(t), name)
785
    pmf.Normalize()
786
    return pmf
787

788

789
def MakePmfFromHist(hist, name=None):
790
    """Makes a normalized PMF from a Hist object.
791

792
    Args:
793
        hist: Hist object
794
        name: string name
795

796
    Returns:
797
        Pmf object
798
    """
799
    if name is None:
800
        name = hist.name
801

802
    # make a copy of the dictionary
803
    d = dict(hist.GetDict())
804
    pmf = Pmf(d, name)
805
    pmf.Normalize()
806
    return pmf
807

808

809
def MakePmfFromCdf(cdf, name=None):
810
    """Makes a normalized Pmf from a Cdf object.
811

812
    Args:
813
        cdf: Cdf object
814
        name: string name for the new Pmf
815

816
    Returns:
817
        Pmf object
818
    """
819
    if name is None:
820
        name = cdf.name
821

822
    pmf = Pmf(name=name)
823

824
    prev = 0.0
825
    for val, prob in cdf.Items():
826
        pmf.Incr(val, prob - prev)
827
        prev = prob
828

829
    return pmf
830

831

832
def MakeMixture(metapmf, name='mix'):
833
    """Make a mixture distribution.
834

835
    Args:
836
      metapmf: Pmf that maps from Pmfs to probs.
837
      name: string name for the new Pmf.
838

839
    Returns: Pmf object.
840
    """
841
    mix = Pmf(name=name)
842
    for pmf, p1 in metapmf.Items():
843
        for x, p2 in pmf.Items():
844
            mix.Incr(x, p1 * p2)
845
    return mix
846

847

848
def MakeUniformPmf(low, high, n):
849
    """Make a uniform Pmf.
850

851
    low: lowest value (inclusive)
852
    high: highest value (inclusize)
853
    n: number of values
854
    """
855
    pmf = Pmf()
856
    for x in numpy.linspace(low, high, n):
857
        pmf.Set(x, 1)
858
    pmf.Normalize()
859
    return pmf
860

861

862
class Cdf(object):
863
    """Represents a cumulative distribution function.
864

865
    Attributes:
866
        xs: sequence of values
867
        ps: sequence of probabilities
868
        name: string used as a graph label.
869
    """
870

871
    def __init__(self, xs=None, ps=None, name=''):
872
        self.xs = [] if xs is None else xs
873
        self.ps = [] if ps is None else ps
874
        self.name = name
875

876
    def Copy(self, name=None):
877
        """Returns a copy of this Cdf.
878

879
        Args:
880
            name: string name for the new Cdf
881
        """
882
        if name is None:
883
            name = self.name
884
        return Cdf(list(self.xs), list(self.ps), name)
885

886
    def MakePmf(self, name=None):
887
        """Makes a Pmf."""
888
        return MakePmfFromCdf(self, name=name)
889

890
    def Values(self):
891
        """Returns a sorted list of values.
892
        """
893
        return self.xs
894

895
    def Items(self):
896
        """Returns a sorted sequence of (value, probability) pairs.
897

898
        Note: in Python3, returns an iterator.
899
        """
900
        return zip(self.xs, self.ps)
901

902
    def Append(self, x, p):
903
        """Add an (x, p) pair to the end of this CDF.
904

905
        Note: this us normally used to build a CDF from scratch, not
906
        to modify existing CDFs.  It is up to the caller to make sure
907
        that the result is a legal CDF.
908
        """
909
        self.xs.append(x)
910
        self.ps.append(p)
911

912
    def Shift(self, term):
913
        """Adds a term to the xs.
914

915
        term: how much to add
916
        """
917
        new = self.Copy()
918
        new.xs = [x + term for x in self.xs]
919
        return new
920

921
    def Scale(self, factor):
922
        """Multiplies the xs by a factor.
923

924
        factor: what to multiply by
925
        """
926
        new = self.Copy()
927
        new.xs = [x * factor for x in self.xs]
928
        return new
929

930
    def Prob(self, x):
931
        """Returns CDF(x), the probability that corresponds to value x.
932

933
        Args:
934
            x: number
935

936
        Returns:
937
            float probability
938
        """
939
        if x < self.xs[0]: return 0.0
940
        index = bisect.bisect(self.xs, x)
941
        p = self.ps[index - 1]
942
        return p
943

944
    def Value(self, p):
945
        """Returns InverseCDF(p), the value that corresponds to probability p.
946

947
        Args:
948
            p: number in the range [0, 1]
949

950
        Returns:
951
            number value
952
        """
953
        if p < 0 or p > 1:
954
            raise ValueError('Probability p must be in range [0, 1]')
955

956
        if p == 0: return self.xs[0]
957
        if p == 1: return self.xs[-1]
958
        index = bisect.bisect(self.ps, p)
959
        if p == self.ps[index - 1]:
960
            return self.xs[index - 1]
961
        else:
962
            return self.xs[index]
963

964
    def Percentile(self, p):
965
        """Returns the value that corresponds to percentile p.
966

967
        Args:
968
            p: number in the range [0, 100]
969

970
        Returns:
971
            number value
972
        """
973
        return self.Value(p / 100.0)
974

975
    def Random(self):
976
        """Chooses a random value from this distribution."""
977
        return self.Value(random.random())
978

979
    def Sample(self, n):
980
        """Generates a random sample from this distribution.
981
        
982
        Args:
983
            n: int length of the sample
984
        """
985
        return [self.Random() for i in range(n)]
986

987
    def Mean(self):
988
        """Computes the mean of a CDF.
989

990
        Returns:
991
            float mean
992
        """
993
        old_p = 0
994
        total = 0.0
995
        for x, new_p in zip(self.xs, self.ps):
996
            p = new_p - old_p
997
            total += p * x
998
            old_p = new_p
999
        return total
1000

1001
    def CredibleInterval(self, percentage=90):
1002
        """Computes the central credible interval.
1003

1004
        If percentage=90, computes the 90% CI.
1005

1006
        Args:
1007
            percentage: float between 0 and 100
1008

1009
        Returns:
1010
            sequence of two floats, low and high
1011
        """
1012
        prob = (1 - percentage / 100.0) / 2
1013
        interval = self.Value(prob), self.Value(1 - prob)
1014
        return interval
1015

1016
    def _Round(self, multiplier=1000.0):
1017
        """
1018
        An entry is added to the cdf only if the percentile differs
1019
        from the previous value in a significant digit, where the number
1020
        of significant digits is determined by multiplier.  The
1021
        default is 1000, which keeps log10(1000) = 3 significant digits.
1022
        """
1023
        # TODO(write this method)
1024
        raise UnimplementedMethodException()
1025

1026
    def Render(self):
1027
        """Generates a sequence of points suitable for plotting.
1028

1029
        An empirical CDF is a step function; linear interpolation
1030
        can be misleading.
1031

1032
        Returns:
1033
            tuple of (xs, ps)
1034
        """
1035
        xs = [self.xs[0]]
1036
        ps = [0.0]
1037
        for i, p in enumerate(self.ps):
1038
            xs.append(self.xs[i])
1039
            ps.append(p)
1040

1041
            try:
1042
                xs.append(self.xs[i + 1])
1043
                ps.append(p)
1044
            except IndexError:
1045
                pass
1046
        return xs, ps
1047

1048
    def Max(self, k):
1049
        """Computes the CDF of the maximum of k selections from this dist.
1050

1051
        k: int
1052

1053
        returns: new Cdf
1054
        """
1055
        cdf = self.Copy()
1056
        cdf.ps = [p ** k for p in cdf.ps]
1057
        return cdf
1058

1059

1060
def MakeCdfFromItems(items, name=''):
1061
    """Makes a cdf from an unsorted sequence of (value, frequency) pairs.
1062

1063
    Args:
1064
        items: unsorted sequence of (value, frequency) pairs
1065
        name: string name for this CDF
1066

1067
    Returns:
1068
        cdf: list of (value, fraction) pairs
1069
    """
1070
    runsum = 0
1071
    xs = []
1072
    cs = []
1073

1074
    for value, count in sorted(items):
1075
        runsum += count
1076
        xs.append(value)
1077
        cs.append(runsum)
1078

1079
    total = float(runsum)
1080
    ps = [c / total for c in cs]
1081

1082
    cdf = Cdf(xs, ps, name)
1083
    return cdf
1084

1085

1086
def MakeCdfFromDict(d, name=''):
1087
    """Makes a CDF from a dictionary that maps values to frequencies.
1088

1089
    Args:
1090
       d: dictionary that maps values to frequencies.
1091
       name: string name for the data.
1092

1093
    Returns:
1094
        Cdf object
1095
    """
1096
    return MakeCdfFromItems(d.iteritems(), name)
1097

1098

1099
def MakeCdfFromHist(hist, name=''):
1100
    """Makes a CDF from a Hist object.
1101

1102
    Args:
1103
       hist: Pmf.Hist object
1104
       name: string name for the data.
1105

1106
    Returns:
1107
        Cdf object
1108
    """
1109
    return MakeCdfFromItems(hist.Items(), name)
1110

1111

1112
def MakeCdfFromPmf(pmf, name=None):
1113
    """Makes a CDF from a Pmf object.
1114

1115
    Args:
1116
       pmf: Pmf.Pmf object
1117
       name: string name for the data.
1118

1119
    Returns:
1120
        Cdf object
1121
    """
1122
    if name == None:
1123
        name = pmf.name
1124
    return MakeCdfFromItems(pmf.Items(), name)
1125

1126

1127
def MakeCdfFromList(seq, name=''):
1128
    """Creates a CDF from an unsorted sequence.
1129

1130
    Args:
1131
        seq: unsorted sequence of sortable values
1132
        name: string name for the cdf
1133

1134
    Returns:
1135
       Cdf object
1136
    """
1137
    hist = MakeHistFromList(seq)
1138
    return MakeCdfFromHist(hist, name)
1139

1140

1141
class UnimplementedMethodException(Exception):
1142
    """Exception if someone calls a method that should be overridden."""
1143

1144

1145
class Suite(Pmf):
1146
    """Represents a suite of hypotheses and their probabilities."""
1147

1148
    def Update(self, data):
1149
        """Updates each hypothesis based on the data.
1150

1151
        data: any representation of the data
1152

1153
        returns: the normalizing constant
1154
        """
1155
        for hypo in self.Values():
1156
            like = self.Likelihood(data, hypo)
1157
            self.Mult(hypo, like)
1158
        return self.Normalize()
1159

1160
    def LogUpdate(self, data):
1161
        """Updates a suite of hypotheses based on new data.
1162

1163
        Modifies the suite directly; if you want to keep the original, make
1164
        a copy.
1165

1166
        Note: unlike Update, LogUpdate does not normalize.
1167

1168
        Args:
1169
            data: any representation of the data
1170
        """
1171
        for hypo in self.Values():
1172
            like = self.LogLikelihood(data, hypo)
1173
            self.Incr(hypo, like)
1174

1175
    def UpdateSet(self, dataset):
1176
        """Updates each hypothesis based on the dataset.
1177

1178
        This is more efficient than calling Update repeatedly because
1179
        it waits until the end to Normalize.
1180

1181
        Modifies the suite directly; if you want to keep the original, make
1182
        a copy.
1183

1184
        dataset: a sequence of data
1185

1186
        returns: the normalizing constant
1187
        """
1188
        for data in dataset:
1189
            for hypo in self.Values():
1190
                like = self.Likelihood(data, hypo)
1191
                self.Mult(hypo, like)
1192
        return self.Normalize()
1193

1194
    def LogUpdateSet(self, dataset):
1195
        """Updates each hypothesis based on the dataset.
1196

1197
        Modifies the suite directly; if you want to keep the original, make
1198
        a copy.
1199

1200
        dataset: a sequence of data
1201

1202
        returns: None
1203
        """
1204
        for data in dataset:
1205
            self.LogUpdate(data)
1206

1207
    def Likelihood(self, data, hypo):
1208
        """Computes the likelihood of the data under the hypothesis.
1209

1210
        hypo: some representation of the hypothesis
1211
        data: some representation of the data
1212
        """
1213
        raise UnimplementedMethodException()
1214

1215
    def LogLikelihood(self, data, hypo):
1216
        """Computes the log likelihood of the data under the hypothesis.
1217

1218
        hypo: some representation of the hypothesis
1219
        data: some representation of the data
1220
        """
1221
        raise UnimplementedMethodException()
1222

1223
    def Print(self):
1224
        """Prints the hypotheses and their probabilities."""
1225
        for hypo, prob in sorted(self.Items()):
1226
            print hypo, prob
1227

1228
    def MakeOdds(self):
1229
        """Transforms from probabilities to odds.
1230

1231
        Values with prob=0 are removed.
1232
        """
1233
        for hypo, prob in self.Items():
1234
            if prob:
1235
                self.Set(hypo, Odds(prob))
1236
            else:
1237
                self.Remove(hypo)
1238

1239
    def MakeProbs(self):
1240
        """Transforms from odds to probabilities."""
1241
        for hypo, odds in self.Items():
1242
            self.Set(hypo, Probability(odds))
1243

1244

1245
def MakeSuiteFromList(t, name=''):
1246
    """Makes a suite from an unsorted sequence of values.
1247

1248
    Args:
1249
        t: sequence of numbers
1250
        name: string name for this suite
1251

1252
    Returns:
1253
        Suite object
1254
    """
1255
    hist = MakeHistFromList(t)
1256
    d = hist.GetDict()
1257
    return MakeSuiteFromDict(d)
1258

1259

1260
def MakeSuiteFromHist(hist, name=None):
1261
    """Makes a normalized suite from a Hist object.
1262

1263
    Args:
1264
        hist: Hist object
1265
        name: string name
1266

1267
    Returns:
1268
        Suite object
1269
    """
1270
    if name is None:
1271
        name = hist.name
1272

1273
    # make a copy of the dictionary
1274
    d = dict(hist.GetDict())
1275
    return MakeSuiteFromDict(d, name)
1276

1277

1278
def MakeSuiteFromDict(d, name=''):
1279
    """Makes a suite from a map from values to probabilities.
1280

1281
    Args:
1282
        d: dictionary that maps values to probabilities
1283
        name: string name for this suite
1284

1285
    Returns:
1286
        Suite object
1287
    """
1288
    suite = Suite(name=name)
1289
    suite.SetDict(d)
1290
    suite.Normalize()
1291
    return suite
1292

1293

1294
def MakeSuiteFromCdf(cdf, name=None):
1295
    """Makes a normalized Suite from a Cdf object.
1296

1297
    Args:
1298
        cdf: Cdf object
1299
        name: string name for the new Suite
1300

1301
    Returns:
1302
        Suite object
1303
    """
1304
    if name is None:
1305
        name = cdf.name
1306

1307
    suite = Suite(name=name)
1308

1309
    prev = 0.0
1310
    for val, prob in cdf.Items():
1311
        suite.Incr(val, prob - prev)
1312
        prev = prob
1313

1314
    return suite
1315

1316

1317
class Pdf(object):
1318
    """Represents a probability density function (PDF)."""
1319

1320
    def Density(self, x):
1321
        """Evaluates this Pdf at x.
1322

1323
        Returns: float probability density
1324
        """
1325
        raise UnimplementedMethodException()
1326

1327
    def MakePmf(self, xs, name=''):
1328
        """Makes a discrete version of this Pdf, evaluated at xs.
1329

1330
        xs: equally-spaced sequence of values
1331

1332
        Returns: new Pmf
1333
        """
1334
        pmf = Pmf(name=name)
1335
        for x in xs:
1336
            pmf.Set(x, self.Density(x))
1337
        pmf.Normalize()
1338
        return pmf
1339

1340

1341
class GaussianPdf(Pdf):
1342
    """Represents the PDF of a Gaussian distribution."""
1343

1344
    def __init__(self, mu, sigma):
1345
        """Constructs a Gaussian Pdf with given mu and sigma.
1346

1347
        mu: mean
1348
        sigma: standard deviation
1349
        """
1350
        self.mu = mu
1351
        self.sigma = sigma
1352

1353
    def Density(self, x):
1354
        """Evaluates this Pdf at x.
1355

1356
        Returns: float probability density
1357
        """
1358
        return EvalGaussianPdf(x, self.mu, self.sigma)
1359

1360

1361
class EstimatedPdf(Pdf):
1362
    """Represents a PDF estimated by KDE."""
1363

1364
    def __init__(self, sample):
1365
        """Estimates the density function based on a sample.
1366

1367
        sample: sequence of data
1368
        """
1369
        self.kde = scipy.stats.gaussian_kde(sample)
1370

1371
    def Density(self, x):
1372
        """Evaluates this Pdf at x.
1373

1374
        Returns: float probability density
1375
        """
1376
        return self.kde.evaluate(x)
1377

1378
    def MakePmf(self, xs, name=''):
1379
        ps = self.kde.evaluate(xs)
1380
        pmf = MakePmfFromItems(zip(xs, ps), name=name)
1381
        return pmf
1382

1383

1384
def Percentile(pmf, percentage):
1385
    """Computes a percentile of a given Pmf.
1386

1387
    percentage: float 0-100
1388
    """
1389
    p = percentage / 100.0
1390
    total = 0
1391
    for val, prob in pmf.Items():
1392
        total += prob
1393
        if total >= p:
1394
            return val
1395

1396

1397
def CredibleInterval(pmf, percentage=90):
1398
    """Computes a credible interval for a given distribution.
1399

1400
    If percentage=90, computes the 90% CI.
1401

1402
    Args:
1403
        pmf: Pmf object representing a posterior distribution
1404
        percentage: float between 0 and 100
1405

1406
    Returns:
1407
        sequence of two floats, low and high
1408
    """
1409
    cdf = pmf.MakeCdf()
1410
    prob = (1 - percentage / 100.0) / 2
1411
    interval = cdf.Value(prob), cdf.Value(1 - prob)
1412
    return interval
1413

1414

1415
def PmfProbLess(pmf1, pmf2):
1416
    """Probability that a value from pmf1 is less than a value from pmf2.
1417

1418
    Args:
1419
        pmf1: Pmf object
1420
        pmf2: Pmf object
1421

1422
    Returns:
1423
        float probability
1424
    """
1425
    total = 0.0
1426
    for v1, p1 in pmf1.Items():
1427
        for v2, p2 in pmf2.Items():
1428
            if v1 < v2:
1429
                total += p1 * p2
1430
    return total
1431

1432

1433
def PmfProbGreater(pmf1, pmf2):
1434
    """Probability that a value from pmf1 is less than a value from pmf2.
1435

1436
    Args:
1437
        pmf1: Pmf object
1438
        pmf2: Pmf object
1439

1440
    Returns:
1441
        float probability
1442
    """
1443
    total = 0.0
1444
    for v1, p1 in pmf1.Items():
1445
        for v2, p2 in pmf2.Items():
1446
            if v1 > v2:
1447
                total += p1 * p2
1448
    return total
1449

1450

1451
def PmfProbEqual(pmf1, pmf2):
1452
    """Probability that a value from pmf1 equals a value from pmf2.
1453

1454
    Args:
1455
        pmf1: Pmf object
1456
        pmf2: Pmf object
1457

1458
    Returns:
1459
        float probability
1460
    """
1461
    total = 0.0
1462
    for v1, p1 in pmf1.Items():
1463
        for v2, p2 in pmf2.Items():
1464
            if v1 == v2:
1465
                total += p1 * p2
1466
    return total
1467

1468

1469
def RandomSum(dists):
1470
    """Chooses a random value from each dist and returns the sum.
1471

1472
    dists: sequence of Pmf or Cdf objects
1473

1474
    returns: numerical sum
1475
    """
1476
    total = sum(dist.Random() for dist in dists)
1477
    return total
1478

1479

1480
def SampleSum(dists, n):
1481
    """Draws a sample of sums from a list of distributions.
1482

1483
    dists: sequence of Pmf or Cdf objects
1484
    n: sample size
1485

1486
    returns: new Pmf of sums
1487
    """
1488
    pmf = MakePmfFromList(RandomSum(dists) for i in xrange(n))
1489
    return pmf
1490

1491

1492
def EvalGaussianPdf(x, mu, sigma):
1493
    """Computes the unnormalized PDF of the normal distribution.
1494

1495
    x: value
1496
    mu: mean
1497
    sigma: standard deviation
1498
    
1499
    returns: float probability density
1500
    """
1501
    return scipy.stats.norm.pdf(x, mu, sigma)
1502

1503

1504
def MakeGaussianPmf(mu, sigma, num_sigmas, n=201):
1505
    """Makes a PMF discrete approx to a Gaussian distribution.
1506
    
1507
    mu: float mean
1508
    sigma: float standard deviation
1509
    num_sigmas: how many sigmas to extend in each direction
1510
    n: number of values in the Pmf
1511

1512
    returns: normalized Pmf
1513
    """
1514
    pmf = Pmf()
1515
    low = mu - num_sigmas * sigma
1516
    high = mu + num_sigmas * sigma
1517

1518
    for x in numpy.linspace(low, high, n):
1519
        p = EvalGaussianPdf(x, mu, sigma)
1520
        pmf.Set(x, p)
1521
    pmf.Normalize()
1522
    return pmf
1523

1524

1525
def EvalBinomialPmf(k, n, p):
1526
    """Evaluates the binomial pmf.
1527

1528
    Returns the probabily of k successes in n trials with probability p.
1529
    """
1530
    return scipy.stats.binom.pmf(k, n, p)
1531
    
1532

1533
def EvalPoissonPmf(k, lam):
1534
    """Computes the Poisson PMF.
1535

1536
    k: number of events
1537
    lam: parameter lambda in events per unit time
1538

1539
    returns: float probability
1540
    """
1541
    return scipy.stats.poisson.pmf(k, lam)
1542

1543

1544
def MakePoissonPmf(lam, high, step=1):
1545
    """Makes a PMF discrete approx to a Poisson distribution.
1546

1547
    lam: parameter lambda in events per unit time
1548
    high: upper bound of the Pmf
1549

1550
    returns: normalized Pmf
1551
    """
1552
    pmf = Pmf()
1553
    for k in xrange(0, high + 1, step):
1554
        p = EvalPoissonPmf(k, lam)
1555
        pmf.Set(k, p)
1556
    pmf.Normalize()
1557
    return pmf
1558

1559

1560
def EvalExponentialPdf(x, lam):
1561
    """Computes the exponential PDF.
1562

1563
    x: value
1564
    lam: parameter lambda in events per unit time
1565

1566
    returns: float probability density
1567
    """
1568
    return lam * math.exp(-lam * x)
1569

1570

1571
def EvalExponentialCdf(x, lam):
1572
    """Evaluates CDF of the exponential distribution with parameter lam."""
1573
    return 1 - math.exp(-lam * x)
1574

1575

1576
def MakeExponentialPmf(lam, high, n=200):
1577
    """Makes a PMF discrete approx to an exponential distribution.
1578

1579
    lam: parameter lambda in events per unit time
1580
    high: upper bound
1581
    n: number of values in the Pmf
1582

1583
    returns: normalized Pmf
1584
    """
1585
    pmf = Pmf()
1586
    for x in numpy.linspace(0, high, n):
1587
        p = EvalExponentialPdf(x, lam)
1588
        pmf.Set(x, p)
1589
    pmf.Normalize()
1590
    return pmf
1591

1592

1593
def StandardGaussianCdf(x):
1594
    """Evaluates the CDF of the standard Gaussian distribution.
1595
    
1596
    See http://en.wikipedia.org/wiki/Normal_distribution
1597
    #Cumulative_distribution_function
1598

1599
    Args:
1600
        x: float
1601
                
1602
    Returns:
1603
        float
1604
    """
1605
    return (erf(x / ROOT2) + 1) / 2
1606

1607

1608
def GaussianCdf(x, mu=0, sigma=1):
1609
    """Evaluates the CDF of the gaussian distribution.
1610
    
1611
    Args:
1612
        x: float
1613

1614
        mu: mean parameter
1615
        
1616
        sigma: standard deviation parameter
1617
                
1618
    Returns:
1619
        float
1620
    """
1621
    return StandardGaussianCdf(float(x - mu) / sigma)
1622

1623

1624
def GaussianCdfInverse(p, mu=0, sigma=1):
1625
    """Evaluates the inverse CDF of the gaussian distribution.
1626

1627
    See http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function  
1628

1629
    Args:
1630
        p: float
1631

1632
        mu: mean parameter
1633
        
1634
        sigma: standard deviation parameter
1635
                
1636
    Returns:
1637
        float
1638
    """
1639
    x = ROOT2 * erfinv(2 * p - 1)
1640
    return mu + x * sigma
1641

1642

1643
class Beta(object):
1644
    """Represents a Beta distribution.
1645

1646
    See http://en.wikipedia.org/wiki/Beta_distribution
1647
    """
1648
    def __init__(self, alpha=1, beta=1, name=''):
1649
        """Initializes a Beta distribution."""
1650
        self.alpha = alpha
1651
        self.beta = beta
1652
        self.name = name
1653

1654
    def Update(self, data):
1655
        """Updates a Beta distribution.
1656

1657
        data: pair of int (heads, tails)
1658
        """
1659
        heads, tails = data
1660
        self.alpha += heads
1661
        self.beta += tails
1662

1663
    def Mean(self):
1664
        """Computes the mean of this distribution."""
1665
        return float(self.alpha) / (self.alpha + self.beta)
1666

1667
    def Random(self):
1668
        """Generates a random variate from this distribution."""
1669
        return random.betavariate(self.alpha, self.beta)
1670

1671
    def Sample(self, n):
1672
        """Generates a random sample from this distribution.
1673

1674
        n: int sample size
1675
        """
1676
        size = n,
1677
        return numpy.random.beta(self.alpha, self.beta, size)
1678

1679
    def EvalPdf(self, x):
1680
        """Evaluates the PDF at x."""
1681
        return x ** (self.alpha - 1) * (1 - x) ** (self.beta - 1)
1682

1683
    def MakePmf(self, steps=101, name=''):
1684
        """Returns a Pmf of this distribution.
1685

1686
        Note: Normally, we just evaluate the PDF at a sequence
1687
        of points and treat the probability density as a probability
1688
        mass.
1689

1690
        But if alpha or beta is less than one, we have to be
1691
        more careful because the PDF goes to infinity at x=0
1692
        and x=1.  In that case we evaluate the CDF and compute
1693
        differences.
1694
        """
1695
        if self.alpha < 1 or self.beta < 1:
1696
            cdf = self.MakeCdf()
1697
            pmf = cdf.MakePmf()
1698
            return pmf
1699

1700
        xs = [i / (steps - 1.0) for i in xrange(steps)]
1701
        probs = [self.EvalPdf(x) for x in xs]
1702
        pmf = MakePmfFromDict(dict(zip(xs, probs)), name)
1703
        return pmf
1704

1705
    def MakeCdf(self, steps=101):
1706
        """Returns the CDF of this distribution."""
1707
        xs = [i / (steps - 1.0) for i in xrange(steps)]
1708
        ps = [scipy.special.betainc(self.alpha, self.beta, x) for x in xs]
1709
        cdf = Cdf(xs, ps)
1710
        return cdf
1711

1712

1713
class Dirichlet(object):
1714
    """Represents a Dirichlet distribution.
1715

1716
    See http://en.wikipedia.org/wiki/Dirichlet_distribution
1717
    """
1718

1719
    def __init__(self, n, conc=1, name=''):
1720
        """Initializes a Dirichlet distribution.
1721

1722
        n: number of dimensions
1723
        conc: concentration parameter (smaller yields more concentration)
1724
        name: string name
1725
        """
1726
        if n < 2:
1727
            raise ValueError('A Dirichlet distribution with '
1728
                             'n<2 makes no sense')
1729

1730
        self.n = n
1731
        self.params = numpy.ones(n, dtype=numpy.float) * conc
1732
        self.name = name
1733

1734
    def Update(self, data):
1735
        """Updates a Dirichlet distribution.
1736

1737
        data: sequence of observations, in order corresponding to params
1738
        """
1739
        m = len(data)
1740
        self.params[:m] += data
1741

1742
    def Random(self):
1743
        """Generates a random variate from this distribution.
1744

1745
        Returns: normalized vector of fractions
1746
        """
1747
        p = numpy.random.gamma(self.params)
1748
        return p / p.sum()
1749

1750
    def Likelihood(self, data):
1751
        """Computes the likelihood of the data.
1752

1753
        Selects a random vector of probabilities from this distribution.
1754

1755
        Returns: float probability
1756
        """
1757
        m = len(data)
1758
        if self.n < m:
1759
            return 0
1760

1761
        x = data
1762
        p = self.Random()
1763
        q = p[:m] ** x
1764
        return q.prod()
1765

1766
    def LogLikelihood(self, data):
1767
        """Computes the log likelihood of the data.
1768

1769
        Selects a random vector of probabilities from this distribution.
1770

1771
        Returns: float log probability
1772
        """
1773
        m = len(data)
1774
        if self.n < m:
1775
            return float('-inf')
1776

1777
        x = self.Random()
1778
        y = numpy.log(x[:m]) * data
1779
        return y.sum()
1780

1781
    def MarginalBeta(self, i):
1782
        """Computes the marginal distribution of the ith element.
1783

1784
        See http://en.wikipedia.org/wiki/Dirichlet_distribution
1785
        #Marginal_distributions
1786

1787
        i: int
1788

1789
        Returns: Beta object
1790
        """
1791
        alpha0 = self.params.sum()
1792
        alpha = self.params[i]
1793
        return Beta(alpha, alpha0 - alpha)
1794

1795
    def PredictivePmf(self, xs, name=''):
1796
        """Makes a predictive distribution.
1797

1798
        xs: values to go into the Pmf
1799

1800
        Returns: Pmf that maps from x to the mean prevalence of x
1801
        """
1802
        alpha0 = self.params.sum()
1803
        ps = self.params / alpha0
1804
        return MakePmfFromItems(zip(xs, ps), name=name)
1805

1806

1807
def BinomialCoef(n, k):
1808
    """Compute the binomial coefficient "n choose k".
1809

1810
    n: number of trials
1811
    k: number of successes
1812

1813
    Returns: float
1814
    """
1815
    return scipy.misc.comb(n, k)
1816

1817

1818
def LogBinomialCoef(n, k):
1819
    """Computes the log of the binomial coefficient.
1820

1821
    http://math.stackexchange.com/questions/64716/
1822
    approximating-the-logarithm-of-the-binomial-coefficient
1823

1824
    n: number of trials
1825
    k: number of successes
1826

1827
    Returns: float
1828
    """
1829
    return n * log(n) - k * log(k) - (n - k) * log(n - k)
1830

1831

1832

1833
Product

Resources

Company