CoCalc -- redline.py

📚 The CoCalc Library - books, templates and other resources
cocalc-examples / think-bayes / code / redline.py
¹³²⁹²³ views
License: OTHER
1
"""This file contains code used in "Think Bayes",
2
by Allen B. Downey, available from greenteapress.com
3

4
Copyright 2013 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7

8
import thinkbayes
9

10
import thinkplot
11
import numpy
12

13
import math
14
import random
15
import sys
16

17
FORMATS = ['pdf', 'eps', 'png', 'jpg']
18

19
"""
20
Notation guide:
21

22
z: time between trains
23
x: time since the last train
24
y: time until the next train
25

26
zb: distribution of z as seen by a random arrival
27

28
"""
29

30
# longest hypothetical time between trains, in seconds
31

32
UPPER_BOUND = 1200
33

34
# observed gaps between trains, in seconds
35
# collected using code in redline_data.py, run daily 4-6pm
36
# for 5 days, Monday 6 May 2013 to Friday 10 May 2013
37

38
OBSERVED_GAP_TIMES = [
39
    428.0, 705.0, 407.0, 465.0, 433.0, 425.0, 204.0, 506.0, 143.0, 351.0, 
40
    450.0, 598.0, 464.0, 749.0, 341.0, 586.0, 754.0, 256.0, 378.0, 435.0, 
41
    176.0, 405.0, 360.0, 519.0, 648.0, 374.0, 483.0, 537.0, 578.0, 534.0, 
42
    577.0, 619.0, 538.0, 331.0, 186.0, 629.0, 193.0, 360.0, 660.0, 484.0, 
43
    512.0, 315.0, 457.0, 404.0, 740.0, 388.0, 357.0, 485.0, 567.0, 160.0, 
44
    428.0, 387.0, 901.0, 187.0, 622.0, 616.0, 585.0, 474.0, 442.0, 499.0, 
45
    437.0, 620.0, 351.0, 286.0, 373.0, 232.0, 393.0, 745.0, 636.0, 758.0,
46
]
47

48

49
def BiasPmf(pmf, name='', invert=False):
50
    """Returns the Pmf with oversampling proportional to value.
51

52
    If pmf is the distribution of true values, the result is the
53
    distribution that would be seen if values are oversampled in
54
    proportion to their values; for example, if you ask students
55
    how big their classes are, large classes are oversampled in
56
    proportion to their size.
57

58
    If invert=True, computes in inverse operation; for example,
59
    unbiasing a sample collected from students.
60

61
    Args:
62
      pmf: Pmf object.
63
      name: string name for the new Pmf.
64
      invert: boolean
65

66
     Returns:
67
       Pmf object
68
    """
69
    new_pmf = pmf.Copy(name=name)
70

71
    for x in pmf.Values():
72
        if invert:
73
            new_pmf.Mult(x, 1.0/x)
74
        else:
75
            new_pmf.Mult(x, x)
76
        
77
    new_pmf.Normalize()
78
    return new_pmf
79

80

81
def UnbiasPmf(pmf, name=''):
82
    """Returns the Pmf with oversampling proportional to 1/value.
83

84
    Args:
85
      pmf: Pmf object.
86
      name: string name for the new Pmf.
87

88
     Returns:
89
       Pmf object
90
    """
91
    return BiasPmf(pmf, name, invert=True)
92

93

94
def MakeUniformPmf(low, high):
95
    """Make a uniform Pmf.
96

97
    low: lowest value (inclusive)
98
    high: highest value (inclusive)
99
    """
100
    pmf = thinkbayes.Pmf()
101
    for x in MakeRange(low=low, high=high):
102
        pmf.Set(x, 1)
103
    pmf.Normalize()
104
    return pmf    
105
    
106

107
def MakeRange(low=10, high=None, skip=10):
108
    """Makes a range representing possible gap times in seconds.
109

110
    low: where to start
111
    high: where to end
112
    skip: how many to skip
113
    """
114
    if high is None:
115
        high = UPPER_BOUND
116

117
    return range(low, high+skip, skip)
118

119

120
class WaitTimeCalculator(object):
121
    """Encapsulates the forward inference process.
122

123
    Given the actual distribution of gap times (z),
124
    computes the distribution of gaps as seen by
125
    a random passenger (zb), which yields the distribution
126
    of wait times (y) and the distribution of elapsed times (x).
127
    """
128

129
    def __init__(self, pmf, inverse=False):
130
        """Constructor.
131

132
        pmf: Pmf of either z or zb
133
        inverse: boolean, true if pmf is zb, false if pmf is z
134
        """
135
        if inverse:
136
            self.pmf_zb = pmf
137
            self.pmf_z = UnbiasPmf(pmf, name="z")
138
        else:
139
            self.pmf_z = pmf
140
            self.pmf_zb = BiasPmf(pmf, name="zb")
141

142
        # distribution of wait time
143
        self.pmf_y = PmfOfWaitTime(self.pmf_zb)
144

145
        # the distribution of elapsed time is the same as the
146
        # distribution of wait time
147
        self.pmf_x = self.pmf_y
148

149
    def GenerateSampleWaitTimes(self, n):
150
        """Generates a random sample of wait times.
151

152
        n: sample size
153

154
        Returns: sequence of values
155
        """
156
        cdf_y = thinkbayes.MakeCdfFromPmf(self.pmf_y)
157
        sample = cdf_y.Sample(n)
158
        return sample
159

160
    def GenerateSampleGaps(self, n):
161
        """Generates a random sample of gaps seen by passengers.
162

163
        n: sample size
164

165
        Returns: sequence of values
166
        """
167
        cdf_zb = thinkbayes.MakeCdfFromPmf(self.pmf_zb)
168
        sample = cdf_zb.Sample(n)
169
        return sample
170

171
    def GenerateSamplePassengers(self, lam, n):
172
        """Generates a sample wait time and number of arrivals.
173

174
        lam: arrival rate in passengers per second
175
        n: number of samples
176

177
        Returns: list of (k1, y, k2) tuples
178
        k1: passengers there on arrival
179
        y: wait time
180
        k2: passengers arrived while waiting
181
        """
182
        zs = self.GenerateSampleGaps(n)
183
        xs, ys = SplitGaps(zs)
184

185
        res = []
186
        for x, y in zip(xs, ys):
187
            k1 = numpy.random.poisson(lam * x)
188
            k2 = numpy.random.poisson(lam * y)
189
            res.append((k1, y, k2))
190

191
        return res
192

193
    def PlotPmfs(self, root='redline0'):
194
        """Plots the computed Pmfs.
195

196
        root: string
197
        """
198
        pmfs = ScaleDists([self.pmf_z, self.pmf_zb], 1.0/60)
199

200
        thinkplot.Clf()
201
        thinkplot.PrePlot(2)
202
        thinkplot.Pmfs(pmfs)
203
        thinkplot.Save(root=root,
204
                       xlabel='Time (min)',
205
                       ylabel='CDF',
206
                       formats=FORMATS)
207

208

209
    def MakePlot(self, root='redline2'):
210
        """Plots the computed CDFs.
211

212
        root: string
213
        """
214
        print 'Mean z', self.pmf_z.Mean() / 60
215
        print 'Mean zb', self.pmf_zb.Mean() / 60
216
        print 'Mean y', self.pmf_y.Mean() / 60
217

218
        cdf_z = self.pmf_z.MakeCdf()
219
        cdf_zb = self.pmf_zb.MakeCdf()
220
        cdf_y = self.pmf_y.MakeCdf()
221

222
        cdfs = ScaleDists([cdf_z, cdf_zb, cdf_y], 1.0/60)
223

224
        thinkplot.Clf()
225
        thinkplot.PrePlot(3)
226
        thinkplot.Cdfs(cdfs)
227
        thinkplot.Save(root=root,
228
                       xlabel='Time (min)',
229
                       ylabel='CDF',
230
                       formats=FORMATS)
231

232

233
def SplitGaps(zs):
234
    """Splits zs into xs and ys.
235

236
    zs: sequence of gaps
237

238
    Returns: tuple of sequences (xs, ys)
239
    """
240
    xs = [random.uniform(0, z) for z in zs]
241
    ys = [z-x for z, x in zip(zs, xs)]
242
    return xs, ys
243

244

245
def PmfOfWaitTime(pmf_zb):
246
    """Distribution of wait time.
247

248
    pmf_zb: dist of gap time as seen by a random observer
249

250
    Returns: dist of wait time (also dist of elapsed time)
251
    """
252
    metapmf = thinkbayes.Pmf()
253
    for gap, prob in pmf_zb.Items():
254
        uniform = MakeUniformPmf(0, gap)
255
        metapmf.Set(uniform, prob)
256

257
    pmf_y = thinkbayes.MakeMixture(metapmf, name='y')
258
    return pmf_y
259

260

261
def ScaleDists(dists, factor):
262
    """Scales each of the distributions in a sequence.
263

264
    dists: sequence of Pmf or Cdf
265
    factor: float scale factor
266
    """
267
    return [dist.Scale(factor) for dist in dists]
268

269

270
class ElapsedTimeEstimator(object):
271
    """Uses the number of passengers to estimate time since last train."""
272

273
    def __init__(self, wtc, lam, num_passengers):
274
        """Constructor.
275

276
        pmf_x: expected distribution of elapsed time
277
        lam: arrival rate in passengers per second
278
        num_passengers: # passengers seen on the platform
279
        """
280
        # prior for elapsed time
281
        self.prior_x = Elapsed(wtc.pmf_x, name='prior x')
282

283
        # posterior of elapsed time (based on number of passengers)
284
        self.post_x = self.prior_x.Copy(name='posterior x')
285
        self.post_x.Update((lam, num_passengers))
286

287
        # predictive distribution of wait time
288
        self.pmf_y = PredictWaitTime(wtc.pmf_zb, self.post_x)
289

290
    def MakePlot(self, root='redline3'):
291
        """Plot the CDFs.
292

293
        root: string
294
        """
295
        # observed gaps
296
        cdf_prior_x = self.prior_x.MakeCdf()
297
        cdf_post_x = self.post_x.MakeCdf()
298
        cdf_y = self.pmf_y.MakeCdf()
299

300
        cdfs = ScaleDists([cdf_prior_x, cdf_post_x, cdf_y], 1.0/60)
301

302
        thinkplot.Clf()
303
        thinkplot.PrePlot(3)
304
        thinkplot.Cdfs(cdfs)
305
        thinkplot.Save(root=root,
306
                       xlabel='Time (min)',
307
                       ylabel='CDF',
308
                       formats=FORMATS)
309

310

311
class ArrivalRate(thinkbayes.Suite):
312
    """Represents the distribution of arrival rates (lambda)."""
313

314
    def Likelihood(self, data, hypo):
315
        """Computes the likelihood of the data under the hypothesis.
316

317
        Evaluates the Poisson PMF for lambda and k.
318

319
        hypo: arrival rate in passengers per second
320
        data: tuple of elapsed_time and number of passengers
321
        """
322
        lam = hypo
323
        x, k = data
324
        like = thinkbayes.EvalPoissonPmf(k, lam * x)
325
        return like
326

327

328
class ArrivalRateEstimator(object):
329
    """Estimates arrival rate based on passengers that arrive while waiting.
330
    """
331

332
    def __init__(self, passenger_data):
333
        """Constructor
334

335
        passenger_data: sequence of (k1, y, k2) pairs
336
        """
337
        # range for lambda
338
        low, high = 0, 5
339
        n = 51
340
        hypos = numpy.linspace(low, high, n) / 60
341

342
        self.prior_lam = ArrivalRate(hypos, name='prior')
343
        self.prior_lam.Remove(0)
344

345
        self.post_lam = self.prior_lam.Copy(name='posterior')
346

347
        for _k1, y, k2 in passenger_data:
348
            self.post_lam.Update((y, k2))
349

350
        print 'Mean posterior lambda', self.post_lam.Mean()
351

352
    def MakePlot(self, root='redline1'):
353
        """Plot the prior and posterior CDF of passengers arrival rate.
354

355
        root: string
356
        """
357
        thinkplot.Clf()
358
        thinkplot.PrePlot(2)
359

360
        # convert units to passengers per minute
361
        prior = self.prior_lam.MakeCdf().Scale(60)
362
        post = self.post_lam.MakeCdf().Scale(60)
363

364
        thinkplot.Cdfs([prior, post])
365

366
        thinkplot.Save(root=root,
367
                       xlabel='Arrival rate (passengers / min)',
368
                       ylabel='CDF',
369
                       formats=FORMATS)
370
                       
371

372
class Elapsed(thinkbayes.Suite):
373
    """Represents the distribution of elapsed time (x)."""
374

375
    def Likelihood(self, data, hypo):
376
        """Computes the likelihood of the data under the hypothesis.
377

378
        Evaluates the Poisson PMF for lambda and k.
379

380
        hypo: elapsed time since the last train
381
        data: tuple of arrival rate and number of passengers
382
        """
383
        x = hypo
384
        lam, k = data
385
        like = thinkbayes.EvalPoissonPmf(k, lam * x)
386
        return like
387

388

389
def PredictWaitTime(pmf_zb, pmf_x):
390
    """Computes the distribution of wait times.
391

392
    Enumerate all pairs of zb from pmf_zb and x from pmf_x,
393
    and accumulate the distribution of y = z - x.
394

395
    pmf_zb: distribution of gaps seen by random observer
396
    pmf_x: distribution of elapsed time
397
    """
398
    pmf_y = pmf_zb - pmf_x
399
    pmf_y.name = 'pred y'
400
    RemoveNegatives(pmf_y)
401
    return pmf_y
402

403

404
def RemoveNegatives(pmf):
405
    """Removes negative values from a PMF.
406

407
    pmf: Pmf
408
    """
409
    for val in pmf.Values():
410
        if val < 0:
411
            pmf.Remove(val)
412
    pmf.Normalize()
413

414

415
class Gaps(thinkbayes.Suite):
416
    """Represents the distribution of gap times,
417
    as updated by an observed waiting time."""
418

419
    def Likelihood(self, data, hypo):
420
        """The likelihood of the data under the hypothesis.
421

422
        If the actual gap time is z, what is the likelihood
423
        of waiting y seconds?
424

425
        hypo: actual time between trains
426
        data: observed wait time
427
        """
428
        z = hypo
429
        y = data
430
        if y > z:
431
            return 0
432
        return 1.0 / z
433

434

435
class GapDirichlet(thinkbayes.Dirichlet):
436
    """Represents the distribution of prevalences for each
437
    gap time."""
438

439
    def __init__(self, xs):
440
        """Constructor.
441

442
        xs: sequence of possible gap times
443
        """
444
        n = len(xs)
445
        thinkbayes.Dirichlet.__init__(self, n)
446
        self.xs = xs
447
        self.mean_zbs = []
448

449
    def PmfMeanZb(self):
450
        """Makes the Pmf of mean zb.
451

452
        Values stored in mean_zbs.
453
        """
454
        return thinkbayes.MakePmfFromList(self.mean_zbs)
455

456
    def Preload(self, data):
457
        """Adds pseudocounts to the parameters.
458

459
        data: sequence of pseudocounts
460
        """
461
        thinkbayes.Dirichlet.Update(self, data)
462

463
    def Update(self, data):
464
        """Computes the likelihood of the data.
465

466
        data: wait time observed by random arrival (y)
467

468
        Returns: float probability
469
        """
470
        k, y = data
471

472
        print k, y
473
        prior = self.PredictivePmf(self.xs)
474
        gaps = Gaps(prior)
475
        gaps.Update(y)
476
        probs = gaps.Probs(self.xs)
477

478
        self.params += numpy.array(probs)
479

480

481
class GapDirichlet2(GapDirichlet):
482
    """Represents the distribution of prevalences for each
483
    gap time."""
484

485
    def Update(self, data):
486
        """Computes the likelihood of the data.
487

488
        data: wait time observed by random arrival (y)
489

490
        Returns: float probability
491
        """
492
        k, y = data
493

494
        # get the current best guess for pmf_z
495
        pmf_zb = self.PredictivePmf(self.xs)
496

497
        # use it to compute prior pmf_x, pmf_y, pmf_z
498
        wtc = WaitTimeCalculator(pmf_zb, inverse=True)
499

500
        # use the observed passengers to estimate posterior pmf_x
501
        elapsed = ElapsedTimeEstimator(wtc,
502
                                       lam=0.0333,
503
                                       num_passengers=k)
504

505
        # use posterior_x and observed y to estimate observed z
506
        obs_zb = elapsed.post_x + Floor(y)
507
        probs = obs_zb.Probs(self.xs)
508

509
        mean_zb = obs_zb.Mean()
510
        self.mean_zbs.append(mean_zb)
511
        print k, y, mean_zb
512

513
        # use observed z to update beliefs about pmf_z
514
        self.params += numpy.array(probs)
515

516

517
class GapTimeEstimator(object):
518
    """Infers gap times using passenger data."""
519

520
    def __init__(self, xs, pcounts, passenger_data):
521
        self.xs = xs
522
        self.pcounts = pcounts
523
        self.passenger_data = passenger_data
524

525
        self.wait_times = [y for _k1, y, _k2 in passenger_data]
526
        self.pmf_y = thinkbayes.MakePmfFromList(self.wait_times, name="y")
527

528
        dirichlet = GapDirichlet2(self.xs)
529
        dirichlet.params /= 1.0
530

531
        dirichlet.Preload(self.pcounts)
532
        dirichlet.params /= 20.0
533

534
        self.prior_zb = dirichlet.PredictivePmf(self.xs, name="prior zb")
535
        
536
        for k1, y, _k2 in passenger_data:
537
            dirichlet.Update((k1, y))
538

539
        self.pmf_mean_zb = dirichlet.PmfMeanZb()
540

541
        self.post_zb = dirichlet.PredictivePmf(self.xs, name="post zb")
542
        self.post_z = UnbiasPmf(self.post_zb, name="post z")
543

544
    def PlotPmfs(self):
545
        """Plot the PMFs."""
546
        print 'Mean y', self.pmf_y.Mean()
547
        print 'Mean z', self.post_z.Mean()
548
        print 'Mean zb', self.post_zb.Mean()
549

550
        thinkplot.Pmf(self.pmf_y)
551
        thinkplot.Pmf(self.post_z)
552
        thinkplot.Pmf(self.post_zb)
553

554
    def MakePlot(self):
555
        """Plot the CDFs."""
556
        thinkplot.Cdf(self.pmf_y.MakeCdf())
557
        thinkplot.Cdf(self.prior_zb.MakeCdf())
558
        thinkplot.Cdf(self.post_zb.MakeCdf())
559
        thinkplot.Cdf(self.pmf_mean_zb.MakeCdf())
560
        thinkplot.Show()
561

562

563
def Floor(x, factor=10):
564
    """Rounds down to the nearest multiple of factor.
565

566
    When factor=10, all numbers from 10 to 19 get floored to 10.
567
    """
568
    return int(x/factor) * factor
569

570

571
def TestGte():
572
    """Tests the GapTimeEstimator."""
573
    random.seed(17)
574

575
    xs = [60, 120, 240]
576
    
577
    gap_times = [60, 60, 60, 60, 60, 120, 120, 120, 240, 240]
578

579
    # distribution of gap time (z)
580
    pdf_z = thinkbayes.EstimatedPdf(gap_times)
581
    pmf_z = pdf_z.MakePmf(xs, name="z")
582

583
    wtc = WaitTimeCalculator(pmf_z, inverse=False)
584

585
    lam = 0.0333
586
    n = 100
587
    passenger_data = wtc.GenerateSamplePassengers(lam, n)
588

589
    pcounts = [0, 0, 0]
590

591
    ite = GapTimeEstimator(xs, pcounts, passenger_data)
592

593
    thinkplot.Clf()
594

595
    # thinkplot.Cdf(wtc.pmf_z.MakeCdf(name="actual z"))    
596
    thinkplot.Cdf(wtc.pmf_zb.MakeCdf(name="actual zb"))
597
    ite.MakePlot()
598

599

600
class WaitMixtureEstimator(object):
601
    """Encapsulates the process of estimating wait time with uncertain lam.
602
    """
603

604
    def __init__(self, wtc, are, num_passengers=15):
605
        """Constructor.
606

607
        wtc: WaitTimeCalculator
608
        are: ArrivalTimeEstimator
609
        num_passengers: number of passengers seen on the platform
610
        """
611
        self.metapmf = thinkbayes.Pmf()
612

613
        for lam, prob in sorted(are.post_lam.Items()):
614
            ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
615
            self.metapmf.Set(ete.pmf_y, prob)
616

617
        self.mixture = thinkbayes.MakeMixture(self.metapmf)
618

619
        lam = are.post_lam.Mean()
620
        ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
621
        self.point = ete.pmf_y
622

623
    def MakePlot(self, root='redline4'):
624
        """Makes a plot showing the mixture."""
625
        thinkplot.Clf()
626

627
        # plot the MetaPmf
628
        for pmf, prob in sorted(self.metapmf.Items()):
629
            cdf = pmf.MakeCdf().Scale(1.0/60)
630
            width = 2/math.log(-math.log(prob))
631
            thinkplot.Plot(cdf.xs, cdf.ps,
632
                           alpha=0.2, linewidth=width, color='blue', 
633
                           label='')
634

635
        # plot the mixture and the distribution based on a point estimate
636
        thinkplot.PrePlot(2)
637
        #thinkplot.Cdf(self.point.MakeCdf(name='point').Scale(1.0/60))
638
        thinkplot.Cdf(self.mixture.MakeCdf(name='mix').Scale(1.0/60))
639

640
        thinkplot.Save(root=root,
641
                       xlabel='Wait time (min)',
642
                       ylabel='CDF',
643
                       formats=FORMATS,
644
                       axis=[0,10,0,1])
645

646

647

648
def GenerateSampleData(gap_times, lam=0.0333, n=10):
649
    """Generates passenger data based on actual gap times.
650

651
    gap_times: sequence of float
652
    lam: arrival rate in passengers per second
653
    n: number of simulated observations
654
    """
655
    xs = MakeRange(low=10)
656
    pdf_z = thinkbayes.EstimatedPdf(gap_times)
657
    pmf_z = pdf_z.MakePmf(xs, name="z")
658

659
    wtc = WaitTimeCalculator(pmf_z, inverse=False)
660
    passenger_data = wtc.GenerateSamplePassengers(lam, n)
661
    return wtc, passenger_data
662

663

664
def RandomSeed(x):
665
    """Initialize the random and numpy.random generators.
666

667
    x: int seed
668
    """
669
    random.seed(x)
670
    numpy.random.seed(x)
671
    
672

673
def RunSimpleProcess(gap_times, lam=0.0333, num_passengers=15, plot=True):
674
    """Runs the basic analysis and generates figures.
675

676
    gap_times: sequence of float
677
    lam: arrival rate in passengers per second
678
    num_passengers: int number of passengers on the platform
679
    plot: boolean, whether to generate plots
680

681
    Returns: WaitTimeCalculator, ElapsedTimeEstimator
682
    """
683
    global UPPER_BOUND
684
    UPPER_BOUND = 1200
685

686
    cdf_z = thinkbayes.MakeCdfFromList(gap_times).Scale(1.0/60)
687
    print 'CI z', cdf_z.CredibleInterval(90)
688

689
    xs = MakeRange(low=10)
690

691
    pdf_z = thinkbayes.EstimatedPdf(gap_times)
692
    pmf_z = pdf_z.MakePmf(xs, name="z")
693

694
    wtc = WaitTimeCalculator(pmf_z, inverse=False)    
695

696
    if plot:
697
        wtc.PlotPmfs()
698
        wtc.MakePlot()
699

700
    ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
701

702
    if plot:
703
        ete.MakePlot()
704

705
    return wtc, ete
706

707

708
def RunMixProcess(gap_times, lam=0.0333, num_passengers=15, plot=True):
709
    """Runs the analysis for unknown lambda.
710

711
    gap_times: sequence of float
712
    lam: arrival rate in passengers per second
713
    num_passengers: int number of passengers on the platform
714
    plot: boolean, whether to generate plots
715

716
    Returns: WaitMixtureEstimator
717
    """
718
    global UPPER_BOUND
719
    UPPER_BOUND = 1200
720

721
    wtc, _ete = RunSimpleProcess(gap_times, lam, num_passengers)
722

723
    RandomSeed(20)
724
    passenger_data = wtc.GenerateSamplePassengers(lam, n=5)
725

726
    total_y = 0
727
    total_k2 = 0
728
    for k1, y, k2 in passenger_data:
729
        print k1, y/60, k2
730
        total_y += y/60
731
        total_k2 += k2
732
    print total_k2, total_y
733
    print 'Average arrival rate', total_k2 / total_y
734

735
    are = ArrivalRateEstimator(passenger_data)
736

737
    if plot:
738
        are.MakePlot()
739

740
    wme = WaitMixtureEstimator(wtc, are, num_passengers)
741

742
    if plot:
743
        wme.MakePlot()
744

745
    return wme
746

747

748
def RunLoop(gap_times, nums, lam=0.0333):
749
    """Runs the basic analysis for a range of num_passengers.
750

751
    gap_times: sequence of float
752
    nums: sequence of values for num_passengers
753
    lam: arrival rate in passengers per second
754

755
    Returns: WaitMixtureEstimator
756
    """
757
    global UPPER_BOUND
758
    UPPER_BOUND = 4000
759

760
    thinkplot.Clf()
761

762
    RandomSeed(18)
763

764
    # resample gap_times
765
    n = 220
766
    cdf_z = thinkbayes.MakeCdfFromList(gap_times)
767
    sample_z = cdf_z.Sample(n)
768
    pmf_z = thinkbayes.MakePmfFromList(sample_z)
769

770
    # compute the biased pmf and add some long delays
771
    cdf_zp = BiasPmf(pmf_z).MakeCdf()
772
    sample_zb = cdf_zp.Sample(n) + [1800, 2400, 3000]
773

774
    # smooth the distribution of zb
775
    pdf_zb = thinkbayes.EstimatedPdf(sample_zb)
776
    xs = MakeRange(low=60)
777
    pmf_zb = pdf_zb.MakePmf(xs)
778

779
    # unbias the distribution of zb and make wtc
780
    pmf_z = UnbiasPmf(pmf_zb)
781
    wtc = WaitTimeCalculator(pmf_z)
782

783
    probs = []
784
    for num_passengers in nums:
785
        ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
786

787
        # compute the posterior prob of waiting more than 15 minutes
788
        cdf_y = ete.pmf_y.MakeCdf()
789
        prob = 1 - cdf_y.Prob(900)
790
        probs.append(prob)
791

792
        # thinkplot.Cdf(ete.pmf_y.MakeCdf(name=str(num_passengers)))
793
    
794
    thinkplot.Plot(nums, probs)
795
    thinkplot.Save(root='redline5',
796
                   xlabel='Num passengers',
797
                   ylabel='P(y > 15 min)',
798
                   formats=FORMATS,
799
                   )
800

801

802
def main(script):
803
    RunLoop(OBSERVED_GAP_TIMES, nums=[0, 5, 10, 15, 20, 25, 30, 35])
804
    RunMixProcess(OBSERVED_GAP_TIMES)
805
    
806

807
if __name__ == '__main__':
808
    main(*sys.argv)
809

810
Product

Resources

Company