CoCalc -- CommitDataForChapter1.ipynb

📚 The CoCalc Library - books, templates and other resources

cocalc-examples / bayesian-methods-for-hackers / sandbox / CommitDataForChapter1.ipynb

¹³²⁹²³ views
License: OTHER

Kernel: Unknown Kernel

To replace the data in chapter 1 with this real time data.

In [6]:

from requests import get
response = get('https://api.github.com/repos/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/stats/commit_activity').json()
weekly_totals = np.array(map(lambda x: x['total'], response))
weekly_totals = weekly_totals[np.where(weekly_totals)[0]]  # gives me 52 weeks, but project started < 1 year ago so it backwards fills with 0s

In [27]:

count_data = weekly_totals
n_count_data = len(weekly_totals)

plt.bar(range(n_count_data), weekly_totals);
print weekly_totals
print n_count_data

Out[27]:

[13 16  7 26  5 46 25  6  4 28 19 22 13 28  7 10  8 15 13  6 36  5  4  8  8
  3  2 11 20  9  1]
31

In [12]:

import pymc as pm


alpha = 1.0 / count_data.mean()  # Recall count_data is the
                               # variable that holds our txt counts

lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)

tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

In [13]:

@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  # lambda before tau is lambda1
    out[tau:] = lambda_2  # lambda after tau is lambda2
    return out

In [14]:

observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

model = pm.Model([observation, lambda_1, lambda_2, tau])

In [15]:

# Mysterious code to be explained in Chapter 3.
mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000, 1)

Out[15]:

[****************100%******************]  40000 of 40000 complete

In [16]:

lambda_1_samples = mcmc.trace('lambda_1')[:]
lambda_2_samples = mcmc.trace('lambda_2')[:]
tau_samples = mcmc.trace('tau')[:]

In [29]:

figsize(12.5, 10)
# histogram of the samples:

ax = plt.subplot(311)
ax.set_autoscaley_on(False)

plt.hist(lambda_1_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label="posterior of $\lambda_1$", color="#A60628", normed=True)
plt.legend(loc="upper left")
plt.title(r"Posterior distributions of the variables \
    $\lambda_1,\;\lambda_2,\;\tau$")

plt.xlabel("$\lambda_1$ value")

ax = plt.subplot(312)
ax.set_autoscaley_on(False)

plt.hist(lambda_2_samples, histtype='stepfilled', bins=30, alpha=0.85,
         label="posterior of $\lambda_2$", color="#7A68A6", normed=True)
plt.legend(loc="upper left")

plt.xlabel("$\lambda_2$ value")

plt.subplot(313)


#w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
plt.hist(tau_samples, bins=n_count_data, alpha=1,
         label=r"posterior of $\tau$",
         color="#467821", rwidth=2.)
plt.xticks(np.arange(n_count_data))

plt.legend(loc="upper left")
plt.ylim([0, .75])
plt.xlabel("$\tau$ (in days)")
plt.ylabel("probability")

Out[29]:

<matplotlib.text.Text at 0x1155ed950>

In [21]:

n_count_data

Out[21]:

31

In [28]:

lambda_2_samples

Out[28]:

array([ 7.7774429 ,  7.7774429 ,  7.7774429 , ...,  8.21626815,
        8.21626815,  8.80155957])

In [ ]:

Product

Resources

Company