Kernel: Python 3
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp

from collections import OrderedDict
from theano import shared, tensor as tt

%config InlineBackend.figure_format = 'retina'
plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])

Code 11.1

trolley_df = pd.read_csv('Data/Trolley.csv', sep=';')
trolley_df.head()

Code 11.2

ax = (trolley_df.response
                .value_counts()
                .sort_index()
                .plot(kind='bar'))

ax.set_xlabel("response", fontsize=14);
ax.set_ylabel("Frequency", fontsize=14);
Image in a Jupyter notebook

Code 11.3

ax = (trolley_df.response
                .value_counts()
                .sort_index()
                .cumsum()
                .div(trolley_df.shape[0])
                .plot(marker='o'))

ax.set_xlim(0.9, 7.1);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("cumulative proportion", fontsize=14);
Image in a Jupyter notebook

Code 11.4

resp_lco = (trolley_df.response
                      .value_counts()
                      .sort_index()
                      .cumsum()
                      .iloc[:-1]
                      .div(trolley_df.shape[0])
                      .apply(lambda p: np.log(p / (1. - p))))

ax = resp_lco.plot(marker='o')

ax.set_xlim(0.9, 7);
ax.set_xlabel("response", fontsize=14)
ax.set_ylabel("log-cumulative-odds", fontsize=14);
Image in a Jupyter notebook

Code 11.5

The following Ordered transformation is taken from the PyMC Discourse.

class Ordered(pm.distributions.transforms.ElemwiseTransform):
    name = "ordered"

    def forward(self, x):
        # map an increasing vector to unconstrained space:
        # keep the first element, log-transform the gaps
        out = tt.zeros(x.shape)
        out = tt.inc_subtensor(out[0], x[0])
        out = tt.inc_subtensor(out[1:], tt.log(x[1:] - x[:-1]))
        return out

    def forward_val(self, x, point=None):
        x, = pm.distributions.distribution.draw_values([x], point=point)
        return self.forward(x)

    def backward(self, y):
        # invert: exponentiate the gaps and cumulatively sum,
        # which guarantees an increasing result
        out = tt.zeros(y.shape)
        out = tt.inc_subtensor(out[0], y[0])
        out = tt.inc_subtensor(out[1:], tt.exp(y[1:]))
        return tt.cumsum(out)

    def jacobian_det(self, y):
        return tt.sum(y[1:])
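As a quick illustration (not part of the original notebook), backward should invert forward for any increasing vector, which is what makes this transform safe for ordered cutpoints:

# Illustrative round-trip check on an arbitrary increasing vector
x_check = np.array([-2., -1., 0.5, 3.])
t = Ordered()
x_rt = t.backward(t.forward(tt.as_tensor_variable(x_check))).eval()
print(np.allclose(x_check, x_rt))  # True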
with pm.Model() as m11_1:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6, testval=np.arange(6) - 2.5)

    pa = pm.math.sigmoid(a)

    p_cum = tt.concatenate([[0.], pa, [1.]])
    p = p_cum[1:] - p_cum[:-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1)
with m11_1:
    map_11_1 = pm.find_MAP()
logp = -18,941, ||grad|| = 0.45229: 100%|██████████| 14/14 [00:00<00:00, 23.21it/s]

Code 11.6

map_11_1['a']
array([-1.9160707 , -1.26658298, -0.71862013, 0.24778795, 0.88986631, 1.76937289])

Code 11.7

sp.special.expit(map_11_1['a'])
array([ 0.12830038, 0.21984275, 0.32769691, 0.56163196, 0.70886258, 0.85437967])
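Since m11_1 contains only the cutpoint intercepts, these MAP cumulative probabilities should sit close to the empirical cumulative proportions plotted in Code 11.3; an illustrative comparison (not in the original notebook):

# Empirical cumulative proportions for responses 1..6, for comparison
(trolley_df.response
           .value_counts()
           .sort_index()
           .cumsum()
           .iloc[:-1]
           .div(trolley_df.shape[0])
           .values)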

Code 11.8

with m11_1:
    trace_11_1 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:16<00:00, 160.45it/s]
pm.df_summary(trace_11_1, varnames=['a'], alpha=.11).round(2)

Code 11.9

def ordered_logistic_proba(a):
    pa = sp.special.expit(a)
    p_cum = np.concatenate(([0.], pa, [1.]))
    return p_cum[1:] - p_cum[:-1]
ordered_logistic_proba(trace_11_1['a'].mean(axis=0))
array([ 0.12753048, 0.09170783, 0.10820073, 0.2341595 , 0.14767958, 0.14570807, 0.14501382])
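As a sanity check (added here for illustration), the seven category probabilities implied by the six cutpoints must sum to one:

# The probabilities are differences of a cumulative sequence running from 0 to 1
ordered_logistic_proba(trace_11_1['a'].mean(axis=0)).sum()  # 1.0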

Code 11.10

(ordered_logistic_proba(trace_11_1['a'].mean(axis=0))
    * (1 + np.arange(7))).sum()
4.1999293593514384

Code 11.11

ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5)
array([ 0.08143763, 0.06409094, 0.08244469, 0.20927244, 0.15948963, 0.18473514, 0.21852952])

Code 11.12

(ordered_logistic_proba(trace_11_1['a'].mean(axis=0) - 0.5)
    * (1 + np.arange(7))).sum()
4.7296090400791702

Code 11.13

action = shared(trolley_df.action.values)
intention = shared(trolley_df.intention.values)
contact = shared(trolley_df.contact.values)

with pm.Model() as m11_2:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )

    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    phi = bA * action + bI * intention + bC * contact

    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )

    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
with m11_2:
    map_11_2 = pm.find_MAP()
logp = -18,565, ||grad|| = 3.7472: 100%|██████████| 17/17 [00:00<00:00, 58.64it/s]

Code 11.14

with pm.Model() as m11_3:
    a = pm.Normal(
        'a', 0., 10.,
        transform=Ordered(),
        shape=6,
        testval=trace_11_1['a'].mean(axis=0)
    )

    bA = pm.Normal('bA', 0., 10.)
    bI = pm.Normal('bI', 0., 10.)
    bC = pm.Normal('bC', 0., 10.)
    bAI = pm.Normal('bAI', 0., 10.)
    bCI = pm.Normal('bCI', 0., 10.)
    phi = bA * action + bI * intention + bC * contact \
        + bAI * action * intention \
        + bCI * contact * intention

    pa = pm.math.sigmoid(
        tt.shape_padleft(a) - tt.shape_padright(phi)
    )

    p_cum = tt.concatenate([
        tt.zeros_like(tt.shape_padright(pa[:, 0])),
        pa,
        tt.ones_like(tt.shape_padright(pa[:, 0]))
    ], axis=1)
    p = p_cum[:, 1:] - p_cum[:, :-1]

    resp_obs = pm.Categorical(
        'resp_obs', p,
        observed=trolley_df.response - 1
    )
with m11_3:
    map_11_3 = pm.find_MAP()
logp = -18,489, ||grad|| = 0.91902: 100%|██████████| 26/26 [00:00<00:00, 110.84it/s]

Code 11.15

def get_coefs(map_est):
    coefs = OrderedDict()

    for i, ai in enumerate(map_est['a']):
        coefs['a_{}'.format(i)] = ai

    coefs['bA'] = map_est.get('bA', np.nan)
    coefs['bI'] = map_est.get('bI', np.nan)
    coefs['bC'] = map_est.get('bC', np.nan)
    coefs['bAI'] = map_est.get('bAI', np.nan)
    coefs['bCI'] = map_est.get('bCI', np.nan)

    return coefs
(pd.DataFrame.from_dict(
    OrderedDict([
        ('m11_1', get_coefs(map_11_1)),
        ('m11_2', get_coefs(map_11_2)),
        ('m11_3', get_coefs(map_11_3))
    ]))
    .astype(np.float64)
    .round(2))

Code 11.16

with m11_2:
    trace_11_2 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [03:19<00:00, 7.63it/s]
with m11_3:
    trace_11_3 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [04:16<00:00, 7.37it/s]
comp_df = pm.compare([trace_11_1, trace_11_2, trace_11_3],
                     [m11_1, m11_2, m11_3])

comp_df.loc[:, 'model'] = pd.Series(['m11.1', 'm11.2', 'm11.3'])
comp_df = comp_df.set_index('model')
comp_df

Code 11.17-19

pp_df = pd.DataFrame(
    np.array([
        [0, 0, 0],
        [0, 0, 1],
        [1, 0, 0],
        [1, 0, 1],
        [0, 1, 0],
        [0, 1, 1]
    ]),
    columns=['action', 'contact', 'intention'])
pp_df
action.set_value(pp_df.action.values)
contact.set_value(pp_df.contact.values)
intention.set_value(pp_df.intention.values)

with m11_3:
    pp_trace_11_3 = pm.sample_ppc(trace_11_3, samples=1500)
100%|██████████| 1500/1500 [00:17<00:00, 87.99it/s]
PP_COLS = ['pp_{}'.format(i) for i, _ in enumerate(pp_trace_11_3['resp_obs'])]

pp_df = pd.concat((pp_df,
                   pd.DataFrame(pp_trace_11_3['resp_obs'].T, columns=PP_COLS)),
                  axis=1)
pp_cum_df = (pd.melt(
        pp_df,
        id_vars=['action', 'contact', 'intention'],
        value_vars=PP_COLS,
        value_name='resp'
    )
    .groupby(['action', 'contact', 'intention', 'resp'])
    .size()
    .div(1500)
    .rename('proba')
    .reset_index()
    .pivot_table(
        index=['action', 'contact', 'intention'],
        values='proba',
        columns='resp'
    )
    .cumsum(axis=1)
    .iloc[:, :-1])
pp_cum_df
for (plot_action, plot_contact), plot_df in pp_cum_df.groupby(level=['action', 'contact']):
    fig, ax = plt.subplots(figsize=(8, 6))

    ax.plot([0, 1], plot_df, c='C0');
    ax.plot([0, 1], [0, 0], '--', c='C0');
    ax.plot([0, 1], [1, 1], '--', c='C0');

    ax.set_xlim(0, 1);
    ax.set_xlabel("intention");

    ax.set_ylim(-0.05, 1.05);
    ax.set_ylabel("probability");

    ax.set_title(
        "action = {action}, contact = {contact}".format(
            action=plot_action, contact=plot_contact
        )
    );
Three images in a Jupyter notebook (one per action/contact combination)

Code 11.20

# define parameters
PROB_DRINK = 0.2  # 20% of days
RATE_WORK = 1.  # average 1 manuscript per day

# sample one year of production
N = 365
drink = np.random.binomial(1, PROB_DRINK, size=N)
y = (1 - drink) * np.random.poisson(RATE_WORK, size=N)
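A small illustrative check (not in the original notebook): under this mixture the zero probability is P(0) = p + (1 - p) * exp(-lambda), so roughly half the days should show zero manuscripts:

# Compare the analytic zero probability with the simulated zero fraction
expected_zeros = PROB_DRINK + (1 - PROB_DRINK) * np.exp(-RATE_WORK)
print(expected_zeros, (y == 0).mean())  # ~0.494 vs. the sampled proportion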

Code 11.21

drink_zeros = drink.sum()
work_zeros = (y == 0).sum() - drink_zeros

bins = np.arange(y.max() + 1) - 0.5

plt.hist(y, bins=bins);
plt.bar(0., drink_zeros, width=1.,
        bottom=work_zeros, color='C1', alpha=.5);

plt.xticks(bins + 0.5);
plt.xlabel("manuscripts completed");
plt.ylabel("Frequency");
Image in a Jupyter notebook

Code 11.22

with pm.Model() as m11_4:
    ap = pm.Normal('ap', 0., 1.)
    p = pm.math.sigmoid(ap)

    al = pm.Normal('al', 0., 10.)
    lambda_ = tt.exp(al)

    y_obs = pm.ZeroInflatedPoisson('y_obs', 1. - p, lambda_, observed=y)
with m11_4:
    map_11_4 = pm.find_MAP()
logp = -462.79, ||grad|| = 0.00047532: 100%|██████████| 12/12 [00:00<00:00, 211.49it/s]
map_11_4
{'al': array(0.05019473453063987), 'ap': array(-1.426975323508402)}

Code 11.23

sp.special.expit(map_11_4['ap']) # probability drink
0.19357040138820736
np.exp(map_11_4['al']) # rate finish manuscripts, when not drinking
1.0514758350937541

Code 11.24

def dzip(x, p, lambda_, log=True):
    # zeros arise either from drinking (probability p) or from a Poisson zero;
    # positive counts can only come from the Poisson component
    like = np.where(
        x == 0,
        p + (1 - p) * sp.stats.poisson.pmf(0, lambda_),
        (1 - p) * sp.stats.poisson.pmf(x, lambda_)
    )
    return np.log(like) if log else like
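A brief usage sketch with the simulation's parameters (illustrative values, not from the book):

# Mixture likelihood of counts 0..3 under p = 0.2, lambda = 1
dzip(np.arange(4), 0.2, 1., log=False)  # ~[0.494, 0.294, 0.147, 0.049]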

Code 11.25

PBAR = 0.5
THETA = 5.
a = PBAR * THETA
b = (1 - PBAR) * THETA
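This is the mean-and-concentration parameterization of the beta distribution; the mapping inverts as pbar = a / (a + b) and theta = a + b, as a quick illustrative check confirms:

# Recover (PBAR, THETA) from the shape parameters
print(a / (a + b), a + b)  # 0.5 5.0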
p = np.linspace(0, 1, 100)

plt.plot(p, sp.stats.beta.pdf(p, a, b));

plt.xlim(0, 1);
plt.xlabel("probability");
plt.ylabel("Density");
Image in a Jupyter notebook

Code 11.26

admit_df = pd.read_csv('Data/UCBadmit.csv', sep=';')
admit_df.head()
with pm.Model() as m11_5:
    a = pm.Normal('a', 0., 2.)
    pbar = pm.Deterministic('pbar', pm.math.sigmoid(a))

    theta = pm.Exponential('theta', 1.)

    admit_obs = pm.BetaBinomial(
        'admit_obs',
        pbar * theta, (1. - pbar) * theta,
        admit_df.applications.values,
        observed=admit_df.admit.values
    )
with m11_5:
    trace_11_5 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
100%|██████████| 2000/2000 [00:02<00:00, 718.29it/s]

Code 11.27

pm.summary(trace_11_5, alpha=.11).round(2)

Code 11.28

np.percentile(trace_11_5['pbar'], [2.5, 50., 97.5])
array([ 0.28260569, 0.40670155, 0.56068546])

Code 11.29

pbar_hat = trace_11_5['pbar'].mean()
theta_hat = trace_11_5['theta'].mean()

p_plot = np.linspace(0, 1, 100)

plt.plot(
    p_plot,
    sp.stats.beta.pdf(p_plot, pbar_hat * theta_hat, (1. - pbar_hat) * theta_hat)
);
plt.plot(
    p_plot,
    sp.stats.beta.pdf(
        p_plot[:, np.newaxis],
        trace_11_5['pbar'][:100] * trace_11_5['theta'][:100],
        (1. - trace_11_5['pbar'][:100]) * trace_11_5['theta'][:100]
    ),
    c='C0', alpha=0.1
);

plt.xlim(0., 1.);
plt.xlabel("probability admit");

plt.ylim(0., 3.);
plt.ylabel("Density");
Image in a Jupyter notebook

Code 11.30

with m11_5:
    pp_trace_11_5 = pm.sample_ppc(trace_11_5)
100%|██████████| 1000/1000 [00:02<00:00, 355.73it/s]
x_case = np.arange(admit_df.shape[0])

plt.scatter(
    x_case,
    pp_trace_11_5['admit_obs'].mean(axis=0) / admit_df.applications.values
);
plt.scatter(x_case, admit_df.admit / admit_df.applications);

high = np.percentile(pp_trace_11_5['admit_obs'], 95, axis=0) \
    / admit_df.applications.values
plt.scatter(x_case, high, marker='x', c='k');

low = np.percentile(pp_trace_11_5['admit_obs'], 5, axis=0) \
    / admit_df.applications.values
plt.scatter(x_case, low, marker='x', c='k');
Image in a Jupyter notebook

Code 11.31

mu = 3.
theta = 1.

x = np.linspace(0, 10, 100)

plt.plot(x, sp.stats.gamma.pdf(x, mu / theta, scale=theta));
Image in a Jupyter notebook
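scipy parameterizes the gamma by shape and scale, so shape = mu / theta and scale = theta give mean shape * scale = mu; an illustrative check (not in the original notebook):

# The mean of this gamma recovers mu
print(sp.stats.gamma.mean(mu / theta, scale=theta))  # 3.0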
import sys, IPython, scipy, matplotlib, platform

print("This notebook was created on a computer %s running %s and using:\n"
      "Python %s\nIPython %s\nPyMC3 %s\nNumPy %s\nPandas %s\nSciPy %s\nMatplotlib %s\n"
      % (platform.machine(), ' '.join(platform.linux_distribution()[:2]),
         sys.version[:5], IPython.__version__, pm.__version__, np.__version__,
         pd.__version__, scipy.__version__, matplotlib.__version__))
This notebook was created on a computer x86_64 running debian stretch/sid and using:
Python 3.6.2
IPython 6.1.0
PyMC3 3.2
NumPy 1.13.3
Pandas 0.20.3
SciPy 0.19.1
Matplotlib 2.1.0