Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

📚 The CoCalc Library - books, templates and other resources

132916 views
License: OTHER
Kernel: Python 3
%matplotlib inline import pymc3 as pm import numpy as np import pandas as pd from scipy import stats import matplotlib.pyplot as plt import seaborn as sns import statsmodels.formula.api as smf %config InlineBackend.figure_format = 'retina' plt.style.use(['seaborn-colorblind', 'seaborn-darkgrid'])
/home/osvaldo/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`. from ._conv import register_converters as _register_converters

Code 5.1

# load data d = pd.read_csv('Data/WaffleDivorce.csv', sep=';') # standardize predictor d['MedianAgeMarriage_s'] = (d.MedianAgeMarriage - d.MedianAgeMarriage.mean()) / d.MedianAgeMarriage.std()
with pm.Model() as model_5_1: a = pm.Normal('a', mu=10, sd=10) bA = pm.Normal('bA', mu=0, sd=1) sigma = pm.Uniform('sigma', lower=0, upper=10) # good (default) alternatives for sigma (in this and other models) are # sigma = pm.HalfNormal('sigma', 5) # sigma = pm.HalfCauchy('sigma', 5) # some people recomed avoiding "hard" boundaries unless they have a theoretical/data-based justification, like a correlation that is restricted to be [-1, 1]. mu = pm.Deterministic('mu', a + bA * d.MedianAgeMarriage_s) Divorce = pm.Normal('Divorce', mu=mu, sd=sigma, observed=d.Divorce) trace_5_1 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bA, a] 100%|██████████| 2000/2000 [00:01<00:00, 1027.91it/s]
varnames = ['a', 'bA', 'sigma'] pm.traceplot(trace_5_1, varnames);
Image in a Jupyter notebook

Code 5.2

mu_mean = trace_5_1['mu'] mu_hpd = pm.hpd(mu_mean) plt.plot(d.MedianAgeMarriage_s, d.Divorce, 'C0o') plt.plot(d.MedianAgeMarriage_s, mu_mean.mean(0), 'C2') idx = np.argsort(d.MedianAgeMarriage_s) plt.fill_between(d.MedianAgeMarriage_s[idx], mu_hpd[:,0][idx], mu_hpd[:,1][idx], color='C2', alpha=0.25) plt.xlabel('Meadian Age Marriage', fontsize=14) plt.ylabel('Divorce', fontsize=14);
Image in a Jupyter notebook
Code 5.3
d['Marriage_s'] = (d.Marriage - d.Marriage.mean()) / d.Marriage.std()
with pm.Model() as model_5_2: a = pm.Normal('a', mu=10, sd=10) bA = pm.Normal('bA', mu=0, sd=1) sigma = pm.Uniform('sigma', lower=0, upper=10) mu = pm.Deterministic('mu', a + bA * d.Marriage_s) Divorce = pm.Normal('Divorce', mu=mu, sd=sigma, observed=d.Divorce) trace_5_2 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bA, a] 100%|██████████| 2000/2000 [00:01<00:00, 1166.50it/s]
pm.traceplot(trace_5_2, varnames);
Image in a Jupyter notebook
mu_mean = trace_5_2['mu'] mu_hpd = pm.hpd(mu_mean) d.plot('Marriage_s', 'Divorce', kind='scatter', xlim = (-2, 3)) plt.plot(d.Marriage_s, mu_mean.mean(0), 'C2') idx = np.argsort(d.Marriage_s) plt.fill_between(d.Marriage_s[idx], mu_hpd[:,0][idx], mu_hpd[:,1][idx], color='C2', alpha=0.25);
Image in a Jupyter notebook

Code 5.4

with pm.Model() as model_5_3: a = pm.Normal('a', mu=10, sd=10) bA = pm.Normal('bA', mu=0, sd=1, shape=2) sigma = pm.Uniform('sigma', lower=0, upper=10) mu = pm.Deterministic('mu', a + bA[0] * d.Marriage_s + bA[1] * d.MedianAgeMarriage_s) Divorce = pm.Normal('Divorce', mu=mu, sd=sigma, observed=d.Divorce) trace_5_3 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bA, a] 100%|██████████| 2000/2000 [00:02<00:00, 699.95it/s]
varnames = ['a', 'bA', 'sigma'] pm.traceplot(trace_5_3, varnames);
Image in a Jupyter notebook
pm.summary(trace_5_3, varnames, alpha=.11).round(3)

Code 5.5

pm.forestplot(trace_5_3, varnames=varnames);
Image in a Jupyter notebook

Code 5.6

with pm.Model() as model_5_4: a = pm.Normal('a', mu=10, sd=10) b = pm.Normal('b', mu=0, sd=1) sigma = pm.Uniform('sigma', lower=0, upper=10) mu = pm.Deterministic('mu', a + b * d.MedianAgeMarriage_s) Marriage = pm.Normal('Marriage', mu=mu, sd=sigma, observed=d.Marriage_s) trace_5_4 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, b, a] 100%|██████████| 2000/2000 [00:01<00:00, 1320.20it/s]
varnames = ['a', 'b', 'sigma'] pm.traceplot(trace_5_4, varnames);
Image in a Jupyter notebook

Code 5.7

mu_pred = trace_5_4['mu'].mean(0) residuals = d.Marriage_s - mu_pred

Code 5.8

idx = np.argsort(d.MedianAgeMarriage_s) d.plot('MedianAgeMarriage_s', 'Marriage_s', kind='scatter', xlim = (-3, 3), ylim = (-3, 3)) plt.plot(d.MedianAgeMarriage_s[idx], mu_pred[idx], 'k') plt.vlines(d.MedianAgeMarriage_s, mu_pred, mu_pred + residuals, colors='grey');
Image in a Jupyter notebook

Code 5.9

R_avg = np.linspace(-3, 3, 100) mu_pred = trace_5_3['a'] + trace_5_3['bA'][:,0] * R_avg[:,None] mu_hpd = pm.hpd(mu_pred.T) divorce_hpd = pm.hpd(stats.norm.rvs(mu_pred, trace_5_3['sigma']).T) plt.plot(R_avg, mu_pred.mean(1), 'C0'); plt.fill_between(R_avg, mu_hpd[:,0], mu_hpd[:,1], color='C2', alpha=0.25) plt.fill_between(R_avg, divorce_hpd[:,0], divorce_hpd[:,1], color='C2', alpha=0.25) plt.xlabel('Marriage.s') plt.ylabel('Divorce') plt.title('MedianAgeMarriage_s = 0');
Image in a Jupyter notebook

Code 5.10

R_avg = np.linspace(-3, 3, 100) mu_pred = trace_5_3['a'] + trace_5_3['bA'][:,1] * R_avg[:,None] mu_hpd = pm.hpd(mu_pred.T) divorce_hpd = pm.hpd(stats.norm.rvs(mu_pred, trace_5_3['sigma']).T) plt.plot(R_avg, mu_pred.mean(1), 'C0'); plt.fill_between(R_avg, mu_hpd[:,0], mu_hpd[:,1], color='C2', alpha=0.25) plt.fill_between(R_avg, divorce_hpd[:,0], divorce_hpd[:,1], color='C2', alpha=0.25) plt.xlabel('MedianAgeMarriage.s') plt.ylabel('Divorce') plt.title('Marriage_s = 0');
Image in a Jupyter notebook

Code 5.11

mu_pred = trace_5_3['mu'] mu_hpd = pm.hpd(mu_pred) divorce_pred = pm.sample_ppc(trace_5_3, samples=1000, model=model_5_3)['Divorce'] divorce_hpd = pm.hpd(divorce_pred)
100%|██████████| 1000/1000 [00:00<00:00, 2766.11it/s]

Code 5.12

mu_hpd = pm.hpd(mu_pred, alpha=0.05) plt.errorbar(d.Divorce, divorce_pred.mean(0), yerr=np.abs(divorce_pred.mean(0)-mu_hpd.T) , fmt='C0o') plt.plot(d.Divorce, divorce_pred.mean(0), 'C0o') plt.xlabel('Observed divorce', fontsize=14) plt.ylabel('Predicted divorce', fontsize=14) min_x, max_x = d.Divorce.min(), d.Divorce.max() plt.plot([min_x, max_x], [min_x, max_x], 'k--');
Image in a Jupyter notebook

Code 5.14

plt.figure(figsize=(10,12)) residuals = d.Divorce - mu_pred.mean(0) idx = np.argsort(residuals) y_label = d.Loc[idx] y_points = np.linspace(0, 1, 50) plt.errorbar(residuals[idx], y_points, xerr=np.abs(divorce_pred.mean(0)-mu_hpd.T), fmt='C0o',lw=3) plt.errorbar(residuals[idx], y_points, xerr=np.abs(divorce_pred.mean(0)-divorce_hpd.T), fmt='C0o', lw=3, alpha=0.5) plt.yticks(y_points, y_label); plt.vlines(0, 0, 1, 'grey');
Image in a Jupyter notebook

Code 5.15

N = 100 x_real = stats.norm.rvs(size=N) x_spur = stats.norm.rvs(x_real) y = stats.norm.rvs(x_real) d = pd.DataFrame([y, x_real, x_spur]).T sns.pairplot(d);
Image in a Jupyter notebook

Code 5.16

d = pd.read_csv('Data/milk.csv', sep=';') d.head()

Code 5.17 to 5.20

dcc = d.dropna().copy()
with pm.Model() as model_5_5: a = pm.Normal('a', mu=10, sd=100) bn = pm.Normal('bn', mu=0, sd=1) sigma = pm.Uniform('sigma', lower=0, upper=1) mu = pm.Deterministic('mu', a + bn * dcc['neocortex.perc']) kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=dcc['kcal.per.g']) trace_5_5 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bn, a] 100%|██████████| 2000/2000 [00:09<00:00, 217.22it/s] There were 4 divergences after tuning. Increase `target_accept` or reparameterize. The number of effective samples is smaller than 25% for some parameters.
varnames = ['a', 'bn', 'sigma'] pm.traceplot(trace_5_5, varnames);
Image in a Jupyter notebook

Code 5.21

pm.summary(trace_5_5, varnames, alpha=.11).round(3)

Code 5.22

trace_5_5['bn'].mean() * (76 - 55)
0.09955421253996859

Code 5.23

seq = np.linspace(55, 76, 50) mu_pred = trace_5_5['a'] + trace_5_5['bn'] * seq[:,None] mu_hpd = pm.hpd(mu_pred.T) plt.plot(d['neocortex.perc'], d['kcal.per.g'], 'C0o') plt.plot(seq, mu_pred.mean(1), 'k') plt.plot(seq, mu_hpd[:,0], 'k--') plt.plot(seq, mu_hpd[:,1], 'k--') plt.xlabel('neocortex.perc', fontsize=14) plt.ylabel('kcal.per.g', fontsize=14);
Image in a Jupyter notebook

Code 5.24

dcc['log_mass'] = np.log(dcc['mass'])

Code 5.25

with pm.Model() as model_5_6: a = pm.Normal('a', mu=10, sd=100) bn = pm.Normal('bn', mu=0, sd=1) sigma = pm.Uniform('sigma', lower=0, upper=1) mu = pm.Deterministic('mu', a + bn * dcc['log_mass']) kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=dcc['kcal.per.g']) trace_5_6 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bn, a] 100%|██████████| 2000/2000 [00:01<00:00, 1062.03it/s]
pm.summary(trace_5_6, varnames, alpha=.11).round(3)

Code 5.26

with pm.Model() as model_5_7: a = pm.Normal('a', mu=10, sd=100) bn = pm.Normal('bn', mu=0, sd=1, shape=2) sigma = pm.Uniform('sigma', lower=0, upper=1) mu = pm.Deterministic('mu', a + bn[0] * dcc['neocortex.perc'] + bn[1] * dcc['log_mass']) kcal = pm.Normal('kcal', mu=mu, sd=sigma, observed=dcc['kcal.per.g']) trace_5_7 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bn, a] 100%|██████████| 2000/2000 [00:16<00:00, 123.40it/s] There were 3 divergences after tuning. Increase `target_accept` or reparameterize. There were 11 divergences after tuning. Increase `target_accept` or reparameterize. The number of effective samples is smaller than 25% for some parameters.
pm.summary(trace_5_7, varnames, alpha=.11).round(3)

Code 5.27

seq = np.linspace(55, 76, 50) mu_pred = trace_5_7['a'] + trace_5_7['bn'][:,0] * seq[:,None] + trace_5_7['bn'][:,1] * dcc['log_mass'].mean() mu_hpd = pm.hpd(mu_pred.T) plt.plot(seq, mu_pred.mean(1), 'k') plt.plot(seq, mu_hpd[:,0], 'k--') plt.plot(seq, mu_hpd[:,1], 'k--') plt.xlabel('neocortex.perc') plt.ylabel('kcal.per.g');
Image in a Jupyter notebook

Code 5.28

N = 100 rho = 0.7 x_pos = stats.norm.rvs(size=N) x_neg = stats.norm.rvs(rho*x_pos, (1-rho**2)**0.5) y = stats.norm.rvs(x_pos - x_neg) d = pd.DataFrame([y, x_real, x_spur]).T sns.pairplot(d);
Image in a Jupyter notebook

Code 5.29

N = 100 height = stats.norm.rvs(size=N, loc=10, scale=2) leg_prop = stats.uniform.rvs(size=N, loc=0.4, scale=0.1) leg_left = leg_prop * height + stats.norm.rvs(size=N, loc=0, scale=0.02) leg_right = leg_prop * height + stats.norm.rvs(size=N, loc=0, scale=0.02)

Code 5.30

with pm.Model() as m5_8: a = pm.Normal('a', mu=10, sd=100) bl = pm.Normal('bl', mu=2, sd=10) br = pm.Normal('br', mu=2, sd=10) mu = pm.Deterministic('mu', a + bl * leg_left + br * leg_right) sigma = pm.Uniform('sigma', lower=0 , upper=10) height_p = pm.Normal('height_p', mu=mu, sd=sigma, observed=height) trace_5_8 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, br, bl, a] 100%|██████████| 2000/2000 [01:49<00:00, 18.31it/s]
varnames=['a', 'bl', 'br', 'sigma'] pm.traceplot(trace_5_8, varnames); pm.summary(trace_5_8, varnames, alpha=.11).round(3)
Image in a Jupyter notebook

Code 5.31

pm.forestplot(trace_5_8, varnames=varnames);
Image in a Jupyter notebook

Code 5.32

plt.scatter(trace_5_8['br'], trace_5_8['bl']);
Image in a Jupyter notebook

Code 5.33

sum_blbr = trace_5_8['br'] + trace_5_8['bl'] pm.kdeplot(sum_blbr);
Image in a Jupyter notebook

Code 5.34

with pm.Model() as m5_9: a = pm.Normal('a',mu = 10, sd=100) bl = pm.Normal('bl',mu=2, sd= 10) mu = pm.Deterministic('mu',a + bl * leg_left) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) height = pm.Normal('height',mu=mu, sd=sigma, observed=height) trace_5_9 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bl, a] 100%|██████████| 2000/2000 [00:04<00:00, 497.31it/s]
varnames_1 = ['a', 'bl', 'sigma'] #pm.traceplot(trace_5_9, varnames_1) pm.summary(trace_5_9, varnames_1, alpha=.11).round(3)

Code 5.35

milk = pd.read_csv('Data/milk.csv', sep=';') milk.head()

Code 5.36

with pm.Model() as m5_10: a = pm.Normal('a',mu = 0.6, sd=10) bf = pm.Normal('bf',mu=0, sd= 1) mu = pm.Deterministic('mu',a + bf * milk['perc.fat']) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) kcalperg = pm.Normal('kcal.per.g',mu=mu, sd=sigma, observed=milk['kcal.per.g']) trace_5_10 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bf, a] 100%|██████████| 2000/2000 [00:03<00:00, 584.32it/s]
varnames = ['a', 'bf', 'sigma'] pm.summary(trace_5_10, varnames, alpha=.11).round(3)
with pm.Model() as m5_11: a = pm.Normal('a',mu = 0.6, sd=10) bl = pm.Normal('bl',mu=0, sd= 1) mu = pm.Deterministic('mu',a + bl * milk['perc.lactose']) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) kcalperg = pm.Normal('kcal.per.g',mu=mu, sd=sigma, observed=milk['kcal.per.g']) trace_5_11 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bl, a] 100%|██████████| 2000/2000 [00:04<00:00, 422.39it/s]
varnames = ['a', 'bl', 'sigma'] pm.summary(trace_5_11, varnames, alpha=.11).round(3)

Code 5.37

with pm.Model() as m5_12: a = pm.Normal('a',mu = 0.6, sd=10) bf = pm.Normal('bf',mu=0, sd= 1) bl = pm.Normal('bl',mu=0, sd= 1) mu = pm.Deterministic('mu',a + bf * milk['perc.fat'] + bl * milk['perc.lactose']) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) kcalperg = pm.Normal('kcal.per.g',mu=mu, sd=sigma, observed=milk['kcal.per.g']) trace_5_12 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bl, bf, a] 100%|██████████| 2000/2000 [00:14<00:00, 137.25it/s] The acceptance probability does not match the target. It is 0.8913043977304509, but should be close to 0.8. Try to increase the number of tuning steps. There were 10 divergences after tuning. Increase `target_accept` or reparameterize. The acceptance probability does not match the target. It is 0.7166625089873978, but should be close to 0.8. Try to increase the number of tuning steps. The number of effective samples is smaller than 25% for some parameters.
varnames = ['a', 'bf', 'bl', 'sigma'] pm.summary(trace_5_12, varnames, alpha=.11).round(3)

Code 5.38

df = milk[['kcal.per.g','perc.fat','perc.lactose']] sns.pairplot(df);
Image in a Jupyter notebook

Code 5.39

milk.corr()['perc.fat']['perc.lactose']
-0.9416373456839282

Code 5.40

def simcoll(r = 0.9): milk['x'] = stats.norm.rvs(size=len(milk), loc = r * milk['perc.fat'], scale = np.sqrt((1 - r**2) * milk['perc.fat'].var())) X = np.column_stack((milk['perc.fat'], milk['x'])) m = smf.OLS(milk['kcal.per.g'], X).fit() cov = m.cov_params() return (np.diag(cov)[1])**0.5 def repsimcoll(r= 0.9, N = 100): stddev = [simcoll(r) for _ in range(N)] return np.mean(stddev) lista = [] for i in np.arange(start = 0, stop = 0.99, step = 0.01): lista.append(repsimcoll (r= i, N = 100)) plt.plot(np.arange(start = 0, stop = 0.99, step = 0.01), lista) plt.xlabel('correlation', fontsize=14) plt.ylabel('stddev', fontsize=14);
Image in a Jupyter notebook

Code 5.41

# number of plants N = 100 # simulate initial heights h0 = stats.norm.rvs(size = N, loc = 10, scale = 2) # assign treatments and simulate fungus and growth treatment = np.repeat([0, 1], [N/2]*2) fungus = np.random.binomial(n=1, p=(0.5-treatment * 0.4), size=N) h1 = h0 + stats.norm.rvs(size= N, loc= 5- 3*fungus, scale=1) # compose a clean data frame d = pd.DataFrame({'h0': h0, 'h1': h1, 'Treatment':treatment, 'Fungus': fungus})

Code 5.42

with pm.Model() as m5_13: a = pm.Normal('a',mu = 0, sd=100) bh = pm.Normal('bh',mu = 0, sd=10) bt = pm.Normal('bt',mu = 0, sd=10) bf = pm.Normal('bf',mu = 0, sd=10) mu = pm.Deterministic('mu',a + bh * h0 + bt * treatment + bf * fungus) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) h1 = pm.Normal('h1', mu = mu, sd=sigma, observed = d['h1'].get_values()) trace_5_13 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bf, bt, bh, a] 100%|██████████| 2000/2000 [00:09<00:00, 219.22it/s]
varnames = ['a', 'bh', 'bt', 'bf', 'sigma'] pm.summary(trace_5_13, varnames, alpha=.11).round(3)

Code 5.43

with pm.Model() as m5_14: a = pm.Normal('a',mu = 0, sd=100) bh = pm.Normal('bh',mu = 0, sd=10) bt = pm.Normal('bt',mu = 0, sd=10) mu = pm.Deterministic('mu',a + bh * h0 + bt * treatment) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) h1 = pm.Normal('h1', mu = mu, sd=sigma, observed =d['h1']) trace_5_14 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bt, bh, a] 100%|██████████| 2000/2000 [00:06<00:00, 323.02it/s] The acceptance probability does not match the target. It is 0.9031963539075056, but should be close to 0.8. Try to increase the number of tuning steps.
varnames = ['a', 'bh', 'bt', 'sigma'] pm.summary(trace_5_14, varnames, alpha=.11).round(3)

Code 5.44

d = pd.read_csv('Data/Howell1.csv', sep=';') d.head()

Code 5.45

with pm.Model() as m5_15: a = pm.Normal('a',mu = 178, sd=100) bm = pm.Normal('bm',mu = 0, sd=10) mu = pm.Deterministic('mu',a + bm * d['male']) sigma = pm.Uniform('sigma', lower= 0 , upper= 50) height = pm.Normal('height', mu = mu, sd=sigma, observed = d['height']) trace_5_15 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, bm, a] 100%|██████████| 2000/2000 [00:02<00:00, 838.72it/s]
varnames = ['a', 'bm', 'sigma'] pm.summary(trace_5_15, varnames, alpha=.11).round(3)

Code 5.46

mu.male = trace_5_15['a'] + trace_5_15['bm'] pm.hpd(mu.male)
array([138.85167759, 145.38548207])

Code 5.47

with pm.Model() as m5_15b: af = pm.Normal('af',mu = 178, sd=100) am = pm.Normal('am',mu = 178, sd=100) mu = pm.Deterministic('mu',af * (1 - d['male']) + am * d['male']) sigma = pm.Uniform('sigma', lower= 0 , upper= 50) height = pm.Normal('height', mu = mu, sd=sigma, observed = d['height']) trace_5_15b = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, am, af] 100%|██████████| 2000/2000 [00:01<00:00, 1144.87it/s]

Code 5.48

d = pd.read_csv('Data/milk.csv', sep=';') d = d.drop_duplicates()

Code 5.49

d['clade.NWM'] = np.where( d['clade'] == 'New World Monkey', 1, 0) d['clade.NWM'].get_values()
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

Code 5.50

d['clade.OWM'] = np.where( d['clade'] == 'Old World Monkey', 1, 0) d['clade.S'] = np.where( d['clade'] == 'Strepsirrhine', 1, 0)

Code 5.51

with pm.Model() as m5_16: a = pm.Normal('a', mu = 0.6, sd=10) b_NWM = pm.Normal('b_NWM',mu = 0, sd=1) b_OWM = pm.Normal('b_OWM',mu = 0, sd=1) b_S = pm.Normal('b_S',mu = 0, sd=1) mu = pm.Deterministic('mu', a + b_NWM * d['clade.NWM'] + b_OWM * d['clade.OWM'] + b_S * d['clade.S']) # instead of adding this as a deterministic when running the model # it is possible to add them, after sampling using something like this # trace_5_16.add_values({'mu_NWM', trace_5_16[a] + trace_5_16['b_NWM']}) mu_ape = pm.Deterministic('mu_ape', a + 0) mu_NWM = pm.Deterministic('mu_NWM', a + b_NWM) mu_OWM = pm.Deterministic('mu_OWM', a + b_OWM) mu_S = pm.Deterministic('mu_S', a + b_S) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) kcal_per_g = pm.Normal('kcal_per_g', mu = mu, sd=sigma, observed = d['kcal.per.g']) trace_5_16 = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, b_S, b_OWM, b_NWM, a] 100%|██████████| 2000/2000 [00:02<00:00, 689.23it/s]
varnames = ['a', 'b_NWM', 'b_OWM', 'b_S', 'sigma'] pm.summary(trace_5_16, varnames, alpha=.11).round(3)

Code 5.52

varnames = ['mu_ape', 'mu_NWM', 'b_OWM', 'b_S'] pm.summary(trace_5_16, varnames, alpha=.11).round(3)[['mean', 'sd', 'hpd_5.5', 'hpd_94.5']]

Code 5.53

diff_NMW_OWM = trace_5_16['mu_NWM'] - trace_5_16['mu_OWM'] np.percentile(diff_NMW_OWM, 2.5), np.percentile(diff_NMW_OWM, 50), np.percentile(diff_NMW_OWM, 97.5)
(-0.20627036770774942, -0.07802395845314053, 0.05473488710942071)

Code 5.54

z = pd.Categorical(d['clade']) d['clade_id'] = z.codes

Code 5.55

with pm.Model() as m5_16_alt: a = pm.Normal('a',mu = 0.6, sd=10) mu = pm.Deterministic('mu', a) sigma = pm.Uniform('sigma', lower= 0 , upper= 10) kcal_per_g = pm.Normal('kcal_per_g', mu = mu, sd=sigma, observed = d['kcal.per.g']) trace_5_16_alt = pm.sample(1000, tune=1000)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (2 chains in 2 jobs) NUTS: [sigma_interval__, mu] 100%|██████████| 2000/2000 [00:01<00:00, 1755.60it/s]
varnames = ['mu', 'sigma'] pm.summary(trace_5_16_alt, varnames, alpha=.11).round(3)

The following cells (5.56-5.61) correspond to example code for the use of R's function: lm. Therefore they have no output.

Code 5.62

data = pd.read_csv('Data/cars.csv', sep=',') pm.GLM.from_formula('dist ~ speed', data=data)
Intercept∼Flat()speed∼Normal(mu=0, sd=1000.0)sd∼HalfCauchy(beta=10)y∼Normal(mu=f(f(array, f(Intercept, speed)), f()), sd=f(sd))\begin{array}{rcl} \text{Intercept} &\sim & \text{Flat}()\\\text{speed} &\sim & \text{Normal}(\mathit{mu}=0,~\mathit{sd}=1000.0)\\\text{sd} &\sim & \text{HalfCauchy}(\mathit{beta}=10)\\\text{y} &\sim & \text{Normal}(\mathit{mu}=f(f(array,~f(\text{Intercept},~\text{speed})),~f()),~\mathit{sd}=f(\text{sd})) \end{array}
import sys, IPython, scipy, matplotlib, platform print("This notebook was createad on a computer %s running %s and using:\nPython %s\nIPython %s\nPyMC3 %s\nNumPy %s\nPandas %s\nSciPy %s\nMatplotlib %s\nSeaborn %s\n" % (platform.machine(), ' '.join(platform.linux_distribution()[:2]), sys.version[:5], IPython.__version__, pm.__version__, np.__version__, pd.__version__, scipy.__version__, matplotlib.__version__, sns.__version__))
This notebook was createad on a computer x86_64 running debian stretch/sid and using: Python 3.6.3 IPython 6.2.1 PyMC3 3.3 NumPy 1.14.1 Pandas 0.22.0 SciPy 1.0.0 Matplotlib 2.1.2 Seaborn 0.8.1