Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
fastai
GitHub Repository: fastai/course22
Path: blob/master/clean/05-linear-model-and-neural-net-from-scratch.ipynb
807 views
Kernel: python3

Introduction

# Locate the Titanic data. On a Kaggle kernel it is pre-mounted under
# ../input; elsewhere, download and unzip it next to the notebook on first run.
import os
from pathlib import Path

iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
if iskaggle:
    path = Path('../input/titanic')
else:
    path = Path('titanic')
    if not path.exists():
        import zipfile, kaggle
        kaggle.api.competition_download_cli(str(path))
        zipfile.ZipFile(f'{path}.zip').extractall(path)

# Widen print settings so tensors and dataframes fit on one display line.
import torch, numpy as np, pandas as pd
np.set_printoptions(linewidth=140)
torch.set_printoptions(linewidth=140, sci_mode=False, edgeitems=7)
pd.set_option('display.width', 140)

Cleaning the data

# Load the training data and deal with missing values and categoricals.
df = pd.read_csv(path/'train.csv')
df
df.isna().sum()

# Per-column modes; iloc[0] because mode() can return several rows on ties.
modes = df.mode().iloc[0]
modes
df.fillna(modes, inplace=True)
df.isna().sum()

import numpy as np
df.describe(include=(np.number))

# Fare is heavily right-skewed; log1p-style transform tames it.
df['Fare'].hist();
df['LogFare'] = np.log(df['Fare']+1)
df['LogFare'].hist();

pclasses = sorted(df.Pclass.unique())
pclasses
df.describe(include=[object])

# One-hot encode the categorical columns.
df = pd.get_dummies(df, columns=["Sex","Pclass","Embarked"])
df.columns
added_cols = ['Sex_male', 'Sex_female', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S']
df[added_cols].head()

# Dependent (target) and independent (feature) tensors.
from torch import tensor
t_dep = tensor(df.Survived)
indep_cols = ['Age', 'SibSp', 'Parch', 'LogFare'] + added_cols
t_indep = tensor(df[indep_cols].values, dtype=torch.float)
t_indep
t_indep.shape

Setting up a linear model

# One random coefficient per feature, centred on zero.
torch.manual_seed(442)
n_coeff = t_indep.shape[1]
coeffs = torch.rand(n_coeff) - 0.5
coeffs

t_indep * coeffs

# Normalise each column by its maximum so no single feature (e.g. Age)
# dominates the weighted sum.
vals, indices = t_indep.max(dim=0)
t_indep = t_indep / vals
t_indep * coeffs

# First predictions and their mean-absolute-error loss.
preds = (t_indep * coeffs).sum(axis=1)
preds[:10]
loss = torch.abs(preds - t_dep).mean()
loss
def calc_preds(coeffs, indeps):
    """Linear predictions: weight each feature column and sum across features."""
    return (indeps * coeffs).sum(axis=1)


def calc_loss(coeffs, indeps, deps):
    """Mean absolute error between the model's predictions and the targets."""
    preds = calc_preds(coeffs, indeps)
    return (preds - deps).abs().mean()

Doing a gradient descent step

# Turn on autograd for the coefficients and take one manual gradient step.
coeffs.requires_grad_()
loss = calc_loss(coeffs, t_indep, t_dep)
loss
loss.backward()
coeffs.grad

# NOTE: .backward() *accumulates* into .grad — calling it again doubles it.
loss = calc_loss(coeffs, t_indep, t_dep)
loss.backward()
coeffs.grad

# One SGD step (lr=0.1) inside no_grad so the update isn't itself tracked,
# then zero the grad ready for the next pass.
loss = calc_loss(coeffs, t_indep, t_dep)
loss.backward()
with torch.no_grad():
    coeffs.sub_(coeffs.grad * 0.1)
    coeffs.grad.zero_()
    print(calc_loss(coeffs, t_indep, t_dep))

Training the linear model

# Random 80/20 train/validation split (fastai returns index tensors).
from fastai.data.transforms import RandomSplitter
trn_split, val_split = RandomSplitter(seed=42)(df)

trn_indep, val_indep = t_indep[trn_split], t_indep[val_split]
trn_dep, val_dep = t_dep[trn_split], t_dep[val_split]
len(trn_indep), len(val_indep)
def update_coeffs(coeffs, lr):
    """One in-place SGD step: coeffs -= lr * grad, then clear the gradient."""
    step = coeffs.grad * lr
    coeffs.sub_(step)
    coeffs.grad.zero_()
def one_epoch(coeffs, lr):
    """One full-batch pass: loss, backprop, SGD step, and a progress print."""
    epoch_loss = calc_loss(coeffs, trn_indep, trn_dep)
    epoch_loss.backward()
    with torch.no_grad():
        update_coeffs(coeffs, lr)
    print(f"{epoch_loss:.3f}", end="; ")
def init_coeffs():
    """Fresh random coefficients in [-0.5, 0.5), with autograd tracking on."""
    fresh = torch.rand(n_coeff) - 0.5
    return fresh.requires_grad_()
def train_model(epochs=30, lr=0.01):
    """Train from a fixed seed for `epochs` full-batch steps; return the coefficients."""
    torch.manual_seed(442)
    coeffs = init_coeffs()
    for _ in range(epochs):
        one_epoch(coeffs, lr=lr)
    return coeffs
coeffs = train_model(18, lr=0.2)

def show_coeffs():
    """Pair each feature name with its learned coefficient (grad tracking off)."""
    coeffs.requires_grad_(False)  # detach for plain display; mutates in place
    return dict(zip(indep_cols, coeffs))

show_coeffs()

Measuring accuracy

# A prediction above 0.5 is treated as "survived"; compare with the truth.
preds = calc_preds(coeffs, val_indep)
results = val_dep.bool() == (preds > 0.5)
results[:16]
results.float().mean()

def acc(coeffs):
    """Validation-set accuracy at a 0.5 decision threshold."""
    hits = val_dep.bool() == (calc_preds(coeffs, val_indep) > 0.5)
    return hits.float().mean()

acc(coeffs)

Using sigmoid

# Raw linear predictions escape [0, 1]; plot the sigmoid that will fix that.
preds[:28]
import sympy
sympy.plot("1/(1+exp(-x))", xlim=(-5, 5));
def calc_preds(coeffs, indeps):
    """Weighted-sum predictions squashed through a sigmoid into (0, 1)."""
    scores = (indeps * coeffs).sum(axis=1)
    return scores.sigmoid()
# With sigmoid outputs the loss surface tolerates a far larger learning rate.
coeffs = train_model(lr=100)
acc(coeffs)
show_coeffs()

Submitting to Kaggle

tst_df = pd.read_csv(path/'test.csv')
tst_df['Fare'] = tst_df.Fare.fillna(0)
tst_df.fillna(modes, inplace=True) tst_df['LogFare'] = np.log(tst_df['Fare']+1) tst_df = pd.get_dummies(tst_df, columns=["Sex","Pclass","Embarked"]) tst_indep = tensor(tst_df[indep_cols].values, dtype=torch.float) tst_indep = tst_indep / vals
tst_df['Survived'] = (calc_preds(tst_indep, coeffs)>0.5).int()
sub_df = tst_df[['PassengerId','Survived']] sub_df.to_csv('sub.csv', index=False)
!head sub.csv

Using matrix product

# Multiply-then-sum across features is exactly a matrix-vector product.
(val_indep * coeffs).sum(axis=1)
val_indep @ coeffs
def calc_preds(coeffs, indeps):
    """Sigmoid of the matrix product of the features with the coefficients."""
    return (indeps @ coeffs).sigmoid()
def init_coeffs():
    """Column-vector of small positive random coefficients, autograd enabled."""
    column = torch.rand(n_coeff, 1) * 0.1
    return column.requires_grad_()
# Targets become column vectors so they broadcast against (n, 1) predictions.
trn_dep = trn_dep[:, None]
val_dep = val_dep[:, None]

coeffs = train_model(lr=100)
acc(coeffs)

A neural network

def init_coeffs(n_hidden=20):
    """Random parameters for a one-hidden-layer net: (layer1, layer2, bias)."""
    l1 = (torch.rand(n_coeff, n_hidden) - 0.5) / n_hidden
    l2 = torch.rand(n_hidden, 1) - 0.3
    bias = torch.rand(1)[0]
    return l1.requires_grad_(), l2.requires_grad_(), bias.requires_grad_()
import torch.nn.functional as F

def calc_preds(coeffs, indeps):
    """Forward pass of the one-hidden-layer net: sigmoid(relu(x@l1)@l2 + const)."""
    l1, l2, const = coeffs
    hidden = F.relu(indeps @ l1)
    return torch.sigmoid(hidden @ l2 + const)
def update_coeffs(coeffs, lr):
    """In-place SGD step over every parameter tensor, clearing each gradient."""
    for param in coeffs:
        param.sub_(param.grad * lr)
        param.grad.zero_()
# Two learning-rate experiments for the neural net; keep the second result.
coeffs = train_model(lr=1.4)
coeffs = train_model(lr=20)
acc(coeffs)

Deep learning

def init_coeffs():
    """Random parameters for a deep net; returns (weight matrices, biases)."""
    hiddens = [10, 10]  # <-- set this to the size of each hidden layer you want
    sizes = [n_coeff] + hiddens + [1]
    layers = [(torch.rand(n_in, n_out) - 0.3) / n_out * 4
              for n_in, n_out in zip(sizes, sizes[1:])]
    consts = [(torch.rand(1)[0] - 0.5) * 0.1 for _ in range(len(sizes) - 1)]
    for p in layers + consts:
        p.requires_grad_()
    return layers, consts
import torch.nn.functional as F

def calc_preds(coeffs, indeps):
    """Deep-net forward pass: affine layers with relu between, sigmoid at the end."""
    layers, consts = coeffs
    last = len(layers) - 1
    res = indeps
    for i, (weight, bias) in enumerate(zip(layers, consts)):
        res = res @ weight + bias
        if i != last:
            res = F.relu(res)
    return torch.sigmoid(res)
def update_coeffs(coeffs, lr):
    """In-place SGD step for every weight matrix and bias, clearing gradients."""
    layers, consts = coeffs
    for param in layers + consts:
        param.sub_(param.grad * lr)
        param.grad.zero_()
# Train the deep network and report validation accuracy.
coeffs = train_model(lr=4)
acc(coeffs)

Final thoughts