After solving all the questions in the exam, save your notebook with the name username.ipynb
and submit it to: https://www.dropbox.com/request/KN8GwdAIi0Hl2jk2mg2E
The following code implements a simple one-neuron neural network:
import numpy as np
import pylab as pl
%matplotlib inline

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def predict(w, x):
    return sigmoid(np.dot(w[:2], np.array(x).T) + w[2])
X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 1, 1, 0]

w = np.array([-10, -10, 5])
predict(w, X)
Find a weight vector such that the neural network calculates the NOR function:
$$ f(x,y)=\neg(x\vee y) $$
Use the following function to test your answer:
def test_prediction(X, Y, w):
    epsilon = 0.001
    for i, x in enumerate(X):
        print predict(w, x)
        if np.abs(predict(w, x) - Y[i]) > epsilon:
            raise Exception("Prediction error")
    return True
X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [1, 0, 0, 0]

w = np.array([-15, -15, 7])
test_prediction(X, Y, w)
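One way to check these weights by hand (writing $\sigma$ for the sigmoid): only the input $(0,0)$ leaves the bias alone, while any active input pushes the pre-activation far below zero,
$$ \sigma(7)\approx0.9991,\qquad\sigma(-15+7)=\sigma(-8)\approx0.0003,\qquad\sigma(-30+7)=\sigma(-23)\approx10^{-10}, $$
so all four predictions are within $\epsilon=0.001$ of the NOR targets.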
The following function calculates the loss of the neural network for a single training example:
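In formula form this is the squared-error loss
$$ E(w,x,y)=\frac{1}{2}\left(f(w,x)-y\right)^{2} $$
where $f(w,x)$ is the prediction calculated by the neural network.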
def loss(w, x, y):
    return ((predict(w, x) - y) ** 2) / 2
Write a function that calculates the gradient of the loss with respect to the weights:
$$ \frac{\partial E}{\partial w} $$
def de_dw(w, x, y):
    delta = np.zeros(len(w))
    r = predict(w, x)
    out_error = y - r
    # gradient of the loss with respect to each weight (w[2] is the bias)
    delta[0] = -out_error * r * (1 - r) * x[0]
    delta[1] = -out_error * r * (1 - r) * x[1]
    delta[2] = -out_error * r * (1 - r)
    return delta
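For reference, applying the chain rule to the squared-error loss with the sigmoid activation $\sigma$ (whose derivative is $\sigma'(z)=\sigma(z)(1-\sigma(z))$) gives, writing $r=f(w,x)$ and taking $x_{2}:=1$ for the bias weight,
$$ \frac{\partial E}{\partial w_{j}}=(r-y)\,r\,(1-r)\,x_{j} $$
which is what de_dw computes (with the sign written as $-(y-r)$).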
X = np.array([0, 0])
Y = [1]
w = np.array([0, 0, 0])
de_dw(w, X, Y)
Use the following functions to test your code:
def num_de_dw(w, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (loss(w + deltas[i, :], x, y) - loss(w - deltas[i, :], x, y)) / (2 * epsilon)
    return de
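num_de_dw checks the analytical gradient against a central-difference approximation, where $e_{i}$ is the $i$-th standard basis vector and $\epsilon$ is a small step:
$$ \frac{\partial E}{\partial w_{i}}\approx\frac{E(w+\epsilon e_{i},x,y)-E(w-\epsilon e_{i},x,y)}{2\epsilon} $$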
def test_de_dw():
    num_tests = 100
    epsilon = 0.01
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        if np.linalg.norm(de_dw(tw, tx, ty) - num_de_dw(tw, tx, ty, epsilon)) > epsilon:
            raise Exception("de_dw test failed!")

test_de_dw()
Now we can use the gradient function to train the neural network using gradient descent:
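The training loop below implements batch gradient descent: each epoch sums the per-example gradients and updates the weights with learning rate $\eta$:
$$ w\leftarrow w-\eta\sum_{(x_{i},y_{i})\in D}\frac{\partial E(w,x_{i},y_{i})}{\partial w} $$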
def evaluate(w, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += loss(w, x, Y[i])
    return result

def train(X, Y, epochs, eta, w_ini):
    losses = []
    w = np.array(w_ini, dtype=float)
    for epoch in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += de_dw(w, x, Y[i])
        w = w - eta * delta
        losses.append(evaluate(w, X, Y))
    return w, losses
X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 0, 1]

w, losses = train(X, Y, 50, 1, [0, 0, 0])
pl.plot(losses)
print w
print predict(w, X)
Now we will modify the loss function to include a regularization term:
$$ E(w,D)=\frac{1}{2}\sum_{(x_{i},y_{i})\in D}(f(w,x_{i})-y_{i})^{2}+\frac{1}{2}\beta\left\Vert w\right\Vert _{2}^{2} $$
where $f(w,x_{i})$ is the prediction calculated by the neural network.
To accomplish this you must modify the following functions:
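Note that reg_loss and reg_de_dw below work on a single example, so the regularization term is included once per example; differentiating it adds a $\beta w$ term to the gradient:
$$ \frac{\partial}{\partial w}\left(\frac{1}{2}\beta\left\Vert w\right\Vert _{2}^{2}\right)=\beta w $$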
def reg_loss(w, beta, x, y):
    return (((predict(w, x) - y) ** 2) / 2) + 0.5 * beta * (np.linalg.norm(w) ** 2)
def reg_de_dw(w, beta, x, y):
    delta = np.zeros(len(w))
    r = predict(w, x)
    out_error = y - r
    # gradient of the squared-error term
    delta[0] = -out_error * r * (1 - r) * x[0]
    delta[1] = -out_error * r * (1 - r) * x[1]
    delta[2] = -out_error * r * (1 - r)
    # gradient of the regularization term
    delta += beta * np.asarray(w, dtype=float)
    return delta
X = [0, 0]
Y = [1]
w = np.array([-20, -20, 20])
print reg_loss(w, 1, X, Y)
print reg_de_dw(w, 1, X, Y)
You can use the following functions to test your code:
def reg_num_de_dw(w, beta, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (reg_loss(w + deltas[i, :], beta, x, y) - reg_loss(w - deltas[i, :], beta, x, y)) / (2 * epsilon)
    return de
def reg_test_de_dw():
    num_tests = 100
    epsilon = 0.01
    beta = 1
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        print np.linalg.norm(reg_de_dw(tw, beta, tx, ty))
        if np.linalg.norm(reg_de_dw(tw, beta, tx, ty) - reg_num_de_dw(tw, beta, tx, ty, epsilon)) > epsilon:
            raise Exception("reg_de_dw test failed!")

reg_test_de_dw()
Now train the neural network using regularization:
def reg_evaluate(w, beta, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += reg_loss(w, beta, x, Y[i])
    return result

def reg_train(X, Y, epochs, eta, w_ini, beta):
    losses = []
    w = np.array(w_ini, dtype=float)
    for epoch in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += reg_de_dw(w, beta, x, Y[i])
        w = w - eta * delta
        losses.append(reg_evaluate(w, beta, X, Y))
    return w, losses
X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 0, 1]

wr, losses1 = reg_train(X, Y, 50, 1, [0, 0, 0], 1)
pl.plot(losses1)
print wr
print predict(wr, X)
What is the effect of regularization? Discuss.
Here, we will build a kernel version of the previous neural network, i.e., a neural network able to work in a feature space induced by a kernel. To do this we will express the weight vector as a linear combination of the images $\phi(x_{i})$ of the vectors in a set $X$:
$$ w=\sum_{x_{i}\in X}\alpha_{i}\phi(x_{i}) $$
Now, implement this by modifying the following functions:
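With this representation the feature map $\phi$ never has to be computed explicitly: for a kernel $k(x_{i},x)=\phi(x_{i})\cdot\phi(x)$ the pre-activation of the neuron becomes
$$ w\cdot\phi(x)=\sum_{x_{i}\in X}\alpha_{i}k(x_{i},x) $$
so the prediction is $\sigma\left(\sum_{i}\alpha_{i}k(x_{i},x)\right)$ and the trainable parameters are the coefficients $\alpha_{i}$ (a constant offset can be absorbed into the kernel, as the $+1$ term of $k_2$ below does).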
def k_predict(alpha, X, kernel, x):
    # pre-activation in feature space: w . phi(x) = sum_i alpha_i * k(x_i, x)
    result = 0
    for i, xs in enumerate(X):
        result += alpha[i] * kernel(xs, x)
    return sigmoid(result)
def k_loss(alpha, X, beta, kernel, x, y):
    # squared error plus the regularizer 0.5 * beta * ||w||^2,
    # where ||w||^2 = sum_ij alpha_i * alpha_j * k(x_i, x_j)
    K = np.array([[kernel(xi, xj) for xj in X] for xi in X])
    a = np.array(alpha)
    return ((k_predict(alpha, X, kernel, x) - y) ** 2) / 2 + 0.5 * beta * a.dot(K).dot(a)
Test your functions with the following code:
alpha = [1, 0.5, -0.3, -0.4]
Xs = [[0.1, -0.5],
      [0.5, 1.0],
      [-1.0, 0.5],
      [1.0, 1.0]]

def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [1, 0, 0, 0]

for i, x in enumerate(X):
    print k_predict(alpha, Xs, k1, x), k_loss(alpha, Xs, 1, k1, x, Y[i])
print "--------"
for i, x in enumerate(X):
    print k_predict(alpha, Xs, k2, x), k_loss(alpha, Xs, 1, k2, x, Y[i])
Train the kernel neural network using gradient descent.
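With the kernel prediction $r=\sigma\left(\sum_{j}\alpha_{j}k(x_{j},x)\right)$, the chain rule gives the gradient of the squared-error loss with respect to each coefficient:
$$ \frac{\partial E}{\partial\alpha_{j}}=(r-y)\,r\,(1-r)\,k(x_{j},x) $$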
def loss_k(alpha, Xs, kernel, x, y):
    return ((k_predict(alpha, Xs, kernel, x) - y) ** 2) / 2
def de_dw_k(alpha, Xs, kernel, x, y):
    delta = np.zeros(len(alpha))
    r = k_predict(alpha, Xs, kernel, x)
    out_error = y - r
    # gradient with respect to each alpha_j; the pre-activation is sum_j alpha_j * k(x_j, x)
    for j, xs in enumerate(Xs):
        delta[j] = -out_error * r * (1 - r) * kernel(xs, x)
    return delta
def evaluate_k(alpha, Xs, kernel, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += loss_k(alpha, Xs, kernel, x, Y[i])
    return result
def train_k(X, Y, epochs, eta, alpha_ini, Xs, kernel):
    losses = []
    alpha = np.array(alpha_ini, dtype=float)
    for epoch in range(epochs):
        delta = np.zeros(len(alpha))
        for i, x in enumerate(X):
            delta += de_dw_k(alpha, Xs, kernel, x, Y[i])
        alpha = alpha - eta * delta
        losses.append(evaluate_k(alpha, Xs, kernel, X, Y))
    return alpha, losses
X = [[0, 0],
     [0, 1],
     [1, 0],
     [1, 1]]
Y = [0, 0, 1, 0]

# wr holds the trained alpha coefficients over the support set Xs
wr, losses = train_k(X, Y, 50, 0.1, alpha, Xs, k1)
pl.plot(losses)
print wr
for x in X:
    print k_predict(wr, Xs, k1, x)
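The last cell defines the Gaussian (RBF) kernel
$$ k(v_{1},v_{2})=\exp\left(-\frac{\left\Vert v_{1}-v_{2}\right\Vert _{2}^{2}}{2\sigma^{2}}\right) $$
for which $k(v,v)=1$ for every $v$, as the check below prints.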
def GaussianKernel(v1, v2, sigma):
    return np.exp(-np.linalg.norm(v1 - v2, 2) ** 2 / (2. * sigma ** 2))

X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
for i in X:
    print GaussianKernel(i, i, 1)