Path: blob/master/Neural Networks and Deep Learning/Week 4/Deep Neural Network Application Image Classification/dnn_app_utils_v3.py
import numpy as np
import matplotlib.pyplot as plt
import h5py


def sigmoid(Z):
    """
    Implements the sigmoid activation in numpy

    Arguments:
    Z -- numpy array of any shape

    Returns:
    A -- output of sigmoid(z), same shape as Z
    cache -- returns Z as well, useful during backpropagation
    """

    A = 1/(1+np.exp(-Z))
    cache = Z

    return A, cache


def relu(Z):
    """
    Implement the RELU function.

    Arguments:
    Z -- Output of the linear layer, of any shape

    Returns:
    A -- Post-activation parameter, of the same shape as Z
    cache -- returns Z as well; stored for computing the backward pass efficiently
    """

    A = np.maximum(0, Z)

    assert(A.shape == Z.shape)

    cache = Z
    return A, cache


def relu_backward(dA, cache):
    """
    Implement the backward propagation for a single RELU unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache
    dZ = np.array(dA, copy=True)  # just converting dz to a correct object.

    # When z <= 0, you should set dz to 0 as well.
    dZ[Z <= 0] = 0

    assert(dZ.shape == Z.shape)

    return dZ


def sigmoid_backward(dA, cache):
    """
    Implement the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache

    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)

    assert(dZ.shape == Z.shape)

    return dZ


def load_data():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels

    classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def initialize_parameters(n_x, n_h, n_y):
    """
    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """

    np.random.seed(1)

    W1 = np.random.randn(n_h, n_x)*0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h)*0.01
    b2 = np.zeros((n_y, 1))

    assert(W1.shape == (n_h, n_x))
    assert(b1.shape == (n_h, 1))
    assert(W2.shape == (n_y, n_h))
    assert(b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters


def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """

    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)  # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])  # *0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters


def linear_forward(A, W, b):
    """
    Implement the linear part of a layer's forward propagation.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter
    cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
    """

    Z = W.dot(A) + b

    assert(Z.shape == (W.shape[0], A.shape[1]))
    cache = (A, W, b)

    return Z, cache


def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """

    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    assert(A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache


def L_model_forward(X, parameters):
    """
    Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()

    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (there are L-1 of them, indexed from 0 to L-2)
                the cache of linear_activation_forward() with "sigmoid" (there is one, indexed L-1)
    """

    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the neural network

    # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation="relu")
        caches.append(cache)

    # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation="sigmoid")
    caches.append(cache)

    assert(AL.shape == (1, X.shape[1]))

    return AL, caches


def compute_cost(AL, Y):
    """
    Implement the cost function defined by equation (7).

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """

    m = Y.shape[1]

    # Compute loss from aL and y.
    cost = (1./m) * (-np.dot(Y, np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))

    cost = np.squeeze(cost)  # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
    assert(cost.shape == ())

    return cost


def linear_backward(dZ, cache):
    """
    Implement the linear portion of backward propagation for a single layer (layer l)

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = 1./m * np.dot(dZ, A_prev.T)
    db = 1./m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    assert(dA_prev.shape == A_prev.shape)
    assert(dW.shape == W.shape)
    assert(db.shape == b.shape)

    return dA_prev, dW, db


def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db


def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (there are (L-1) of them, indexed from 0 to L-2)
                the cache of linear_activation_forward() with "sigmoid" (there is one, index L-1)

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients.
    # Inputs: dAL, current_cache. Outputs: grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)]
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation="sigmoid")

    for l in reversed(range(L-1)):
        # lth layer: (RELU -> LINEAR) gradients.
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads


def update_parameters(parameters, grads, learning_rate):
    """
    Update parameters using gradient descent

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward
    learning_rate -- the learning rate, a scalar used in the gradient descent update

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """

    L = len(parameters) // 2  # number of layers in the neural network

    # Update rule for each parameter. Use a for loop.
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]

    return parameters


def predict(X, y, parameters):
    """
    This function is used to predict the results of an L-layer neural network.

    Arguments:
    X -- data set of examples you would like to label
    y -- true "label" vector, used to report accuracy
    parameters -- parameters of the trained model

    Returns:
    p -- predictions for the given dataset X
    """

    m = X.shape[1]
    n = len(parameters) // 2  # number of layers in the neural network
    p = np.zeros((1, m))

    # Forward propagation
    probas, caches = L_model_forward(X, parameters)

    # convert probas to 0/1 predictions
    for i in range(0, probas.shape[1]):
        if probas[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0

    # print results
    # print ("predictions: " + str(p))
    # print ("true labels: " + str(y))
    print("Accuracy: " + str(np.sum((p == y)/m)))

    return p


def print_mislabeled_images(classes, X, y, p):
    """
    Plots images where predictions and truth were different.
    X -- dataset
    y -- true labels
    p -- predictions
    """
    a = p + y
    mislabeled_indices = np.asarray(np.where(a == 1))
    plt.rcParams['figure.figsize'] = (40.0, 40.0)  # set default size of plots
    num_images = len(mislabeled_indices[0])
    for i in range(num_images):
        index = mislabeled_indices[1][i]

        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + classes[int(p[0, index])].decode("utf-8") + " \n Class: " + classes[y[0, index]].decode("utf-8"))
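For context, a minimal sketch of how these helpers are typically wired together into an L-layer training run on the cat/non-cat data returned by load_data(). The layer sizes in layers_dims, the learning rate, and the iteration count below are illustrative assumptions, not values defined in this file.

# Illustrative usage sketch (assumed hyperparameters), not part of dnn_app_utils_v3.py.
from dnn_app_utils_v3 import (load_data, initialize_parameters_deep, L_model_forward,
                              compute_cost, L_model_backward, update_parameters, predict)

train_x_orig, train_y, test_x_orig, test_y, classes = load_data()

# Flatten the (m, 64, 64, 3) image tensors into (64*64*3, m) column vectors and scale to [0, 1].
train_x = train_x_orig.reshape(train_x_orig.shape[0], -1).T / 255.
test_x = test_x_orig.reshape(test_x_orig.shape[0], -1).T / 255.

layers_dims = [train_x.shape[0], 20, 7, 5, 1]  # assumed 4-layer architecture
learning_rate = 0.0075                         # assumed learning rate
num_iterations = 2500                          # assumed number of gradient steps

parameters = initialize_parameters_deep(layers_dims)
for i in range(num_iterations):
    AL, caches = L_model_forward(train_x, parameters)                 # forward pass
    cost = compute_cost(AL, train_y)                                  # cross-entropy cost
    grads = L_model_backward(AL, train_y, caches)                     # backward pass
    parameters = update_parameters(parameters, grads, learning_rate)  # gradient descent step
    if i % 100 == 0:
        print(f"Cost after iteration {i}: {float(cost):.6f}")

pred_train = predict(train_x, train_y, parameters)  # prints training accuracy
pred_test = predict(test_x, test_y, parameters)     # prints test accuracy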