Path: blob/master/C5 - Sequence Models/Week 1/Building a Recurrent Neural Network - Step by Step/rnn_utils.py
import numpy as np


def softmax(x):
    # Shift by the max for numerical stability before exponentiating
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def initialize_adam(parameters):
    """
    Initializes v and s as two python dictionaries with:
                - keys: "dW1", "db1", ..., "dWL", "dbL"
                - values: numpy arrays of zeros of the same shape as the corresponding gradients/parameters.

    Arguments:
    parameters -- python dictionary containing your parameters.
                    parameters["W" + str(l)] = Wl
                    parameters["b" + str(l)] = bl

    Returns:
    v -- python dictionary that will contain the exponentially weighted average of the gradient.
                    v["dW" + str(l)] = ...
                    v["db" + str(l)] = ...
    s -- python dictionary that will contain the exponentially weighted average of the squared gradient.
                    s["dW" + str(l)] = ...
                    s["db" + str(l)] = ...
    """

    L = len(parameters) // 2  # number of layers in the neural network
    v = {}
    s = {}

    # Initialize v, s. Input: "parameters". Outputs: "v, s".
    for l in range(L):
        v["dW" + str(l + 1)] = np.zeros(parameters["W" + str(l + 1)].shape)
        v["db" + str(l + 1)] = np.zeros(parameters["b" + str(l + 1)].shape)
        s["dW" + str(l + 1)] = np.zeros(parameters["W" + str(l + 1)].shape)
        s["db" + str(l + 1)] = np.zeros(parameters["b" + str(l + 1)].shape)

    return v, s


def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate=0.01,
                                beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using Adam

    Arguments:
    parameters -- python dictionary containing your parameters:
                    parameters['W' + str(l)] = Wl
                    parameters['b' + str(l)] = bl
    grads -- python dictionary containing your gradients for each parameter:
                    grads['dW' + str(l)] = dWl
                    grads['db' + str(l)] = dbl
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    t -- iteration counter used for bias correction, starting at 1
    learning_rate -- the learning rate, scalar
    beta1 -- exponential decay hyperparameter for the first moment estimates
    beta2 -- exponential decay hyperparameter for the second moment estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates

    Returns:
    parameters -- python dictionary containing your updated parameters
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    """

    L = len(parameters) // 2  # number of layers in the neural network
    v_corrected = {}          # bias-corrected first moment estimate, python dictionary
    s_corrected = {}          # bias-corrected second moment estimate, python dictionary

    # Perform Adam update on all parameters
    for l in range(L):
        # Moving average of the gradients. Inputs: "v, grads, beta1". Output: "v".
        v["dW" + str(l + 1)] = beta1 * v["dW" + str(l + 1)] + (1 - beta1) * grads["dW" + str(l + 1)]
        v["db" + str(l + 1)] = beta1 * v["db" + str(l + 1)] + (1 - beta1) * grads["db" + str(l + 1)]

        # Compute bias-corrected first moment estimate. Inputs: "v, beta1, t". Output: "v_corrected".
        v_corrected["dW" + str(l + 1)] = v["dW" + str(l + 1)] / (1 - beta1 ** t)
        v_corrected["db" + str(l + 1)] = v["db" + str(l + 1)] / (1 - beta1 ** t)

        # Moving average of the squared gradients. Inputs: "s, grads, beta2". Output: "s".
        s["dW" + str(l + 1)] = beta2 * s["dW" + str(l + 1)] + (1 - beta2) * (grads["dW" + str(l + 1)] ** 2)
        s["db" + str(l + 1)] = beta2 * s["db" + str(l + 1)] + (1 - beta2) * (grads["db" + str(l + 1)] ** 2)

        # Compute bias-corrected second raw moment estimate. Inputs: "s, beta2, t". Output: "s_corrected".
        s_corrected["dW" + str(l + 1)] = s["dW" + str(l + 1)] / (1 - beta2 ** t)
        s_corrected["db" + str(l + 1)] = s["db" + str(l + 1)] / (1 - beta2 ** t)

        # Update parameters: W := W - alpha * v_corrected / (sqrt(s_corrected) + epsilon).
        # Note: epsilon is added after the square root, matching the Adam update rule.
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * v_corrected["dW" + str(l + 1)] / (np.sqrt(s_corrected["dW" + str(l + 1)]) + epsilon)
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * v_corrected["db" + str(l + 1)] / (np.sqrt(s_corrected["db" + str(l + 1)]) + epsilon)

    return parameters, v, s
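For context, here is a minimal smoke test of the two Adam helpers above. The network shapes, seed, and gradient values are made up purely for illustration; only initialize_adam and update_parameters_with_adam come from this file.

if __name__ == "__main__":
    # Hypothetical two-layer network; shapes are arbitrary, chosen only for this sketch.
    np.random.seed(0)
    parameters = {
        "W1": np.random.randn(3, 4), "b1": np.zeros((3, 1)),
        "W2": np.random.randn(1, 3), "b2": np.zeros((1, 1)),
    }
    # Made-up gradients standing in for the output of backpropagation.
    grads = {
        "dW1": np.random.randn(3, 4), "db1": np.random.randn(3, 1),
        "dW2": np.random.randn(1, 3), "db2": np.random.randn(1, 1),
    }

    v, s = initialize_adam(parameters)
    # t starts at 1 so the bias-correction denominators (1 - beta**t) are nonzero.
    parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t=1,
                                                   learning_rate=0.01)
    print("W1 after one Adam step:\n", parameters["W1"])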