GitHub Repository: amanchadha/coursera-deep-learning-specialization
Path: blob/master/C5 - Sequence Models/Week 1/Building a Recurrent Neural Network - Step by Step/rnn_utils.py
import numpy as np

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))
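
# Quick illustrative checks (values rounded; this comment block is not part of the
# original assignment code):
#   >>> softmax(np.array([1.0, 2.0, 3.0]))
#   array([0.09003057, 0.24472847, 0.66524096])    # entries sum to 1
#   >>> sigmoid(np.array([0.0, 2.0]))
#   array([0.5       , 0.88079708])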


def initialize_adam(parameters):
    """
    Initializes v and s as two python dictionaries with:
                - keys: "dW1", "db1", ..., "dWL", "dbL"
                - values: numpy arrays of zeros of the same shape as the corresponding gradients/parameters.

    Arguments:
    parameters -- python dictionary containing your parameters.
                    parameters["W" + str(l)] = Wl
                    parameters["b" + str(l)] = bl

    Returns:
    v -- python dictionary that will contain the exponentially weighted average of the gradient.
                    v["dW" + str(l)] = ...
                    v["db" + str(l)] = ...
    s -- python dictionary that will contain the exponentially weighted average of the squared gradient.
                    s["dW" + str(l)] = ...
                    s["db" + str(l)] = ...

    """

    L = len(parameters) // 2  # number of layers in the neural network
    v = {}
    s = {}

    # Initialize v, s. Input: "parameters". Outputs: "v, s".
    for l in range(L):
        ### START CODE HERE ### (approx. 4 lines)
        v["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
        v["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
        s["dW" + str(l+1)] = np.zeros(parameters["W" + str(l+1)].shape)
        s["db" + str(l+1)] = np.zeros(parameters["b" + str(l+1)].shape)
        ### END CODE HERE ###

    return v, s
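
# Illustrative call with hypothetical 2-layer shapes (not taken from the course notebooks):
#   >>> params = {"W1": np.random.randn(3, 2), "b1": np.zeros((3, 1)),
#   ...           "W2": np.random.randn(1, 3), "b2": np.zeros((1, 1))}
#   >>> v, s = initialize_adam(params)
#   >>> v["dW1"].shape, s["db2"].shape
#   ((3, 2), (1, 1))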


def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate=0.01,
                                beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using Adam

    Arguments:
    parameters -- python dictionary containing your parameters:
                    parameters['W' + str(l)] = Wl
                    parameters['b' + str(l)] = bl
    grads -- python dictionary containing your gradients for each parameter:
                    grads['dW' + str(l)] = dWl
                    grads['db' + str(l)] = dbl
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    t -- Adam timestep (number of updates taken so far), used for bias correction
    learning_rate -- the learning rate, scalar.
    beta1 -- Exponential decay hyperparameter for the first moment estimates
    beta2 -- Exponential decay hyperparameter for the second moment estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates

    Returns:
    parameters -- python dictionary containing your updated parameters
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    """

    L = len(parameters) // 2  # number of layers in the neural network
    v_corrected = {}  # Initializing first moment estimate, python dictionary
    s_corrected = {}  # Initializing second moment estimate, python dictionary

    # Perform Adam update on all parameters
    for l in range(L):
        # Moving average of the gradients. Inputs: "v, grads, beta1". Output: "v".
        ### START CODE HERE ### (approx. 2 lines)
        v["dW" + str(l+1)] = beta1 * v["dW" + str(l+1)] + (1 - beta1) * grads["dW" + str(l+1)]
        v["db" + str(l+1)] = beta1 * v["db" + str(l+1)] + (1 - beta1) * grads["db" + str(l+1)]
        ### END CODE HERE ###

        # Compute bias-corrected first moment estimate. Inputs: "v, beta1, t". Output: "v_corrected".
        ### START CODE HERE ### (approx. 2 lines)
        v_corrected["dW" + str(l+1)] = v["dW" + str(l+1)] / (1 - beta1 ** t)
        v_corrected["db" + str(l+1)] = v["db" + str(l+1)] / (1 - beta1 ** t)
        ### END CODE HERE ###

        # Moving average of the squared gradients. Inputs: "s, grads, beta2". Output: "s".
        ### START CODE HERE ### (approx. 2 lines)
        s["dW" + str(l+1)] = beta2 * s["dW" + str(l+1)] + (1 - beta2) * (grads["dW" + str(l+1)] ** 2)
        s["db" + str(l+1)] = beta2 * s["db" + str(l+1)] + (1 - beta2) * (grads["db" + str(l+1)] ** 2)
        ### END CODE HERE ###

        # Compute bias-corrected second raw moment estimate. Inputs: "s, beta2, t". Output: "s_corrected".
        ### START CODE HERE ### (approx. 2 lines)
        s_corrected["dW" + str(l+1)] = s["dW" + str(l+1)] / (1 - beta2 ** t)
        s_corrected["db" + str(l+1)] = s["db" + str(l+1)] / (1 - beta2 ** t)
        ### END CODE HERE ###

        # Update parameters. Inputs: "parameters, learning_rate, v_corrected, s_corrected, epsilon". Output: "parameters".
        ### START CODE HERE ### (approx. 2 lines)
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * v_corrected["dW" + str(l+1)] / np.sqrt(s_corrected["dW" + str(l+1)] + epsilon)
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * v_corrected["db" + str(l+1)] / np.sqrt(s_corrected["db" + str(l+1)] + epsilon)
        ### END CODE HERE ###

    return parameters, v, s
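

# Minimal end-to-end sketch (illustrative only; shapes, seed, and "gradients" below are
# arbitrary placeholders, not taken from the assignment). Runs a single Adam step on a
# tiny one-layer parameter set when this file is executed directly.
if __name__ == "__main__":
    np.random.seed(1)
    parameters = {"W1": np.random.randn(2, 3), "b1": np.zeros((2, 1))}
    grads = {"dW1": np.random.randn(2, 3), "db1": np.random.randn(2, 1)}

    # Zero-initialize the Adam moment estimates, then take one update step (t=1).
    v, s = initialize_adam(parameters)
    parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t=1,
                                                   learning_rate=0.01)
    print("W1 after one Adam step:\n", parameters["W1"])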