Path: blob/master/C5 - Sequence Models/Week 1/Building a Recurrent Neural Network - Step by Step/rnn_utils.py
import numpy as np


def softmax(x):
    # Shift by the max for numerical stability before exponentiating
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def initialize_adam(parameters):
    """
    Initializes v and s as two python dictionaries with:
                - keys: "dW1", "db1", ..., "dWL", "dbL"
                - values: numpy arrays of zeros of the same shape as the corresponding gradients/parameters.

    Arguments:
    parameters -- python dictionary containing your parameters.
                    parameters["W" + str(l)] = Wl
                    parameters["b" + str(l)] = bl

    Returns:
    v -- python dictionary that will contain the exponentially weighted average of the gradient.
                    v["dW" + str(l)] = ...
                    v["db" + str(l)] = ...
    s -- python dictionary that will contain the exponentially weighted average of the squared gradient.
                    s["dW" + str(l)] = ...
                    s["db" + str(l)] = ...
    """

    L = len(parameters) // 2  # number of layers in the neural network
    v = {}
    s = {}

    # Initialize v, s. Input: "parameters". Outputs: "v, s".
    for l in range(L):
        v["dW" + str(l + 1)] = np.zeros(parameters["W" + str(l + 1)].shape)
        v["db" + str(l + 1)] = np.zeros(parameters["b" + str(l + 1)].shape)
        s["dW" + str(l + 1)] = np.zeros(parameters["W" + str(l + 1)].shape)
        s["db" + str(l + 1)] = np.zeros(parameters["b" + str(l + 1)].shape)

    return v, s


def update_parameters_with_adam(parameters, grads, v, s, t, learning_rate=0.01,
                                beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Update parameters using Adam

    Arguments:
    parameters -- python dictionary containing your parameters:
                    parameters['W' + str(l)] = Wl
                    parameters['b' + str(l)] = bl
    grads -- python dictionary containing your gradients for each parameter:
                    grads['dW' + str(l)] = dWl
                    grads['db' + str(l)] = dbl
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    t -- iteration counter used for bias correction, starting at 1
    learning_rate -- the learning rate, scalar
    beta1 -- exponential decay hyperparameter for the first moment estimates
    beta2 -- exponential decay hyperparameter for the second moment estimates
    epsilon -- hyperparameter preventing division by zero in Adam updates

    Returns:
    parameters -- python dictionary containing your updated parameters
    v -- Adam variable, moving average of the first gradient, python dictionary
    s -- Adam variable, moving average of the squared gradient, python dictionary
    """

    L = len(parameters) // 2  # number of layers in the neural network
    v_corrected = {}          # bias-corrected first moment estimate, python dictionary
    s_corrected = {}          # bias-corrected second moment estimate, python dictionary

    # Perform Adam update on all parameters
    for l in range(L):
        # Moving average of the gradients. Inputs: "v, grads, beta1". Output: "v".
        v["dW" + str(l + 1)] = beta1 * v["dW" + str(l + 1)] + (1 - beta1) * grads["dW" + str(l + 1)]
        v["db" + str(l + 1)] = beta1 * v["db" + str(l + 1)] + (1 - beta1) * grads["db" + str(l + 1)]

        # Compute bias-corrected first moment estimate. Inputs: "v, beta1, t". Output: "v_corrected".
        v_corrected["dW" + str(l + 1)] = v["dW" + str(l + 1)] / (1 - beta1 ** t)
        v_corrected["db" + str(l + 1)] = v["db" + str(l + 1)] / (1 - beta1 ** t)

        # Moving average of the squared gradients. Inputs: "s, grads, beta2". Output: "s".
        s["dW" + str(l + 1)] = beta2 * s["dW" + str(l + 1)] + (1 - beta2) * (grads["dW" + str(l + 1)] ** 2)
        s["db" + str(l + 1)] = beta2 * s["db" + str(l + 1)] + (1 - beta2) * (grads["db" + str(l + 1)] ** 2)

        # Compute bias-corrected second raw moment estimate. Inputs: "s, beta2, t". Output: "s_corrected".
        s_corrected["dW" + str(l + 1)] = s["dW" + str(l + 1)] / (1 - beta2 ** t)
        s_corrected["db" + str(l + 1)] = s["db" + str(l + 1)] / (1 - beta2 ** t)

        # Update parameters: W := W - alpha * v_corrected / (sqrt(s_corrected) + epsilon).
        # Note: epsilon is added after the square root, matching the Adam update rule.
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - learning_rate * v_corrected["dW" + str(l + 1)] / (np.sqrt(s_corrected["dW" + str(l + 1)]) + epsilon)
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - learning_rate * v_corrected["db" + str(l + 1)] / (np.sqrt(s_corrected["db" + str(l + 1)]) + epsilon)

    return parameters, v, s
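For context, here is a minimal smoke test of the two Adam helpers above. The network shapes, seed, and gradient values are made up purely for illustration; only initialize_adam and update_parameters_with_adam come from this file.

if __name__ == "__main__":
    # Hypothetical two-layer network; shapes are arbitrary, chosen only for this sketch.
    np.random.seed(0)
    parameters = {
        "W1": np.random.randn(3, 4), "b1": np.zeros((3, 1)),
        "W2": np.random.randn(1, 3), "b2": np.zeros((1, 1)),
    }
    # Made-up gradients standing in for the output of backpropagation.
    grads = {
        "dW1": np.random.randn(3, 4), "db1": np.random.randn(3, 1),
        "dW2": np.random.randn(1, 3), "db2": np.random.randn(1, 1),
    }

    v, s = initialize_adam(parameters)
    # t starts at 1 so the bias-correction denominators (1 - beta**t) are nonzero.
    parameters, v, s = update_parameters_with_adam(parameters, grads, v, s, t=1,
                                                   learning_rate=0.01)
    print("W1 after one Adam step:\n", parameters["W1"])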