Path: blob/master/Neural Networks and Deep Learning/Week 4/Deep Neural Network Application Image Classification/dnn_app_utils_v3.py
import numpy as np
import matplotlib.pyplot as plt
import h5py


def sigmoid(Z):
    """
    Implements the sigmoid activation in numpy

    Arguments:
    Z -- numpy array of any shape

    Returns:
    A -- output of sigmoid(z), same shape as Z
    cache -- returns Z as well, useful during backpropagation
    """

    A = 1/(1+np.exp(-Z))
    cache = Z

    return A, cache


def relu(Z):
    """
    Implement the RELU function.

    Arguments:
    Z -- Output of the linear layer, of any shape

    Returns:
    A -- Post-activation parameter, of the same shape as Z
    cache -- returns Z as well; stored for computing the backward pass efficiently
    """

    A = np.maximum(0, Z)

    assert(A.shape == Z.shape)

    cache = Z
    return A, cache


def relu_backward(dA, cache):
    """
    Implement the backward propagation for a single RELU unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache
    dZ = np.array(dA, copy=True)  # just converting dz to a correct object.

    # When z <= 0, you should set dz to 0 as well.
    dZ[Z <= 0] = 0

    assert(dZ.shape == Z.shape)

    return dZ


def sigmoid_backward(dA, cache):
    """
    Implement the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' where we store for computing backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache

    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)

    assert(dZ.shape == Z.shape)

    return dZ


def load_data():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels

    classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def initialize_parameters(n_x, n_h, n_y):
    """
    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """

    np.random.seed(1)

    W1 = np.random.randn(n_h, n_x)*0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h)*0.01
    b2 = np.zeros((n_y, 1))

    assert(W1.shape == (n_h, n_x))
    assert(b1.shape == (n_h, 1))
    assert(W2.shape == (n_y, n_h))
    assert(b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters


def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """

    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)  # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])  # *0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters


def linear_forward(A, W, b):
    """
    Implement the linear part of a layer's forward propagation.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter
    cache -- a python tuple containing "A", "W" and "b"; stored for computing the backward pass efficiently
    """

    Z = W.dot(A) + b

    assert(Z.shape == (W.shape[0], A.shape[1]))
    cache = (A, W, b)

    return Z, cache


def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """

    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    assert(A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache


def L_model_forward(X, parameters):
    """
    Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()

    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (there are L-1 of them, indexed from 0 to L-2)
                the cache of linear_activation_forward() with "sigmoid" (there is one, indexed L-1)
    """

    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the neural network

    # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation="relu")
        caches.append(cache)

    # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation="sigmoid")
    caches.append(cache)

    assert(AL.shape == (1, X.shape[1]))

    return AL, caches


def compute_cost(AL, Y):
    """
    Implement the cost function defined by equation (7).

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """

    m = Y.shape[1]

    # Compute loss from aL and y.
    cost = (1./m) * (-np.dot(Y, np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))

    cost = np.squeeze(cost)  # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
    assert(cost.shape == ())

    return cost


def linear_backward(dZ, cache):
    """
    Implement the linear portion of backward propagation for a single layer (layer l)

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = 1./m * np.dot(dZ, A_prev.T)
    db = 1./m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    assert(dA_prev.shape == A_prev.shape)
    assert(dW.shape == W.shape)
    assert(db.shape == b.shape)

    return dA_prev, dW, db


def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db


def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (there are (L-1) of them, indexed from 0 to L-2)
                the cache of linear_activation_forward() with "sigmoid" (there is one, index L-1)

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients.
    # Inputs: dAL, current_cache. Outputs: grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)]
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation="sigmoid")

    for l in reversed(range(L-1)):
        # lth layer: (RELU -> LINEAR) gradients.
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads


def update_parameters(parameters, grads, learning_rate):
    """
    Update parameters using gradient descent

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward
    learning_rate -- the learning rate, a scalar used in the gradient descent update

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """

    L = len(parameters) // 2  # number of layers in the neural network

    # Update rule for each parameter. Use a for loop.
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]

    return parameters


def predict(X, y, parameters):
    """
    This function is used to predict the results of an L-layer neural network.

    Arguments:
    X -- data set of examples you would like to label
    y -- true "label" vector, used to report accuracy
    parameters -- parameters of the trained model

    Returns:
    p -- predictions for the given dataset X
    """

    m = X.shape[1]
    n = len(parameters) // 2  # number of layers in the neural network
    p = np.zeros((1, m))

    # Forward propagation
    probas, caches = L_model_forward(X, parameters)

    # convert probas to 0/1 predictions
    for i in range(0, probas.shape[1]):
        if probas[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0

    # print results
    # print ("predictions: " + str(p))
    # print ("true labels: " + str(y))
    print("Accuracy: " + str(np.sum((p == y)/m)))

    return p


def print_mislabeled_images(classes, X, y, p):
    """
    Plots images where predictions and truth were different.
    X -- dataset
    y -- true labels
    p -- predictions
    """
    a = p + y
    mislabeled_indices = np.asarray(np.where(a == 1))
    plt.rcParams['figure.figsize'] = (40.0, 40.0)  # set default size of plots
    num_images = len(mislabeled_indices[0])
    for i in range(num_images):
        index = mislabeled_indices[1][i]

        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + classes[int(p[0, index])].decode("utf-8") + " \n Class: " + classes[y[0, index]].decode("utf-8"))
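For context, a minimal sketch of how these helpers are typically wired together into an L-layer training run on the cat/non-cat data returned by load_data(). The layer sizes in layers_dims, the learning rate, and the iteration count below are illustrative assumptions, not values defined in this file.

# Illustrative usage sketch (assumed hyperparameters), not part of dnn_app_utils_v3.py.
from dnn_app_utils_v3 import (load_data, initialize_parameters_deep, L_model_forward,
                              compute_cost, L_model_backward, update_parameters, predict)

train_x_orig, train_y, test_x_orig, test_y, classes = load_data()

# Flatten the (m, 64, 64, 3) image tensors into (64*64*3, m) column vectors and scale to [0, 1].
train_x = train_x_orig.reshape(train_x_orig.shape[0], -1).T / 255.
test_x = test_x_orig.reshape(test_x_orig.shape[0], -1).T / 255.

layers_dims = [train_x.shape[0], 20, 7, 5, 1]  # assumed 4-layer architecture
learning_rate = 0.0075                         # assumed learning rate
num_iterations = 2500                          # assumed number of gradient steps

parameters = initialize_parameters_deep(layers_dims)
for i in range(num_iterations):
    AL, caches = L_model_forward(train_x, parameters)                 # forward pass
    cost = compute_cost(AL, train_y)                                  # cross-entropy cost
    grads = L_model_backward(AL, train_y, caches)                     # backward pass
    parameters = update_parameters(parameters, grads, learning_rate)  # gradient descent step
    if i % 100 == 0:
        print(f"Cost after iteration {i}: {float(cost):.6f}")

pred_train = predict(train_x, train_y, parameters)  # prints training accuracy
pred_test = predict(test_x, test_y, parameters)     # prints test accuracy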