# GitHub Repository: y33-j3T/Coursera-Deep-Learning
# Path: blob/master/Neural Networks and Deep Learning/Week 4/Deep Neural Network Application Image Classification/dnn_app_utils_v3.py

import numpy as np
import matplotlib.pyplot as plt
import h5py


def sigmoid(Z):
    """
    Implements the sigmoid activation in numpy

    Arguments:
    Z -- numpy array of any shape

    Returns:
    A -- output of sigmoid(z), same shape as Z
    cache -- returns Z as well, useful during backpropagation
    """

    A = 1/(1+np.exp(-Z))
    cache = Z

    return A, cache
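
# Illustrative sanity check (not part of the original assignment): sigmoid maps 0 to 0.5
# and approaches 1 for large positive inputs, e.g.
#   A, _ = sigmoid(np.array([0.0, 2.0]))   # A is approximately [0.5, 0.88079708]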

def relu(Z):
    """
    Implement the RELU function.

    Arguments:
    Z -- Output of the linear layer, of any shape

    Returns:
    A -- Post-activation parameter, of the same shape as Z
    cache -- returns Z as well, stored for computing the backward pass efficiently
    """

    A = np.maximum(0, Z)

    assert(A.shape == Z.shape)

    cache = Z
    return A, cache

def relu_backward(dA, cache):
    """
    Implement the backward propagation for a single RELU unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' stored during the forward pass, used to compute backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache
    dZ = np.array(dA, copy=True)  # just converting dZ to a correct object.

    # When z <= 0, set dz to 0 as well.
    dZ[Z <= 0] = 0

    assert (dZ.shape == Z.shape)

    return dZ

def sigmoid_backward(dA, cache):
    """
    Implement the backward propagation for a single SIGMOID unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- 'Z' stored during the forward pass, used to compute backward propagation efficiently

    Returns:
    dZ -- Gradient of the cost with respect to Z
    """

    Z = cache

    s = 1/(1+np.exp(-Z))
    dZ = dA * s * (1-s)

    assert (dZ.shape == Z.shape)

    return dZ
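
# The two lines above recompute s = sigmoid(Z) from the cached Z and apply the chain rule
# with the identity sigmoid'(Z) = sigmoid(Z) * (1 - sigmoid(Z)), giving dZ = dA * s * (1 - s).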

def load_data():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels

    classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
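
# Shape note (assuming the course's cat/non-cat HDF5 files): the image arrays come back as
# (m, num_px, num_px, 3), and the label vectors are reshaped above to (1, m) so they match
# the (features, examples) convention used by the functions below.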

def initialize_parameters(n_x, n_h, n_y):
    """
    Argument:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- python dictionary containing your parameters:
                    W1 -- weight matrix of shape (n_h, n_x)
                    b1 -- bias vector of shape (n_h, 1)
                    W2 -- weight matrix of shape (n_y, n_h)
                    b2 -- bias vector of shape (n_y, 1)
    """

    np.random.seed(1)

    W1 = np.random.randn(n_h, n_x)*0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h)*0.01
    b2 = np.zeros((n_y, 1))

    assert(W1.shape == (n_h, n_x))
    assert(b1.shape == (n_h, 1))
    assert(W2.shape == (n_y, n_h))
    assert(b2.shape == (n_y, 1))

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}

    return parameters

def initialize_parameters_deep(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network

    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """

    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)  # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1])  # *0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

        assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
        assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))

    return parameters
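
# Illustrative example (hypothetical sizes, not prescribed by this module): with
# layer_dims = [12288, 20, 7, 5, 1], initialize_parameters_deep returns W1 of shape
# (20, 12288), b1 of shape (20, 1), ..., W4 of shape (1, 5) and b4 of shape (1, 1).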

def linear_forward(A, W, b):
    """
    Implement the linear part of a layer's forward propagation.

    Arguments:
    A -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)

    Returns:
    Z -- the input of the activation function, also called pre-activation parameter
    cache -- a python tuple containing "A", "W" and "b" ; stored for computing the backward pass efficiently
    """

    Z = W.dot(A) + b

    assert(Z.shape == (W.shape[0], A.shape[1]))
    cache = (A, W, b)

    return Z, cache

def linear_activation_forward(A_prev, W, b, activation):
    """
    Implement the forward propagation for the LINEAR->ACTIVATION layer

    Arguments:
    A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples)
    W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
    b -- bias vector, numpy array of shape (size of the current layer, 1)
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    A -- the output of the activation function, also called the post-activation value
    cache -- a python tuple containing "linear_cache" and "activation_cache";
             stored for computing the backward pass efficiently
    """

    if activation == "sigmoid":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)

    elif activation == "relu":
        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)

    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    cache = (linear_cache, activation_cache)

    return A, cache
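
# Cache layout note: each cache returned here is ((A_prev, W, b), Z), i.e. the linear cache
# nested together with the pre-activation Z, which is exactly what linear_activation_backward
# unpacks further below.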

def L_model_forward(X, parameters):
    """
    Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation

    Arguments:
    X -- data, numpy array of shape (input size, number of examples)
    parameters -- output of initialize_parameters_deep()

    Returns:
    AL -- last post-activation value
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (there are L-1 of them, indexed from 0 to L-2)
                the cache of linear_activation_forward() with "sigmoid" (there is one, indexed L-1)
    """

    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the neural network

    # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation="relu")
        caches.append(cache)

    # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation="sigmoid")
    caches.append(cache)

    assert(AL.shape == (1, X.shape[1]))

    return AL, caches

def compute_cost(AL, Y):
    """
    Implement the cost function defined by equation (7).

    Arguments:
    AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
    Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)

    Returns:
    cost -- cross-entropy cost
    """

    m = Y.shape[1]

    # Compute loss from aL and y.
    cost = (1./m) * (-np.dot(Y, np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))

    cost = np.squeeze(cost)  # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
    assert(cost.shape == ())

    return cost
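
# The line above is the vectorized cross-entropy cost,
#   cost = -(1/m) * sum_i [ y_i * log(a_i) + (1 - y_i) * log(1 - a_i) ],
# which matches the expression the docstring refers to as equation (7).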

def linear_backward(dZ, cache):
    """
    Implement the linear portion of backward propagation for a single layer (layer l)

    Arguments:
    dZ -- Gradient of the cost with respect to the linear output (of current layer l)
    cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = 1./m * np.dot(dZ, A_prev.T)
    db = 1./m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)

    return dA_prev, dW, db
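
# These three lines implement the standard vectorized gradients for Z = W.A_prev + b:
#   dW = (1/m) * dZ . A_prev^T,   db = (1/m) * sum of dZ over examples,   dA_prev = W^T . dZ.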

def linear_activation_backward(dA, cache, activation):
    """
    Implement the backward propagation for the LINEAR->ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for current layer l
    cache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
    activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"

    Returns:
    dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
    dW -- Gradient of the cost with respect to W (current layer l), same shape as W
    db -- Gradient of the cost with respect to b (current layer l), same shape as b
    """
    linear_cache, activation_cache = cache

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db

def L_model_backward(AL, Y, caches):
    """
    Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu" (there are (L-1) of them, indexed from 0 to L-2)
                the cache of linear_activation_forward() with "sigmoid" (there is one, index L-1)

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
    """
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Initializing the backpropagation
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]".
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation="sigmoid")

    for l in reversed(range(L-1)):
        # lth layer: (RELU -> LINEAR) gradients.
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads
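
# Indexing note: grads["dW" + str(l+1)] and grads["db" + str(l+1)] belong to layer l+1,
# while grads["dA" + str(l)] is the gradient flowing back into layer l's activations;
# that is why the dictionary keys in the loop above are offset by one.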

def update_parameters(parameters, grads, learning_rate):
    """
    Update parameters using gradient descent

    Arguments:
    parameters -- python dictionary containing your parameters
    grads -- python dictionary containing your gradients, output of L_model_backward
    learning_rate -- step size used by the gradient descent update

    Returns:
    parameters -- python dictionary containing your updated parameters
                  parameters["W" + str(l)] = ...
                  parameters["b" + str(l)] = ...
    """

    L = len(parameters) // 2  # number of layers in the neural network

    # Update rule for each parameter. Use a for loop.
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]

    return parameters
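
# The loop above is plain gradient descent: for each layer l,
#   W[l] := W[l] - learning_rate * dW[l]   and   b[l] := b[l] - learning_rate * db[l].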

def predict(X, y, parameters):
    """
    This function is used to predict the results of an L-layer neural network.

    Arguments:
    X -- data set of examples you would like to label
    y -- true labels for X, used only to print the accuracy
    parameters -- parameters of the trained model

    Returns:
    p -- predictions for the given dataset X
    """

    m = X.shape[1]
    n = len(parameters) // 2  # number of layers in the neural network
    p = np.zeros((1, m))

    # Forward propagation
    probas, caches = L_model_forward(X, parameters)

    # convert probas to 0/1 predictions
    for i in range(0, probas.shape[1]):
        if probas[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0

    # print results
    #print ("predictions: " + str(p))
    #print ("true labels: " + str(y))
    print("Accuracy: " + str(np.sum((p == y)/m)))

    return p

def print_mislabeled_images(classes, X, y, p):
    """
    Plots images where predictions and truth were different.
    classes -- array of class names
    X -- dataset
    y -- true labels
    p -- predictions
    """
    a = p + y
    mislabeled_indices = np.asarray(np.where(a == 1))
    plt.rcParams['figure.figsize'] = (40.0, 40.0)  # set default size of plots
    num_images = len(mislabeled_indices[0])
    for i in range(num_images):
        index = mislabeled_indices[1][i]

        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + classes[int(p[0, index])].decode("utf-8") + " \n Class: " + classes[y[0, index]].decode("utf-8"))
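

# ---------------------------------------------------------------------------
# Minimal end-to-end sketch (not part of the original assignment): it shows how
# the helpers above combine into a training loop, using small synthetic data so
# it runs without the HDF5 files that load_data() expects. The sizes, learning
# rate, and iteration count here are arbitrary illustrative choices.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    np.random.seed(1)
    X_demo = np.random.randn(12, 50)             # 12 features, 50 examples
    Y_demo = (np.random.rand(1, 50) > 0.5) * 1   # random binary labels

    demo_layer_dims = [12, 7, 5, 1]              # hypothetical architecture
    demo_parameters = initialize_parameters_deep(demo_layer_dims)

    for i in range(200):
        AL, caches = L_model_forward(X_demo, demo_parameters)      # forward pass
        cost = compute_cost(AL, Y_demo)                            # cross-entropy cost
        grads = L_model_backward(AL, Y_demo, caches)               # backward pass
        demo_parameters = update_parameters(demo_parameters, grads, learning_rate=0.05)
        if i % 50 == 0:
            print("iteration %i, cost %f" % (i, cost))

    predict(X_demo, Y_demo, demo_parameters)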