CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
y33-j3T

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: y33-j3T/Coursera-Deep-Learning
Path: blob/master/Convolutional Neural Networks/week3/Car detection for Autonomous Driving/yad2k/models/keras_yolo.py
Views: 13383
1
"""YOLO_v2 Model Defined in Keras."""
2
import sys
3
4
import numpy as np
5
import tensorflow as tf
6
from keras import backend as K
7
from keras.layers import Lambda
8
from keras.layers.merge import concatenate
9
from keras.models import Model
10
11
from ..utils import compose
12
from .keras_darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky, darknet_body)
13
14
sys.path.append('..')
15
16
# Default anchor box priors, one (width, height) pair per row.
# NOTE(review): presumably expressed in final conv-grid cell units and
# derived for the Pascal VOC dataset -- confirm against the training config.
voc_anchors = np.array([
    [1.08, 1.19],
    [3.42, 4.41],
    [6.63, 11.38],
    [9.42, 5.11],
    [16.62, 10.52],
])

# Class names; the position of a name in this list is its class index.
voc_classes = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
24
25
26
def space_to_depth_x2(x):
    """Apply TensorFlow's space_to_depth to `x` with a block size of 2."""
    # The import is deliberately function-local: it is currently required to
    # make Lambda work with this function.
    # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
    import tensorflow as tf
    block = 2
    return tf.space_to_depth(x, block_size=block)
32
33
34
def space_to_depth_x2_output_shape(input_shape):
    """Determine space_to_depth output shape for block_size=2.

    Parameters
    ----------
    input_shape : sequence
        Four-element shape indexed as (batch, dim1, dim2, channels). The two
        middle entries may be None when the spatial size is not known.

    Returns
    -------
    tuple
        (batch, dim1 // 2, dim2 // 2, 4 * channels). Each spatial entry is
        resolved independently: an unknown (None / falsy) entry stays None
        instead of forcing both to None or raising on ``None // 2``.

    Note: For Lambda with TensorFlow backend, output shape may not be needed.
    """
    batch, height, width, channels = (input_shape[0], input_shape[1],
                                      input_shape[2], input_shape[3])
    # Halve only the spatial dimensions that are actually known.
    out_height = height // 2 if height else None
    out_width = width // 2 if width else None
    return (batch, out_height, out_width, 4 * channels)
42
43
44
def yolo_body(inputs, num_anchors, num_classes):
    """Build the YOLO_v2 convolutional body as a Keras Model.

    The Darknet-19 body is applied to `inputs`, two further 3x3 / 1024-filter
    blocks are stacked on its output, and that result is concatenated with a
    space-to-depth rearrangement of an earlier feature map before the final
    1x1 prediction convolution.

    Parameters
    ----------
    inputs : tensor
        Keras input tensor for the model.
    num_anchors : int
        Number of anchor boxes predicted at each grid location.
    num_classes : int
        Number of object classes.

    Returns
    -------
    Model
        Keras model mapping `inputs` to a feature map with
        ``num_anchors * (num_classes + 5)`` output filters.
    """
    backbone = Model(inputs, darknet_body()(inputs))

    top_block = compose(
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(1024, (3, 3)))
    deep_features = top_block(backbone.output)

    # Layer index 43 is hard-coded; it selects the earlier feature map that is
    # routed around the last layers of the backbone.
    # NOTE(review): presumably tied to the exact layer layout produced by
    # darknet_body -- confirm if that function changes.
    early_features = backbone.layers[43].output
    squeezed = DarknetConv2D_BN_Leaky(64, (1, 1))(early_features)
    # TODO: Allow Keras Lambda to use func arguments for output_shape?
    rearranged = Lambda(
        space_to_depth_x2,
        output_shape=space_to_depth_x2_output_shape,
        name='space_to_depth')(squeezed)

    merged = concatenate([rearranged, deep_features])
    merged = DarknetConv2D_BN_Leaky(1024, (3, 3))(merged)
    predictions = DarknetConv2D(
        num_anchors * (num_classes + 5), (1, 1))(merged)
    return Model(inputs, predictions)
63
64
65
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Sigmoid of feature channel 4: estimate of whether each box contains
        any object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer and
        divided by the conv dimensions.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Softmax over feature channels 5 onward: distribution estimate for
        each box over class labels.

    Notes
    -----
    The tuple order is (confidence, xy, wh, class_probs). Callers must unpack
    it in exactly that order.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    # TODO: Remove or add option for a static implementation (fixed models).
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    # NOTE(review): the index construction below and the division by
    # conv_dims (height, width) look consistent only for square grids --
    # confirm before using non-square inputs.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # Tile + transpose + flatten repeats each width index conv_dims[0] times.
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    # Split the channel axis into one (num_classes + 5)-vector per anchor.
    feats = K.reshape(feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Channel layout per anchor: [x, y, w, h, confidence, class scores...].
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
133
134
135
def yolo_boxes_to_corners(box_xy, box_wh):
    """Turn center/size box predictions into corner coordinates.

    Parameters
    ----------
    box_xy : tensor
        Box centers; last axis holds (x, y).
    box_wh : tensor
        Box sizes; last axis holds (w, h).

    Returns
    -------
    tensor
        Boxes concatenated as (y_min, x_min, y_max, x_max) using
        K.concatenate's default axis.
    """
    half_extent = box_wh / 2.
    lower = box_xy - half_extent
    upper = box_xy + half_extent

    # Output is ordered y before x, i.e. the reverse of the (x, y) input.
    y_min = lower[..., 1:2]
    x_min = lower[..., 0:1]
    y_max = upper[..., 1:2]
    x_max = upper[..., 0:1]
    return K.concatenate([y_min, x_min, y_max, x_max])
146
147
148
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    args : tuple
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``:

        yolo_output : tensor
            Final convolutional layer features.
        true_boxes : tensor
            Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
            containing box x_center, y_center, width, height, and class.
        detectors_mask : array
            0/1 mask for detector positions where there is a matching ground
            truth.
        matching_true_boxes : array
            Corresponding ground truth boxes for positive detector positions.
            Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        0.5 * (confidence + classification + coordinate) squared-error terms,
        each summed (not averaged) over all elements.
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    # yolo_head returns (confidence, xy, wh, class_probs) -- confidence comes
    # FIRST. Unpacking in any other order silently swaps the tensors.
    pred_confidence, pred_xy, pred_wh, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
295
296
297
def yolo(inputs, anchors, num_classes):
    """Generate a complete YOLO_v2 localization model.

    Builds the convolutional body for `inputs` and passes its output through
    yolo_head, returning whatever yolo_head returns.
    """
    body = yolo_body(inputs, len(anchors), num_classes)
    return yolo_head(body.output, anchors, num_classes)
303
304
305
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6):
    """Keep only boxes whose best class score reaches `threshold`.

    Parameters
    ----------
    box_confidence : tensor
        Per-box object confidence.
    boxes : tensor
        Per-box coordinates.
    box_class_probs : tensor
        Per-box class probabilities.
    threshold : float, default=.6
        Minimum (confidence * class probability) for a box to be kept.

    Returns
    -------
    boxes, scores, classes : tensors
        Coordinates, best class score and best class index of the kept boxes.
    """
    # Score of every class for every box, then the winning class per box.
    per_class_scores = box_confidence * box_class_probs
    best_class = K.argmax(per_class_scores, axis=-1)
    best_score = K.max(per_class_scores, axis=-1)
    keep = best_score >= threshold

    # TODO: Expose tf.boolean_mask to Keras backend?
    kept_boxes = tf.boolean_mask(boxes, keep)
    kept_scores = tf.boolean_mask(best_score, keep)
    kept_classes = tf.boolean_mask(best_class, keep)
    return kept_boxes, kept_scores, kept_classes
319
320
321
def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input batch and return filtered boxes.

    Parameters
    ----------
    yolo_outputs : tuple
        (box_confidence, box_xy, box_wh, box_class_probs), unpacked in that
        order.
    image_shape : sequence or tensor
        Indexed as [0] = height and [1] = width; used to scale the boxes.
    max_boxes : int, default=10
        Maximum number of boxes kept by non-max suppression.
    score_threshold : float, default=.6
        Threshold forwarded to yolo_filter_boxes.
    iou_threshold : float, default=.5
        IOU threshold forwarded to tf.image.non_max_suppression.

    Returns
    -------
    boxes, scores, classes : tensors
        Entries selected by non-max suppression.
    """
    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs
    corners = yolo_boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = yolo_filter_boxes(
        box_confidence, corners, box_class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape.
    img_h, img_w = image_shape[0], image_shape[1]
    scale = K.reshape(K.stack([img_h, img_w, img_h, img_w]), [1, 4])
    boxes = boxes * scale

    # TODO: Something must be done about this ugly hack!
    # NOTE(review): fetching the Keras session here presumably also affects
    # initialisation of other pending variables -- confirm before removing.
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
    selected = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)

    boxes = K.gather(boxes, selected)
    scores = K.gather(scores, selected)
    classes = K.gather(classes, selected)
    return boxes, scores, classes
349
350
351
def preprocess_true_boxes(true_boxes, anchors, image_size):
    """Find detector in YOLO where ground truth box should appear.

    Parameters
    ----------
    true_boxes : array
        List of ground truth boxes in form of relative x, y, w, h, class.
        Relative coordinates are in the range [0, 1] indicating a percentage
        of the original image dimensions.
    anchors : array
        List of anchors in form of w, h.
        Anchors are assumed to be in the range [0, conv_size] where conv_size
        is the spatial dimension of the final convolutional features.
    image_size : array-like
        List of image dimensions in form of h, w in pixels.

    Returns
    -------
    detectors_mask : array
        0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1]
        that should be compared with a matching ground truth box.
    matching_true_boxes: array
        Array of shape [conv_height, conv_width, num_anchors, num_box_params]
        with the corresponding ground truth box adjusted for comparison with
        predicted parameters at training time.

    Raises
    ------
    AssertionError
        If the image height or width is not a multiple of 32.
    """
    height, width = image_size
    num_anchors = len(anchors)
    # Downsampling factor of 5x 2-stride max_pools == 32.
    # TODO: Remove hardcoding of downscaling calculations.
    assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
    assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
    conv_height = height // 32
    conv_width = width // 32
    num_box_params = true_boxes.shape[1]
    detectors_mask = np.zeros(
        (conv_height, conv_width, num_anchors, 1), dtype=np.float32)
    matching_true_boxes = np.zeros(
        (conv_height, conv_width, num_anchors, num_box_params),
        dtype=np.float32)
    grid_scale = np.array([conv_width, conv_height, conv_width, conv_height])

    for box in true_boxes:
        # Keep the class as a scalar so the adjusted box below is a flat,
        # homogeneous sequence.
        box_class = box[4]
        # Scale box to convolutional feature spatial dimensions.
        box = box[0:4] * grid_scale
        # Grid cell (row i, column j) containing the box center. A center
        # lying exactly on the far edge (relative coordinate 1.0) is clamped
        # into the last cell rather than indexing past the grid.
        i = min(int(np.floor(box[1])), conv_height - 1)
        j = min(int(np.floor(box[0])), conv_width - 1)

        # Box extents with the box shifted to the origin; these do not depend
        # on the anchor, so compute them once per box.
        box_maxes = box[2:4] / 2.
        box_mins = -box_maxes
        box_area = box[2] * box[3]

        best_iou = 0
        best_anchor = 0
        for k, anchor in enumerate(anchors):
            # Find IOU between box shifted to origin and anchor box.
            anchor_maxes = anchor / 2.
            anchor_mins = -anchor_maxes

            intersect_mins = np.maximum(box_mins, anchor_mins)
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
            intersect_area = intersect_wh[0] * intersect_wh[1]
            anchor_area = anchor[0] * anchor[1]
            iou = intersect_area / (box_area + anchor_area - intersect_area)
            # Strict comparison: the first anchor wins on ties.
            if iou > best_iou:
                best_iou = iou
                best_anchor = k

        # Boxes with no overlap with any anchor (e.g. zero size) are skipped.
        if best_iou > 0:
            detectors_mask[i, j, best_anchor] = 1
            adjusted_box = np.array(
                [
                    box[0] - j, box[1] - i,
                    np.log(box[2] / anchors[best_anchor][0]),
                    np.log(box[3] / anchors[best_anchor][1]), box_class
                ],
                dtype=np.float32)
            matching_true_boxes[i, j, best_anchor] = adjusted_box
    return detectors_mask, matching_true_boxes
430
431