CoCalc -- keras

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

GitHub Repository: amanchadha/coursera-deep-learning-specialization
Path: blob/master/C4 - Convolutional Neural Networks/Week 3/Car detection for Autonomous Driving/yad2k/models/keras_yolo.py
Views: ⁴⁸²²
1
"""YOLO_v2 Model Defined in Keras."""
2
import sys
3

4
import numpy as np
5
import tensorflow as tf
6
from tensorflow.keras import backend as K
7
from tensorflow.keras.layers import Lambda
8
from tensorflow.keras.layers import concatenate
9
from tensorflow.keras.models import Model
10

11
from ..utils import compose
12
from .keras_darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky,
13
                              darknet_body)
14

15
sys.path.append('..')
16

17
voc_anchors = np.array(
18
    [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]])
19

20
voc_classes = [
21
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
22
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
23
    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
24
]
25

26

27
def space_to_depth_x2(x):
28
    """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
29
    # Import currently required to make Lambda work.
30
    # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
31
    import tensorflow as tf
32
    return tf.nn.space_to_depth(x, block_size=2)
33

34

35
def space_to_depth_x2_output_shape(input_shape):
36
    """Determine space_to_depth output shape for block_size=2.
37

38
    Note: For Lambda with TensorFlow backend, output shape may not be needed.
39
    """
40
    return (input_shape[0], input_shape[1] // 2, input_shape[2] // 2, 4 *
41
            input_shape[3]) if input_shape[1] else (input_shape[0], None, None,
42
                                                    4 * input_shape[3])
43

44
def yolo_body(inputs, num_anchors, num_classes):
45
    """Create YOLO_V2 model CNN body in Keras."""
46
    darknet = Model(inputs, darknet_body()(inputs))
47
    conv20 = compose(
48
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
49
        DarknetConv2D_BN_Leaky(1024, (3, 3)))(darknet.output)
50

51
    conv13 = darknet.layers[43].output
52
    conv21 = DarknetConv2D_BN_Leaky(64, (1, 1))(conv13)
53
    # TODO: Allow Keras Lambda to use func arguments for output_shape?
54
    conv21_reshaped = Lambda(
55
        space_to_depth_x2,
56
        output_shape=space_to_depth_x2_output_shape,
57
        name='space_to_depth')(conv21)
58

59
    x = concatenate([conv21_reshaped, conv20])
60
    x = DarknetConv2D_BN_Leaky(1024, (3, 3))(x)
61
    x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x)
62
    return Model(inputs, x)
63

64

65
def yolo_head(feats, anchors, num_classes):
66
    """Convert final layer features to bounding box parameters.
67

68
    Parameters
69
    ----------
70
    feats : tensor
71
        Final convolutional layer features.
72
    anchors : array-like
73
        Anchor box widths and heights.
74
    num_classes : int
75
        Number of target classes.
76

77
    Returns
78
    -------
79
    box_xy : tensor
80
        x, y box predictions adjusted by spatial location in conv layer.
81
    box_wh : tensor
82
        w, h box predictions adjusted by anchors and conv spatial resolution.
83
    box_conf : tensor
84
        Probability estimate for whether each box contains any object.
85
    box_class_pred : tensor
86
        Probability distribution estimate for each box over class labels.
87
    """
88
    num_anchors = len(anchors)
89
    # Reshape to batch, height, width, num_anchors, box_params.
90
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])
91

92
    # Static implementation for fixed models.
93
    # TODO: Remove or add option for static implementation.
94
    # _, conv_height, conv_width, _ = K.int_shape(feats)
95
    # conv_dims = K.variable([conv_width, conv_height])
96

97
    # Dynamic implementation of conv dims for fully convolutional model.
98
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
99
    # In YOLO the height index is the inner most iteration.
100
    conv_height_index = K.arange(0, stop=conv_dims[0])
101
    conv_width_index = K.arange(0, stop=conv_dims[1])
102
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
103

104
    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
105
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
106
    conv_width_index = K.tile(
107
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
108
    conv_width_index = K.flatten(K.transpose(conv_width_index))
109
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
110
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
111
    conv_index = K.cast(conv_index, K.dtype(feats))
112

113
    feats = K.reshape(
114
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
115
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))
116

117
    # Static generation of conv_index:
118
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
119
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
120
    # conv_index = K.variable(
121
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
122
    # feats = Reshape(
123
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)
124

125
    box_xy = K.sigmoid(feats[..., :2])
126
    box_wh = K.exp(feats[..., 2:4])
127
    box_confidence = K.sigmoid(feats[..., 4:5])
128
    box_class_probs = K.softmax(feats[..., 5:])
129

130
    # Adjust preditions to each spatial grid point and anchor size.
131
    # Note: YOLO iterates over height index before width index.
132
    box_xy = (box_xy + conv_index) / conv_dims
133
    box_wh = box_wh * anchors_tensor / conv_dims
134

135
    return box_xy, box_wh, box_confidence, box_class_probs
136

137

138
def yolo_boxes_to_corners(box_xy, box_wh):
139
    """Convert YOLO box predictions to bounding box corners."""
140
    box_mins = box_xy - (box_wh / 2.)
141
    box_maxes = box_xy + (box_wh / 2.)
142

143
    return K.concatenate([
144
        box_mins[..., 1:2],  # y_min
145
        box_mins[..., 0:1],  # x_min
146
        box_maxes[..., 1:2],  # y_max
147
        box_maxes[..., 0:1]  # x_max
148
    ])
149

150

151
def yolo_loss(args,
152
              anchors,
153
              num_classes,
154
              rescore_confidence=False,
155
              print_loss=False):
156
    """YOLO localization loss function.
157

158
    Parameters
159
    ----------
160
    yolo_output : tensor
161
        Final convolutional layer features.
162

163
    true_boxes : tensor
164
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
165
        containing box x_center, y_center, width, height, and class.
166

167
    detectors_mask : array
168
        0/1 mask for detector positions where there is a matching ground truth.
169

170
    matching_true_boxes : array
171
        Corresponding ground truth boxes for positive detector positions.
172
        Already adjusted for conv height and width.
173

174
    anchors : tensor
175
        Anchor boxes for model.
176

177
    num_classes : int
178
        Number of object classes.
179

180
    rescore_confidence : bool, default=False
181
        If true then set confidence target to IOU of best predicted box with
182
        the closest matching ground truth box.
183

184
    print_loss : bool, default=False
185
        If True then use a tf.Print() to print the loss components.
186

187
    Returns
188
    -------
189
    mean_loss : float
190
        mean localization loss across minibatch
191
    """
192
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
193
    num_anchors = len(anchors)
194
    object_scale = 5
195
    no_object_scale = 1
196
    class_scale = 1
197
    coordinates_scale = 1
198
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
199
        yolo_output, anchors, num_classes)
200

201
    # Unadjusted box predictions for loss.
202
    # TODO: Remove extra computation shared with yolo_head.
203
    yolo_output_shape = K.shape(yolo_output)
204
    feats = K.reshape(yolo_output, [
205
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
206
        num_classes + 5
207
    ])
208
    pred_boxes = K.concatenate(
209
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)
210

211
    # TODO: Adjust predictions by image width/height for non-square images?
212
    # IOUs may be off due to different aspect ratio.
213

214
    # Expand pred x,y,w,h to allow comparison with ground truth.
215
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
216
    pred_xy = K.expand_dims(pred_xy, 4)
217
    pred_wh = K.expand_dims(pred_wh, 4)
218

219
    pred_wh_half = pred_wh / 2.
220
    pred_mins = pred_xy - pred_wh_half
221
    pred_maxes = pred_xy + pred_wh_half
222

223
    true_boxes_shape = K.shape(true_boxes)
224

225
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
226
    true_boxes = K.reshape(true_boxes, [
227
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
228
    ])
229
    true_xy = true_boxes[..., 0:2]
230
    true_wh = true_boxes[..., 2:4]
231

232
    # Find IOU of each predicted box with each ground truth box.
233
    true_wh_half = true_wh / 2.
234
    true_mins = true_xy - true_wh_half
235
    true_maxes = true_xy + true_wh_half
236

237
    intersect_mins = K.maximum(pred_mins, true_mins)
238
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
239
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
240
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
241

242
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
243
    true_areas = true_wh[..., 0] * true_wh[..., 1]
244

245
    union_areas = pred_areas + true_areas - intersect_areas
246
    iou_scores = intersect_areas / union_areas
247

248
    # Best IOUs for each location.
249
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
250
    best_ious = K.expand_dims(best_ious)
251

252
    # A detector has found an object if IOU > thresh for some true box.
253
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))
254

255
    # TODO: Darknet region training includes extra coordinate loss for early
256
    # training steps to encourage predictions to match anchor priors.
257

258
    # Determine confidence weights from object and no_object weights.
259
    # NOTE: YOLO does not use binary cross-entropy here.
260
    no_object_weights = (no_object_scale * (1 - object_detections) *
261
                         (1 - detectors_mask))
262
    no_objects_loss = no_object_weights * K.square(-pred_confidence)
263

264
    if rescore_confidence:
265
        objects_loss = (object_scale * detectors_mask *
266
                        K.square(best_ious - pred_confidence))
267
    else:
268
        objects_loss = (object_scale * detectors_mask *
269
                        K.square(1 - pred_confidence))
270
    confidence_loss = objects_loss + no_objects_loss
271

272
    # Classification loss for matching detections.
273
    # NOTE: YOLO does not use categorical cross-entropy loss here.
274
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
275
    matching_classes = K.one_hot(matching_classes, num_classes)
276
    classification_loss = (class_scale * detectors_mask *
277
                           K.square(matching_classes - pred_class_prob))
278

279
    # Coordinate loss for matching detection boxes.
280
    matching_boxes = matching_true_boxes[..., 0:4]
281
    coordinates_loss = (coordinates_scale * detectors_mask *
282
                        K.square(matching_boxes - pred_boxes))
283

284
    confidence_loss_sum = K.sum(confidence_loss)
285
    classification_loss_sum = K.sum(classification_loss)
286
    coordinates_loss_sum = K.sum(coordinates_loss)
287
    total_loss = 0.5 * (
288
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
289
    if print_loss:
290
        total_loss = tf.Print(
291
            total_loss, [
292
                total_loss, confidence_loss_sum, classification_loss_sum,
293
                coordinates_loss_sum
294
            ],
295
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')
296

297
    return total_loss
298

299

300
def yolo(inputs, anchors, num_classes):
301
    """Generate a complete YOLO_v2 localization model."""
302
    num_anchors = len(anchors)
303
    body = yolo_body(inputs, num_anchors, num_classes)
304
    outputs = yolo_head(body.output, anchors, num_classes)
305
    return outputs
306

307

308
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6):
309
    """Filter YOLO boxes based on object and class confidence."""
310
    box_scores = box_confidence * box_class_probs
311
    box_classes = K.argmax(box_scores, axis=-1)
312
    box_class_scores = K.max(box_scores, axis=-1)
313
    prediction_mask = box_class_scores >= threshold
314

315
    # TODO: Expose tf.boolean_mask to Keras backend?
316
    boxes = tf.boolean_mask(boxes, prediction_mask)
317
    scores = tf.boolean_mask(box_class_scores, prediction_mask)
318
    classes = tf.boolean_mask(box_classes, prediction_mask)
319
    return boxes, scores, classes
320

321

322
def yolo_eval(yolo_outputs,
323
              image_shape,
324
              max_boxes=10,
325
              score_threshold=.6,
326
              iou_threshold=.5):
327
    """Evaluate YOLO model on given input batch and return filtered boxes."""
328
    box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
329
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
330
    boxes, scores, classes = yolo_filter_boxes(
331
        boxes, box_confidence, box_class_probs, threshold=score_threshold)
332

333
    # Scale boxes back to original image shape.
334
    height = image_shape[0]
335
    width = image_shape[1]
336
    image_dims = K.stack([height, width, height, width])
337
    image_dims = K.reshape(image_dims, [1, 4])
338
    boxes = boxes * image_dims
339

340
    # TODO: Something must be done about this ugly hack!
341
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
342
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
343
    nms_index = tf.image.non_max_suppression(
344
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
345
    boxes = K.gather(boxes, nms_index)
346
    scores = K.gather(scores, nms_index)
347
    classes = K.gather(classes, nms_index)
348
    return boxes, scores, classes
349

350

351
def preprocess_true_boxes(true_boxes, anchors, image_size):
352
    """Find detector in YOLO where ground truth box should appear.
353

354
    Parameters
355
    ----------
356
    true_boxes : array
357
        List of ground truth boxes in form of relative x, y, w, h, class.
358
        Relative coordinates are in the range [0, 1] indicating a percentage
359
        of the original image dimensions.
360
    anchors : array
361
        List of anchors in form of w, h.
362
        Anchors are assumed to be in the range [0, conv_size] where conv_size
363
        is the spatial dimension of the final convolutional features.
364
    image_size : array-like
365
        List of image dimensions in form of h, w in pixels.
366

367
    Returns
368
    -------
369
    detectors_mask : array
370
        0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1]
371
        that should be compared with a matching ground truth box.
372
    matching_true_boxes: array
373
        Same shape as detectors_mask with the corresponding ground truth box
374
        adjusted for comparison with predicted parameters at training time.
375
    """
376
    height, width = image_size
377
    num_anchors = len(anchors)
378
    # Downsampling factor of 5x 2-stride max_pools == 32.
379
    # TODO: Remove hardcoding of downscaling calculations.
380
    assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
381
    assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
382
    conv_height = height // 32
383
    conv_width = width // 32
384
    num_box_params = true_boxes.shape[1]
385
    detectors_mask = np.zeros(
386
        (conv_height, conv_width, num_anchors, 1), dtype=np.float32)
387
    matching_true_boxes = np.zeros(
388
        (conv_height, conv_width, num_anchors, num_box_params),
389
        dtype=np.float32)
390

391
    for box in true_boxes:
392
        # scale box to convolutional feature spatial dimensions
393
        box_class = box[4:5]
394
        box = box[0:4] * np.array(
395
            [conv_width, conv_height, conv_width, conv_height])
396
        i = np.floor(box[1]).astype('int')
397
        j = np.floor(box[0]).astype('int')
398
        best_iou = 0
399
        best_anchor = 0
400
        for k, anchor in enumerate(anchors):
401
            # Find IOU between box shifted to origin and anchor box.
402
            box_maxes = box[2:4] / 2.
403
            box_mins = -box_maxes
404
            anchor_maxes = (anchor / 2.)
405
            anchor_mins = -anchor_maxes
406

407
            intersect_mins = np.maximum(box_mins, anchor_mins)
408
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
409
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
410
            intersect_area = intersect_wh[0] * intersect_wh[1]
411
            box_area = box[2] * box[3]
412
            anchor_area = anchor[0] * anchor[1]
413
            iou = intersect_area / (box_area + anchor_area - intersect_area)
414
            if iou > best_iou:
415
                best_iou = iou
416
                best_anchor = k
417

418
        if best_iou > 0:
419
            detectors_mask[i, j, best_anchor] = 1
420
            adjusted_box = np.array(
421
                [
422
                    box[0] - j, box[1] - i,
423
                    np.log(box[2] / anchors[best_anchor][0]),
424
                    np.log(box[3] / anchors[best_anchor][1]), box_class
425
                ],
426
                dtype=np.float32)
427
            matching_true_boxes[i, j, best_anchor] = adjusted_box
428
    return detectors_mask, matching_true_boxes
429

430
Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.

Product

Resources

Company

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more, all in one place. Commercial Alternative to JupyterHub.

Real-time collaboration for Jupyter Notebooks, Linux Terminals, LaTeX, VS Code, R IDE, and more,
all in one place. Commercial Alternative to JupyterHub.