"""
Mask R-CNN
Common utility functions and classes.
Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""
import sys
import os
import logging
import math
import random
import numpy as np
import tensorflow as tf
import scipy
import skimage.color
import skimage.io
import skimage.transform
import urllib.request
import shutil
import warnings
from distutils.version import LooseVersion
COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
def extract_bboxes(mask):
"""Compute bounding boxes from masks.
mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
Returns: bbox array [num_instances, (y1, x1, y2, x2)].
"""
boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
for i in range(mask.shape[-1]):
m = mask[:, :, i]
horizontal_indicies = np.where(np.any(m, axis=0))[0]
vertical_indicies = np.where(np.any(m, axis=1))[0]
if horizontal_indicies.shape[0]:
x1, x2 = horizontal_indicies[[0, -1]]
y1, y2 = vertical_indicies[[0, -1]]
x2 += 1
y2 += 1
else:
x1, x2, y1, y2 = 0, 0, 0, 0
boxes[i] = np.array([y1, x1, y2, x2])
return boxes.astype(np.int32)
def compute_iou(box, boxes, box_area, boxes_area):
"""Calculates IoU of the given box with the array of the given boxes.
box: 1D vector [y1, x1, y2, x2]
boxes: [boxes_count, (y1, x1, y2, x2)]
box_area: float. the area of 'box'
boxes_area: array of length boxes_count.
Note: the areas are passed in rather than calculated here for
efficiency. Calculate once in the caller to avoid duplicate work.
"""
y1 = np.maximum(box[0], boxes[:, 0])
y2 = np.minimum(box[2], boxes[:, 2])
x1 = np.maximum(box[1], boxes[:, 1])
x2 = np.minimum(box[3], boxes[:, 3])
intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
union = box_area + boxes_area[:] - intersection[:]
iou = intersection / union
return iou
def compute_overlaps(boxes1, boxes2):
"""Computes IoU overlaps between two sets of boxes.
boxes1, boxes2: [N, (y1, x1, y2, x2)].
For better performance, pass the largest set first and the smaller second.
"""
area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
for i in range(overlaps.shape[1]):
box2 = boxes2[i]
overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
return overlaps
def compute_overlaps_masks(masks1, masks2):
"""Computes IoU overlaps between two sets of masks.
masks1, masks2: [Height, Width, instances]
"""
if masks1.shape[-1] == 0 or masks2.shape[-1] == 0:
return np.zeros((masks1.shape[-1], masks2.shape[-1]))
masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
area1 = np.sum(masks1, axis=0)
area2 = np.sum(masks2, axis=0)
intersections = np.dot(masks1.T, masks2)
union = area1[:, None] + area2[None, :] - intersections
overlaps = intersections / union
return overlaps
def non_max_suppression(boxes, scores, threshold):
"""Performs non-maximum suppression and returns indices of kept boxes.
boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
scores: 1-D array of box scores.
threshold: Float. IoU threshold to use for filtering.
"""
assert boxes.shape[0] > 0
if boxes.dtype.kind != "f":
boxes = boxes.astype(np.float32)
y1 = boxes[:, 0]
x1 = boxes[:, 1]
y2 = boxes[:, 2]
x2 = boxes[:, 3]
area = (y2 - y1) * (x2 - x1)
ixs = scores.argsort()[::-1]
pick = []
while len(ixs) > 0:
i = ixs[0]
pick.append(i)
iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
remove_ixs = np.where(iou > threshold)[0] + 1
ixs = np.delete(ixs, remove_ixs)
ixs = np.delete(ixs, 0)
return np.array(pick, dtype=np.int32)
def apply_box_deltas(boxes, deltas):
"""Applies the given deltas to the given boxes.
boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
deltas: [N, (dy, dx, log(dh), log(dw))]
"""
boxes = boxes.astype(np.float32)
height = boxes[:, 2] - boxes[:, 0]
width = boxes[:, 3] - boxes[:, 1]
center_y = boxes[:, 0] + 0.5 * height
center_x = boxes[:, 1] + 0.5 * width
center_y += deltas[:, 0] * height
center_x += deltas[:, 1] * width
height *= np.exp(deltas[:, 2])
width *= np.exp(deltas[:, 3])
y1 = center_y - 0.5 * height
x1 = center_x - 0.5 * width
y2 = y1 + height
x2 = x1 + width
return np.stack([y1, x1, y2, x2], axis=1)
def box_refinement_graph(box, gt_box):
"""Compute refinement needed to transform box to gt_box.
box and gt_box are [N, (y1, x1, y2, x2)]
"""
box = tf.cast(box, tf.float32)
gt_box = tf.cast(gt_box, tf.float32)
height = box[:, 2] - box[:, 0]
width = box[:, 3] - box[:, 1]
center_y = box[:, 0] + 0.5 * height
center_x = box[:, 1] + 0.5 * width
gt_height = gt_box[:, 2] - gt_box[:, 0]
gt_width = gt_box[:, 3] - gt_box[:, 1]
gt_center_y = gt_box[:, 0] + 0.5 * gt_height
gt_center_x = gt_box[:, 1] + 0.5 * gt_width
dy = (gt_center_y - center_y) / height
dx = (gt_center_x - center_x) / width
dh = tf.log(gt_height / height)
dw = tf.log(gt_width / width)
result = tf.stack([dy, dx, dh, dw], axis=1)
return result
def box_refinement(box, gt_box):
"""Compute refinement needed to transform box to gt_box.
box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
assumed to be outside the box.
"""
box = box.astype(np.float32)
gt_box = gt_box.astype(np.float32)
height = box[:, 2] - box[:, 0]
width = box[:, 3] - box[:, 1]
center_y = box[:, 0] + 0.5 * height
center_x = box[:, 1] + 0.5 * width
gt_height = gt_box[:, 2] - gt_box[:, 0]
gt_width = gt_box[:, 3] - gt_box[:, 1]
gt_center_y = gt_box[:, 0] + 0.5 * gt_height
gt_center_x = gt_box[:, 1] + 0.5 * gt_width
dy = (gt_center_y - center_y) / height
dx = (gt_center_x - center_x) / width
dh = np.log(gt_height / height)
dw = np.log(gt_width / width)
return np.stack([dy, dx, dh, dw], axis=1)
class Dataset(object):
"""The base class for dataset classes.
To use it, create a new class that adds functions specific to the dataset
you want to use. For example:
class CatsAndDogsDataset(Dataset):
def load_cats_and_dogs(self):
...
def load_mask(self, image_id):
...
def image_reference(self, image_id):
...
See COCODataset and ShapesDataset as examples.
"""
def __init__(self, class_map=None):
self._image_ids = []
self.image_info = []
self.class_info = [{"source": "", "id": 0, "name": "BG"}]
self.source_class_ids = {}
def add_class(self, source, class_id, class_name):
assert "." not in source, "Source name cannot contain a dot"
for info in self.class_info:
if info['source'] == source and info["id"] == class_id:
return
self.class_info.append({
"source": source,
"id": class_id,
"name": class_name,
})
def add_image(self, source, image_id, path, **kwargs):
image_info = {
"id": image_id,
"source": source,
"path": path,
}
image_info.update(kwargs)
self.image_info.append(image_info)
def image_reference(self, image_id):
"""Return a link to the image in its source Website or details about
the image that help looking it up or debugging it.
Override for your dataset, but pass to this function
if you encounter images not in your dataset.
"""
return ""
def prepare(self, class_map=None):
"""Prepares the Dataset class for use.
TODO: class map is not supported yet. When done, it should handle mapping
classes from different datasets to the same class ID.
"""
def clean_name(name):
"""Returns a shorter version of object names for cleaner display."""
return ",".join(name.split(",")[:1])
self.num_classes = len(self.class_info)
self.class_ids = np.arange(self.num_classes)
self.class_names = [clean_name(c["name"]) for c in self.class_info]
self.num_images = len(self.image_info)
self._image_ids = np.arange(self.num_images)
self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
for info, id in zip(self.class_info, self.class_ids)}
self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id
for info, id in zip(self.image_info, self.image_ids)}
self.sources = list(set([i['source'] for i in self.class_info]))
self.source_class_ids = {}
for source in self.sources:
self.source_class_ids[source] = []
for i, info in enumerate(self.class_info):
if i == 0 or source == info['source']:
self.source_class_ids[source].append(i)
def map_source_class_id(self, source_class_id):
"""Takes a source class ID and returns the int class ID assigned to it.
For example:
dataset.map_source_class_id("coco.12") -> 23
"""
return self.class_from_source_map[source_class_id]
def get_source_class_id(self, class_id, source):
"""Map an internal class ID to the corresponding class ID in the source dataset."""
info = self.class_info[class_id]
assert info['source'] == source
return info['id']
@property
def image_ids(self):
return self._image_ids
def source_image_link(self, image_id):
"""Returns the path or URL to the image.
Override this to return a URL to the image if it's available online for easy
debugging.
"""
return self.image_info[image_id]["path"]
def load_image(self, image_id):
"""Load the specified image and return a [H,W,3] Numpy array.
"""
image = skimage.io.imread(self.image_info[image_id]['path'])
if image.ndim != 3:
image = skimage.color.gray2rgb(image)
if image.shape[-1] == 4:
image = image[..., :3]
return image
def load_mask(self, image_id):
"""Load instance masks for the given image.
Different datasets use different ways to store masks. Override this
method to load instance masks and return them in the form of am
array of binary masks of shape [height, width, instances].
Returns:
masks: A bool array of shape [height, width, instance count] with
a binary mask per instance.
class_ids: a 1D array of class IDs of the instance masks.
"""
logging.warning("You are using the default load_mask(), maybe you need to define your own one.")
mask = np.empty([0, 0, 0])
class_ids = np.empty([0], np.int32)
return mask, class_ids
def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
"""Resizes an image keeping the aspect ratio unchanged.
min_dim: if provided, resizes the image such that it's smaller
dimension == min_dim
max_dim: if provided, ensures that the image longest side doesn't
exceed this value.
min_scale: if provided, ensure that the image is scaled up by at least
this percent even if min_dim doesn't require it.
mode: Resizing mode.
none: No resizing. Return the image unchanged.
square: Resize and pad with zeros to get a square image
of size [max_dim, max_dim].
pad64: Pads width and height with zeros to make them multiples of 64.
If min_dim or min_scale are provided, it scales the image up
before padding. max_dim is ignored in this mode.
The multiple of 64 is needed to ensure smooth scaling of feature
maps up and down the 6 levels of the FPN pyramid (2**6=64).
crop: Picks random crops from the image. First, scales the image based
on min_dim and min_scale, then picks a random crop of
size min_dim x min_dim. Can be used in training only.
max_dim is not used in this mode.
Returns:
image: the resized image
window: (y1, x1, y2, x2). If max_dim is provided, padding might
be inserted in the returned image. If so, this window is the
coordinates of the image part of the full image (excluding
the padding). The x2, y2 pixels are not included.
scale: The scale factor used to resize the image
padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
"""
image_dtype = image.dtype
h, w = image.shape[:2]
window = (0, 0, h, w)
scale = 1
padding = [(0, 0), (0, 0), (0, 0)]
crop = None
if mode == "none":
return image, window, scale, padding, crop
if min_dim:
scale = max(1, min_dim / min(h, w))
if min_scale and scale < min_scale:
scale = min_scale
if max_dim and mode == "square":
image_max = max(h, w)
if round(image_max * scale) > max_dim:
scale = max_dim / image_max
if scale != 1:
image = resize(image, (round(h * scale), round(w * scale)),
preserve_range=True)
if mode == "square":
h, w = image.shape[:2]
top_pad = (max_dim - h) // 2
bottom_pad = max_dim - h - top_pad
left_pad = (max_dim - w) // 2
right_pad = max_dim - w - left_pad
padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
image = np.pad(image, padding, mode='constant', constant_values=0)
window = (top_pad, left_pad, h + top_pad, w + left_pad)
elif mode == "pad64":
h, w = image.shape[:2]
assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
if h % 64 > 0:
max_h = h - (h % 64) + 64
top_pad = (max_h - h) // 2
bottom_pad = max_h - h - top_pad
else:
top_pad = bottom_pad = 0
if w % 64 > 0:
max_w = w - (w % 64) + 64
left_pad = (max_w - w) // 2
right_pad = max_w - w - left_pad
else:
left_pad = right_pad = 0
padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
image = np.pad(image, padding, mode='constant', constant_values=0)
window = (top_pad, left_pad, h + top_pad, w + left_pad)
elif mode == "crop":
h, w = image.shape[:2]
y = random.randint(0, (h - min_dim))
x = random.randint(0, (w - min_dim))
crop = (y, x, min_dim, min_dim)
image = image[y:y + min_dim, x:x + min_dim]
window = (0, 0, min_dim, min_dim)
else:
raise Exception("Mode {} not supported".format(mode))
return image.astype(image_dtype), window, scale, padding, crop
def resize_mask(mask, scale, padding, crop=None):
"""Resizes a mask using the given scale and padding.
Typically, you get the scale and padding from resize_image() to
ensure both, the image and the mask, are resized consistently.
scale: mask scaling factor
padding: Padding to add to the mask in the form
[(top, bottom), (left, right), (0, 0)]
"""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
if crop is not None:
y, x, h, w = crop
mask = mask[y:y + h, x:x + w]
else:
mask = np.pad(mask, padding, mode='constant', constant_values=0)
return mask
def minimize_mask(bbox, mask, mini_shape):
"""Resize masks to a smaller version to reduce memory load.
Mini-masks can be resized back to image scale using expand_masks()
See inspect_data.ipynb notebook for more details.
"""
mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
for i in range(mask.shape[-1]):
m = mask[:, :, i].astype(bool)
y1, x1, y2, x2 = bbox[i][:4]
m = m[y1:y2, x1:x2]
if m.size == 0:
raise Exception("Invalid bounding box with area of zero")
m = resize(m, mini_shape)
mini_mask[:, :, i] = np.around(m).astype(np.bool)
return mini_mask
def expand_mask(bbox, mini_mask, image_shape):
"""Resizes mini masks back to image size. Reverses the change
of minimize_mask().
See inspect_data.ipynb notebook for more details.
"""
mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
for i in range(mask.shape[-1]):
m = mini_mask[:, :, i]
y1, x1, y2, x2 = bbox[i][:4]
h = y2 - y1
w = x2 - x1
m = resize(m, (h, w))
mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool)
return mask
def mold_mask(mask, config):
pass
def unmold_mask(mask, bbox, image_shape):
"""Converts a mask generated by the neural network to a format similar
to its original shape.
mask: [height, width] of type float. A small, typically 28x28 mask.
bbox: [y1, x1, y2, x2]. The box to fit the mask in.
Returns a binary mask with the same size as the original image.
"""
threshold = 0.5
y1, x1, y2, x2 = bbox
mask = resize(mask, (y2 - y1, x2 - x1))
mask = np.where(mask >= threshold, 1, 0).astype(np.bool)
full_mask = np.zeros(image_shape[:2], dtype=np.bool)
full_mask[y1:y2, x1:x2] = mask
return full_mask
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
"""
scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
shape: [height, width] spatial shape of the feature map over which
to generate anchors.
feature_stride: Stride of the feature map relative to the image in pixels.
anchor_stride: Stride of anchors on the feature map. For example, if the
value is 2 then generate anchors for every other feature map pixel.
"""
scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
scales = scales.flatten()
ratios = ratios.flatten()
heights = scales / np.sqrt(ratios)
widths = scales * np.sqrt(ratios)
shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
box_centers = np.stack(
[box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
boxes = np.concatenate([box_centers - 0.5 * box_sizes,
box_centers + 0.5 * box_sizes], axis=1)
return boxes
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
anchor_stride):
"""Generate anchors at different levels of a feature pyramid. Each scale
is associated with a level of the pyramid, but each ratio is used in
all levels of the pyramid.
Returns:
anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
with the same order of the given scales. So, anchors of scale[0] come
first, then anchors of scale[1], and so on.
"""
anchors = []
for i in range(len(scales)):
anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
feature_strides[i], anchor_stride))
return np.concatenate(anchors, axis=0)
def trim_zeros(x):
"""It's common to have tensors larger than the available data and
pad with zeros. This function removes rows that are all zeros.
x: [rows, columns].
"""
assert len(x.shape) == 2
return x[~np.all(x == 0, axis=1)]
def compute_matches(gt_boxes, gt_class_ids, gt_masks,
pred_boxes, pred_class_ids, pred_scores, pred_masks,
iou_threshold=0.5, score_threshold=0.0):
"""Finds matches between prediction and ground truth instances.
Returns:
gt_match: 1-D array. For each GT box it has the index of the matched
predicted box.
pred_match: 1-D array. For each predicted box, it has the index of
the matched ground truth box.
overlaps: [pred_boxes, gt_boxes] IoU overlaps.
"""
gt_boxes = trim_zeros(gt_boxes)
gt_masks = gt_masks[..., :gt_boxes.shape[0]]
pred_boxes = trim_zeros(pred_boxes)
pred_scores = pred_scores[:pred_boxes.shape[0]]
indices = np.argsort(pred_scores)[::-1]
pred_boxes = pred_boxes[indices]
pred_class_ids = pred_class_ids[indices]
pred_scores = pred_scores[indices]
pred_masks = pred_masks[..., indices]
overlaps = compute_overlaps_masks(pred_masks, gt_masks)
match_count = 0
pred_match = -1 * np.ones([pred_boxes.shape[0]])
gt_match = -1 * np.ones([gt_boxes.shape[0]])
for i in range(len(pred_boxes)):
sorted_ixs = np.argsort(overlaps[i])[::-1]
low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
if low_score_idx.size > 0:
sorted_ixs = sorted_ixs[:low_score_idx[0]]
for j in sorted_ixs:
if gt_match[j] > -1:
continue
iou = overlaps[i, j]
if iou < iou_threshold:
break
if pred_class_ids[i] == gt_class_ids[j]:
match_count += 1
gt_match[j] = i
pred_match[i] = j
break
return gt_match, pred_match, overlaps
def compute_ap(gt_boxes, gt_class_ids, gt_masks,
pred_boxes, pred_class_ids, pred_scores, pred_masks,
iou_threshold=0.5):
"""Compute Average Precision at a set IoU threshold (default 0.5).
Returns:
mAP: Mean Average Precision
precisions: List of precisions at different class score thresholds.
recalls: List of recall values at different class score thresholds.
overlaps: [pred_boxes, gt_boxes] IoU overlaps.
"""
gt_match, pred_match, overlaps = compute_matches(
gt_boxes, gt_class_ids, gt_masks,
pred_boxes, pred_class_ids, pred_scores, pred_masks,
iou_threshold)
precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1)
recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match)
precisions = np.concatenate([[0], precisions, [0]])
recalls = np.concatenate([[0], recalls, [1]])
for i in range(len(precisions) - 2, -1, -1):
precisions[i] = np.maximum(precisions[i], precisions[i + 1])
indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
precisions[indices])
return mAP, precisions, recalls, overlaps
def compute_ap_range(gt_box, gt_class_id, gt_mask,
pred_box, pred_class_id, pred_score, pred_mask,
iou_thresholds=None, verbose=1):
"""Compute AP over a range or IoU thresholds. Default range is 0.5-0.95."""
iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05)
AP = []
for iou_threshold in iou_thresholds:
ap, precisions, recalls, overlaps =\
compute_ap(gt_box, gt_class_id, gt_mask,
pred_box, pred_class_id, pred_score, pred_mask,
iou_threshold=iou_threshold)
if verbose:
print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap))
AP.append(ap)
AP = np.array(AP).mean()
if verbose:
print("AP @{:.2f}-{:.2f}:\t {:.3f}".format(
iou_thresholds[0], iou_thresholds[-1], AP))
return AP
def compute_recall(pred_boxes, gt_boxes, iou):
"""Compute the recall at the given IoU threshold. It's an indication
of how many GT boxes were found by the given prediction boxes.
pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
"""
overlaps = compute_overlaps(pred_boxes, gt_boxes)
iou_max = np.max(overlaps, axis=1)
iou_argmax = np.argmax(overlaps, axis=1)
positive_ids = np.where(iou_max >= iou)[0]
matched_gt_boxes = iou_argmax[positive_ids]
recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
return recall, positive_ids
def batch_slice(inputs, graph_fn, batch_size, names=None):
"""Splits inputs into slices and feeds each slice to a copy of the given
computation graph and then combines the results. It allows you to run a
graph on a batch of inputs even if the graph is written to support one
instance only.
inputs: list of tensors. All must have the same first dimension length
graph_fn: A function that returns a TF tensor that's part of a graph.
batch_size: number of slices to divide the data into.
names: If provided, assigns names to the resulting tensors.
"""
if not isinstance(inputs, list):
inputs = [inputs]
outputs = []
for i in range(batch_size):
inputs_slice = [x[i] for x in inputs]
output_slice = graph_fn(*inputs_slice)
if not isinstance(output_slice, (tuple, list)):
output_slice = [output_slice]
outputs.append(output_slice)
outputs = list(zip(*outputs))
if names is None:
names = [None] * len(outputs)
result = [tf.stack(o, axis=0, name=n)
for o, n in zip(outputs, names)]
if len(result) == 1:
result = result[0]
return result
def download_trained_weights(coco_model_path, verbose=1):
"""Download COCO trained weights from Releases.
coco_model_path: local path of COCO trained weights
"""
if verbose > 0:
print("Downloading pretrained model to " + coco_model_path + " ...")
with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
shutil.copyfileobj(resp, out)
if verbose > 0:
print("... done downloading pretrained model!")
def norm_boxes(boxes, shape):
"""Converts boxes from pixel coordinates to normalized coordinates.
boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
shape: [..., (height, width)] in pixels
Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
coordinates it's inside the box.
Returns:
[N, (y1, x1, y2, x2)] in normalized coordinates
"""
h, w = shape
scale = np.array([h - 1, w - 1, h - 1, w - 1])
shift = np.array([0, 0, 1, 1])
return np.divide((boxes - shift), scale).astype(np.float32)
def denorm_boxes(boxes, shape):
"""Converts boxes from normalized coordinates to pixel coordinates.
boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
shape: [..., (height, width)] in pixels
Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
coordinates it's inside the box.
Returns:
[N, (y1, x1, y2, x2)] in pixel coordinates
"""
h, w = shape
scale = np.array([h - 1, w - 1, h - 1, w - 1])
shift = np.array([0, 0, 1, 1])
return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
"""A wrapper for Scikit-Image resize().
Scikit-Image generates warnings on every call to resize() if it doesn't
receive the right parameters. The right parameters depend on the version
of skimage. This solves the problem by using different parameters per
version. And it provides a central place to control resizing defaults.
"""
if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
return skimage.transform.resize(
image, output_shape,
order=order, mode=mode, cval=cval, clip=clip,
preserve_range=preserve_range, anti_aliasing=anti_aliasing,
anti_aliasing_sigma=anti_aliasing_sigma)
else:
return skimage.transform.resize(
image, output_shape,
order=order, mode=mode, cval=cval, clip=clip,
preserve_range=preserve_range)