"""
Mask R-CNN
Configurations and data loading code for MS COCO.
Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
------------------------------------------------------------
Usage: import the module (see Jupyter notebooks for examples), or run from
the command line as such:
# Train a new model starting from pre-trained COCO weights
python3 coco.py train --dataset=/path/to/coco/ --model=coco
# Train a new model starting from ImageNet weights. Also auto download COCO dataset
python3 coco.py train --dataset=/path/to/coco/ --model=imagenet --download=True
# Continue training a model that you had trained earlier
python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
# Continue training the last model you trained
python3 coco.py train --dataset=/path/to/coco/ --model=last
# Run COCO evaluation on the last model you trained
python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
"""
import os
import sys
import time
import numpy as np
import imgaug
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils
import zipfile
import urllib.request
import shutil
# Root directory of the project. NOTE: "../../" is resolved against the current
# working directory, so the value depends on where the script is launched from
# (presumably two levels below the project root -- confirm).
ROOT_DIR = os.path.abspath("../../")
# Put ROOT_DIR on the import path; presumably so that the `mrcnn` package
# imported just below resolves to the local copy.
sys.path.append(ROOT_DIR)
from mrcnn.config import Config
from mrcnn import model as modellib, utils
# Weights file loaded when the command line asks for --model=coco.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Default value of the --logs option (logs and checkpoints directory).
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
# Default dataset year for --year and load_coco(); kept as a string because it
# is spliced into file names and URLs.
DEFAULT_DATASET_YEAR = "2014"
class CocoConfig(Config):
    """Settings used for training on the MS COCO dataset.

    Subclasses the base Config class and overrides only the values that
    are specific to COCO.
    """
    # Name given to this configuration.
    NAME = "coco"

    # Number of images processed on each GPU.
    IMAGES_PER_GPU = 2

    # 80 COCO categories plus one extra class (presumably the background).
    NUM_CLASSES = 1 + 80
class CocoDataset(utils.Dataset):
    """Dataset that loads MS COCO images, annotations and instance masks."""

    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
                  class_map=None, return_coco=False, auto_download=False):
        """Load a subset of the COCO dataset.

        dataset_dir: The root directory of the COCO dataset.
        subset: What to load (train, val, minival, valminusminival)
        year: What dataset year to load (2014, 2017) as a string, not an integer
        class_ids: If provided, only loads images that have the given classes.
        class_map: TODO: Not implemented yet. Supports mapping classes from
            different datasets to the same class ID.
        return_coco: If True, returns the COCO object.
        auto_download: Automatically download and unzip MS-COCO images and annotations

        Returns:
            The pycocotools COCO object when return_coco is true, otherwise None.
        """
        if auto_download is True:
            self.auto_download(dataset_dir, subset, year)

        # The annotation file is named after the requested subset ...
        coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
        # ... but minival / valminusminival read their images from the "val" folder.
        if subset == "minival" or subset == "valminusminival":
            subset = "val"
        image_dir = "{}/{}{}".format(dataset_dir, subset, year)

        # Load all classes or a subset?
        if not class_ids:
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            # Union of the images that contain at least one requested class.
            image_ids = []
            for class_id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[class_id])))
            # Remove duplicates (an image may contain several classes).
            image_ids = list(set(image_ids))
        else:
            # Only reachable when the annotation file defines no categories.
            image_ids = list(coco.imgs.keys())

        # Register classes.
        for class_id in class_ids:
            self.add_class("coco", class_id, coco.loadCats(class_id)[0]["name"])

        # Register images together with their annotations.
        for image_id in image_ids:
            image = coco.imgs[image_id]
            self.add_image(
                "coco", image_id=image_id,
                path=os.path.join(image_dir, image['file_name']),
                width=image["width"],
                height=image["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[image_id], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco

    @staticmethod
    def _download(url, destination):
        """Stream the resource at `url` into the local file `destination`."""
        with urllib.request.urlopen(url) as resp, open(destination, 'wb') as out:
            shutil.copyfileobj(resp, out)

    def auto_download(self, dataDir, dataType, dataYear):
        """Download the COCO dataset/annotations if requested.

        dataDir: The root directory of the COCO dataset.
        dataType: What to load (train, val, minival, valminusminival)
        dataYear: What dataset year to load (2014, 2017) as a string, not an integer

        Note:
            For 2014, use "train", "val", "minival", or "valminusminival"
            For 2017, only "train" and "val" annotations are available
        """
        # minival and valminusminival use the images of the "val" archive.
        if dataType == "minival" or dataType == "valminusminival":
            imgSubset = "val"
        else:
            imgSubset = dataType
        imgDir = "{}/{}{}".format(dataDir, imgSubset, dataYear)
        imgZipFile = "{}/{}{}.zip".format(dataDir, imgSubset, dataYear)
        imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(imgSubset, dataYear)

        # Create the main folder if it doesn't exist yet.
        os.makedirs(dataDir, exist_ok=True)

        # Download images if not available locally.
        if not os.path.exists(imgDir):
            os.makedirs(imgDir)
            print("Downloading images to " + imgZipFile + " ...")
            self._download(imgURL, imgZipFile)
            print("... done downloading.")
            print("Unzipping " + imgZipFile)
            with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
                zip_ref.extractall(dataDir)
            print("... done unzipping")
        print("Will use images in " + imgDir)

        # Work out where the annotations come from and where they are unpacked.
        annDir = "{}/annotations".format(dataDir)
        if dataType == "minival":
            annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
            annFile = "{}/instances_minival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
            unZipDir = annDir
        elif dataType == "valminusminival":
            annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
            annFile = "{}/instances_valminusminival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
            unZipDir = annDir
        else:
            annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
            annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
            annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
            unZipDir = dataDir

        # Download annotations if not available locally; an already downloaded
        # archive is reused and only unzipped again.
        os.makedirs(annDir, exist_ok=True)
        if not os.path.exists(annFile):
            if not os.path.exists(annZipFile):
                print("Downloading zipped annotations to " + annZipFile + " ...")
                self._download(annURL, annZipFile)
                print("... done downloading.")
            print("Unzipping " + annZipFile)
            with zipfile.ZipFile(annZipFile, "r") as zip_ref:
                zip_ref.extractall(unZipDir)
            print("... done unzipping")
        print("Will use annotations in " + annFile)

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].

        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
                Crowd annotations are reported with a negated class ID.
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(CocoDataset, self).load_mask(image_id)

        height = image_info["height"]
        width = image_info["width"]
        instance_masks = []
        class_ids = []
        for annotation in image_info["annotations"]:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            # A falsy class ID means the annotation is not used.
            if not class_id:
                continue
            m = self.annToMask(annotation, height, width)
            # Skip masks without any set pixel.
            if m.max() < 1:
                continue
            if annotation['iscrowd']:
                # Use negative class ID for crowds.
                class_id *= -1
                # If the decoded crowd mask does not match the image
                # dimensions, fall back to a mask covering the whole image.
                if m.shape[0] != height or m.shape[1] != width:
                    m = np.ones([height, width], dtype=bool)
            instance_masks.append(m)
            class_ids.append(class_id)

        if not class_ids:
            # No usable instance: let the parent class build its default result.
            return super(CocoDataset, self).load_mask(image_id)

        # The builtin bool is used because the np.bool alias was removed
        # from NumPy (1.24) and raises AttributeError there.
        mask = np.stack(instance_masks, axis=2).astype(bool)
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids

    def image_reference(self, image_id):
        """Return a link to the image in the COCO Website."""
        info = self.image_info[image_id]
        if info["source"] == "coco":
            return "http://cocodataset.org/#explore?id={}".format(info["id"])
        # Propagate the parent's answer (it was previously computed but dropped).
        return super(CocoDataset, self).image_reference(image_id)

    # The following two functions are adapted from pycocotools with a few
    # changes (height and width are passed in explicitly).

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: RLE encoding of the annotation's segmentation
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # already an RLE
            rle = segm
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        return maskUtils.decode(rle)
def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange results to match COCO specs in http://cocodataset.org/#format

    Produces one result dict per (image id, detection) pair; returns an
    empty list when `rois` is None.
    """
    # Nothing was detected.
    if rois is None:
        return []

    def make_entry(coco_image_id, idx):
        # Gather everything belonging to detection `idx`.
        label = class_ids[idx]
        confidence = scores[idx]
        box = np.around(rois[idx], 1)
        instance_mask = masks[:, :, idx]
        return {
            "image_id": coco_image_id,
            "category_id": dataset.get_source_class_id(label, "coco"),
            # Reordered to [box[1], box[0], width, height]; assumes rois rows
            # are (y1, x1, y2, x2) -- confirm against the model output.
            "bbox": [box[1], box[0], box[3] - box[1], box[2] - box[0]],
            "score": confidence,
            "segmentation": maskUtils.encode(np.asfortranarray(instance_mask))
        }

    return [
        make_entry(image_id, idx)
        for image_id in image_ids
        for idx in range(rois.shape[0])
    ]
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.

    model: Object whose detect() is called on one image at a time.
    dataset: A Dataset object with validation data
    coco: The COCO object holding the ground truth for `dataset`.
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    image_ids: Optional dataset image IDs to evaluate; when missing or empty,
        all images of `dataset` are used.
    """
    # Pick images from the dataset when none were given. The emptiness test is
    # explicit because `image_ids or ...` raises ValueError for NumPy arrays
    # holding more than one element.
    if image_ids is None or len(image_ids) == 0:
        image_ids = dataset.image_ids

    # Limit to a subset.
    if limit:
        image_ids = image_ids[:limit]

    # Get the corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[image_id]["id"] for image_id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image.
        image = dataset.load_image(image_id)

        # Run detection, timing only the model call.
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format. Masks are cast to uint8 because
        # the RLE encoder used downstream expects that type.
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"],
                                           r["masks"].astype(np.uint8))
        results.extend(image_results)

    # An empty result list cannot be loaded by coco.loadRes() (it indexes
    # the first element), so report and stop instead of crashing.
    if not results:
        print("No detections to evaluate.")
        print("Total time: ", time.time() - t_start)
        return

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate.
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)
if __name__ == '__main__':
    import argparse

    def _parse_bool(value):
        """Interpret a command-line string as a boolean.

        argparse's `type=bool` treats every non-empty string -- including
        "False" -- as True, so the text is parsed explicitly instead.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "on", "1"):
            return True
        if text in ("false", "f", "no", "n", "off", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected True or False, got '{}'".format(value))

    # Parse command line arguments.
    parser = argparse.ArgumentParser(
        description='Train Mask R-CNN on MS COCO.')
    parser.add_argument("command",
                        metavar="<command>",
                        help="'train' or 'evaluate' on MS COCO")
    parser.add_argument('--dataset', required=True,
                        metavar="/path/to/coco/",
                        help='Directory of the MS-COCO dataset')
    parser.add_argument('--year', required=False,
                        default=DEFAULT_DATASET_YEAR,
                        metavar="<year>",
                        help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
    parser.add_argument('--model', required=True,
                        metavar="/path/to/weights.h5",
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False,
                        default=DEFAULT_LOGS_DIR,
                        metavar="/path/to/logs/",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--limit', required=False,
                        default=500,
                        metavar="<image count>",
                        help='Images to use for evaluation (default=500)',
                        type=int)
    parser.add_argument('--download', required=False,
                        default=False,
                        metavar="<True|False>",
                        help='Automatically download and unzip MS-COCO files (default=False)',
                        type=_parse_bool)
    args = parser.parse_args()
    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    # Configurations.
    if args.command == "train":
        config = CocoConfig()
    else:
        class InferenceConfig(CocoConfig):
            # Run detection on one image at a time: a single GPU with
            # one image per GPU.
            GPU_COUNT = 1
            IMAGES_PER_GPU = 1
            DETECTION_MIN_CONFIDENCE = 0
        config = InferenceConfig()
    config.display()

    # Create model.
    if args.command == "train":
        model = modellib.MaskRCNN(mode="training", config=config,
                                  model_dir=args.logs)
    else:
        model = modellib.MaskRCNN(mode="inference", config=config,
                                  model_dir=args.logs)

    # Select weights file to load.
    if args.model.lower() == "coco":
        model_path = COCO_MODEL_PATH
    elif args.model.lower() == "last":
        # Find last trained weights.
        model_path = model.find_last()
    elif args.model.lower() == "imagenet":
        # Start from ImageNet trained weights.
        model_path = model.get_imagenet_weights()
    else:
        model_path = args.model

    # Load weights.
    print("Loading weights ", model_path)
    model.load_weights(model_path, by_name=True)

    # Train or evaluate.
    if args.command == "train":
        # Training dataset. For 2014 the valminusminival split is added to the
        # training images. Equality is used on purpose: the former
        # `args.year in '2014'` was a substring test that also matched
        # values such as '' or '20'.
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
        if args.year == '2014':
            dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
        dataset_train.prepare()

        # Validation dataset.
        dataset_val = CocoDataset()
        val_type = "val" if args.year == '2017' else "minival"
        dataset_val.load_coco(args.dataset, val_type, year=args.year, auto_download=args.download)
        dataset_val.prepare()

        # Image augmentation: flip images left/right with probability 0.5.
        augmentation = imgaug.augmenters.Fliplr(0.5)

        # Training - Stage 1
        print("Training network heads")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=40,
                    layers='heads',
                    augmentation=augmentation)

        # Training - Stage 2
        print("Fine tune Resnet stage 4 and up")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=120,
                    layers='4+',
                    augmentation=augmentation)

        # Training - Stage 3, with the learning rate divided by 10.
        print("Fine tune all layers")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE / 10,
                    epochs=160,
                    layers='all',
                    augmentation=augmentation)

    elif args.command == "evaluate":
        # Validation dataset.
        dataset_val = CocoDataset()
        val_type = "val" if args.year == '2017' else "minival"
        coco = dataset_val.load_coco(args.dataset, val_type, year=args.year, return_coco=True, auto_download=args.download)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(args.limit))
        evaluate_coco(model, dataset_val, coco, "bbox", limit=args.limit)
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))