"""1Mask R-CNN2Base Configurations class.34Copyright (c) 2017 Matterport, Inc.5Licensed under the MIT License (see LICENSE for details)6Written by Waleed Abdulla7"""89import numpy as np101112# Base Configuration Class13# Don't use this class directly. Instead, sub-class it and override14# the configurations you need to change.1516class Config(object):17"""Base configuration class. For custom configurations, create a18sub-class that inherits from this one and override properties19that need to be changed.20"""21# Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.22# Useful if your code needs to do things differently depending on which23# experiment is running.24NAME = None # Override in sub-classes2526# NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1.27GPU_COUNT = 12829# Number of images to train with on each GPU. A 12GB GPU can typically30# handle 2 images of 1024x1024px.31# Adjust based on your GPU memory and image sizes. Use the highest32# number that your GPU can handle for best performance.33IMAGES_PER_GPU = 23435# Number of training steps per epoch36# This doesn't need to match the size of the training set. Tensorboard37# updates are saved at the end of each epoch, so setting this to a38# smaller number means getting more frequent TensorBoard updates.39# Validation stats are also calculated at each epoch end and they40# might take a while, so don't set this too small to avoid spending41# a lot of time on validation stats.42STEPS_PER_EPOCH = 10004344# Number of validation steps to run at the end of every training epoch.45# A bigger number improves accuracy of validation stats, but slows46# down the training.47VALIDATION_STEPS = 504849# Backbone network architecture50# Supported values are: resnet50, resnet101.51# You can also provide a callable that should have the signature52# of model.resnet_graph. If you do so, you need to supply a callable53# to COMPUTE_BACKBONE_SHAPE as well54BACKBONE = "resnet101"5556# Only useful if you supply a callable to BACKBONE. Should compute57# the shape of each layer of the FPN Pyramid.58# See model.compute_backbone_shapes59COMPUTE_BACKBONE_SHAPE = None6061# The strides of each layer of the FPN Pyramid. 
    # Size of the fully-connected layers in the classification graph
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024

    # Size of the top-down layers used to build the feature pyramid
    TOP_DOWN_PYRAMID_SIZE = 256

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after tf.nn.top_k and before non-maximum suppression
    PRE_NMS_LIMIT = 6000

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Generally, use the "square" resizing mode for training and predicting
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image based
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
    #         IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further
    # up scaling. For example, if set to 2 then images are scaled up to double
    # the width and height, or more, even if IMAGE_MIN_DIM doesn't require it.
    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0
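    # Worked example of the default "square" mode (an illustrative note,
    # not extra configuration): an 800x600 image would first be scaled by
    # 800/600 ≈ 1.33 to bring the short side up to IMAGE_MIN_DIM, but that
    # would push the long side to ~1067 > IMAGE_MAX_DIM, so the scale is
    # capped at 1024/800 = 1.28. The result is a 1024x768 image, which is
    # then zero-padded to 1024x1024.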
    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
    # Changing this requires other changes in the code. See the WIKI for more
    # details: https://github.com/matterport/Mask_RCNN/wiki
    IMAGE_CHANNEL_COUNT = 3

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Fraction of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of output mask
    # To change this you also need to change the neural network mask branch
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Loss weights for more precise optimization.
    # Can be used for R-CNN training setup.
    LOSS_WEIGHTS = {
        "rpn_class_loss": 1.,
        "rpn_bbox_loss": 1.,
        "mrcnn_class_loss": 1.,
        "mrcnn_bbox_loss": 1.,
        "mrcnn_mask_loss": 1.
    }

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROIs generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers
    #     None: Train BN layers. This is the normal mode
    #     False: Freeze BN layers. Good when using a small batch size
    #     True: (don't use). Set layer in training mode even when predicting
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size
        if self.IMAGE_RESIZE_MODE == "crop":
            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
                                         self.IMAGE_CHANNEL_COUNT])
        else:
            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
                                         self.IMAGE_CHANNEL_COUNT])

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")
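
# Example usage (an illustrative sketch, not part of the original module):
# sub-class Config and override only the attributes you need. The dataset
# name, class count, and image dimensions below are hypothetical.
if __name__ == "__main__":
    class ShapesConfig(Config):
        """Hypothetical configuration for a small 'shapes' dataset."""
        NAME = "shapes"
        GPU_COUNT = 1
        IMAGES_PER_GPU = 1       # small batch to fit a modest GPU
        NUM_CLASSES = 1 + 3      # background + 3 shape classes
        IMAGE_MIN_DIM = 128
        IMAGE_MAX_DIM = 128

    # BATCH_SIZE and IMAGE_SHAPE are computed in __init__ from the
    # overridden values: batch size 1, image shape [128, 128, 3].
    config = ShapesConfig()
    config.display()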