CoCalc -- config.py

GitHub Repository: matterport/Mask_RCNN
Path: blob/master/mrcnn/config.py
²³⁹ views
1
"""
2
Mask R-CNN
3
Base Configurations class.
4

5
Copyright (c) 2017 Matterport, Inc.
6
Licensed under the MIT License (see LICENSE for details)
7
Written by Waleed Abdulla
8
"""
9

10
import numpy as np
11

12

13
# Base Configuration Class
14
# Don't use this class directly. Instead, sub-class it and override
15
# the configurations you need to change.
16

17
class Config(object):
18
    """Base configuration class. For custom configurations, create a
19
    sub-class that inherits from this one and override properties
20
    that need to be changed.
21
    """
22
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
23
    # Useful if your code needs to do things differently depending on which
24
    # experiment is running.
25
    NAME = None  # Override in sub-classes
26

27
    # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1.
28
    GPU_COUNT = 1
29

30
    # Number of images to train with on each GPU. A 12GB GPU can typically
31
    # handle 2 images of 1024x1024px.
32
    # Adjust based on your GPU memory and image sizes. Use the highest
33
    # number that your GPU can handle for best performance.
34
    IMAGES_PER_GPU = 2
35

36
    # Number of training steps per epoch
37
    # This doesn't need to match the size of the training set. Tensorboard
38
    # updates are saved at the end of each epoch, so setting this to a
39
    # smaller number means getting more frequent TensorBoard updates.
40
    # Validation stats are also calculated at each epoch end and they
41
    # might take a while, so don't set this too small to avoid spending
42
    # a lot of time on validation stats.
43
    STEPS_PER_EPOCH = 1000
44

45
    # Number of validation steps to run at the end of every training epoch.
46
    # A bigger number improves accuracy of validation stats, but slows
47
    # down the training.
48
    VALIDATION_STEPS = 50
49

50
    # Backbone network architecture
51
    # Supported values are: resnet50, resnet101.
52
    # You can also provide a callable that should have the signature
53
    # of model.resnet_graph. If you do so, you need to supply a callable
54
    # to COMPUTE_BACKBONE_SHAPE as well
55
    BACKBONE = "resnet101"
56

57
    # Only useful if you supply a callable to BACKBONE. Should compute
58
    # the shape of each layer of the FPN Pyramid.
59
    # See model.compute_backbone_shapes
60
    COMPUTE_BACKBONE_SHAPE = None
61

62
    # The strides of each layer of the FPN Pyramid. These values
63
    # are based on a Resnet101 backbone.
64
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]
65

66
    # Size of the fully-connected layers in the classification graph
67
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
68

69
    # Size of the top-down layers used to build the feature pyramid
70
    TOP_DOWN_PYRAMID_SIZE = 256
71

72
    # Number of classification classes (including background)
73
    NUM_CLASSES = 1  # Override in sub-classes
74

75
    # Length of square anchor side in pixels
76
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
77

78
    # Ratios of anchors at each cell (width/height)
79
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
80
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]
81

82
    # Anchor stride
83
    # If 1 then anchors are created for each cell in the backbone feature map.
84
    # If 2, then anchors are created for every other cell, and so on.
85
    RPN_ANCHOR_STRIDE = 1
86

87
    # Non-max suppression threshold to filter RPN proposals.
88
    # You can increase this during training to generate more propsals.
89
    RPN_NMS_THRESHOLD = 0.7
90

91
    # How many anchors per image to use for RPN training
92
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256
93
    
94
    # ROIs kept after tf.nn.top_k and before non-maximum suppression
95
    PRE_NMS_LIMIT = 6000
96

97
    # ROIs kept after non-maximum suppression (training and inference)
98
    POST_NMS_ROIS_TRAINING = 2000
99
    POST_NMS_ROIS_INFERENCE = 1000
100

101
    # If enabled, resizes instance masks to a smaller size to reduce
102
    # memory load. Recommended when using high-resolution images.
103
    USE_MINI_MASK = True
104
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask
105

106
    # Input image resizing
107
    # Generally, use the "square" resizing mode for training and predicting
108
    # and it should work well in most cases. In this mode, images are scaled
109
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
110
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
111
    # padded with zeros to make it a square so multiple images can be put
112
    # in one batch.
113
    # Available resizing modes:
114
    # none:   No resizing or padding. Return the image unchanged.
115
    # square: Resize and pad with zeros to get a square image
116
    #         of size [max_dim, max_dim].
117
    # pad64:  Pads width and height with zeros to make them multiples of 64.
118
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
119
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
120
    #         The multiple of 64 is needed to ensure smooth scaling of feature
121
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
122
    # crop:   Picks random crops from the image. First, scales the image based
123
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
124
    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
125
    #         IMAGE_MAX_DIM is not used in this mode.
126
    IMAGE_RESIZE_MODE = "square"
127
    IMAGE_MIN_DIM = 800
128
    IMAGE_MAX_DIM = 1024
129
    # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further
130
    # up scaling. For example, if set to 2 then images are scaled up to double
131
    # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it.
132
    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
133
    IMAGE_MIN_SCALE = 0
134
    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
135
    # Changing this requires other changes in the code. See the WIKI for more
136
    # details: https://github.com/matterport/Mask_RCNN/wiki
137
    IMAGE_CHANNEL_COUNT = 3
138

139
    # Image mean (RGB)
140
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
141

142
    # Number of ROIs per image to feed to classifier/mask heads
143
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
144
    # enough positive proposals to fill this and keep a positive:negative
145
    # ratio of 1:3. You can increase the number of proposals by adjusting
146
    # the RPN NMS threshold.
147
    TRAIN_ROIS_PER_IMAGE = 200
148

149
    # Percent of positive ROIs used to train classifier/mask heads
150
    ROI_POSITIVE_RATIO = 0.33
151

152
    # Pooled ROIs
153
    POOL_SIZE = 7
154
    MASK_POOL_SIZE = 14
155

156
    # Shape of output mask
157
    # To change this you also need to change the neural network mask branch
158
    MASK_SHAPE = [28, 28]
159

160
    # Maximum number of ground truth instances to use in one image
161
    MAX_GT_INSTANCES = 100
162

163
    # Bounding box refinement standard deviation for RPN and final detections.
164
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
165
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
166

167
    # Max number of final detections
168
    DETECTION_MAX_INSTANCES = 100
169

170
    # Minimum probability value to accept a detected instance
171
    # ROIs below this threshold are skipped
172
    DETECTION_MIN_CONFIDENCE = 0.7
173

174
    # Non-maximum suppression threshold for detection
175
    DETECTION_NMS_THRESHOLD = 0.3
176

177
    # Learning rate and momentum
178
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
179
    # weights to explode. Likely due to differences in optimizer
180
    # implementation.
181
    LEARNING_RATE = 0.001
182
    LEARNING_MOMENTUM = 0.9
183

184
    # Weight decay regularization
185
    WEIGHT_DECAY = 0.0001
186

187
    # Loss weights for more precise optimization.
188
    # Can be used for R-CNN training setup.
189
    LOSS_WEIGHTS = {
190
        "rpn_class_loss": 1.,
191
        "rpn_bbox_loss": 1.,
192
        "mrcnn_class_loss": 1.,
193
        "mrcnn_bbox_loss": 1.,
194
        "mrcnn_mask_loss": 1.
195
    }
196

197
    # Use RPN ROIs or externally generated ROIs for training
198
    # Keep this True for most situations. Set to False if you want to train
199
    # the head branches on ROI generated by code rather than the ROIs from
200
    # the RPN. For example, to debug the classifier head without having to
201
    # train the RPN.
202
    USE_RPN_ROIS = True
203

204
    # Train or freeze batch normalization layers
205
    #     None: Train BN layers. This is the normal mode
206
    #     False: Freeze BN layers. Good when using a small batch size
207
    #     True: (don't use). Set layer in training mode even when predicting
208
    TRAIN_BN = False  # Defaulting to False since batch size is often small
209

210
    # Gradient norm clipping
211
    GRADIENT_CLIP_NORM = 5.0
212

213
    def __init__(self):
214
        """Set values of computed attributes."""
215
        # Effective batch size
216
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
217

218
        # Input image size
219
        if self.IMAGE_RESIZE_MODE == "crop":
220
            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
221
                self.IMAGE_CHANNEL_COUNT])
222
        else:
223
            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
224
                self.IMAGE_CHANNEL_COUNT])
225

226
        # Image meta data length
227
        # See compose_image_meta() for details
228
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
229

230
    def display(self):
231
        """Display Configuration values."""
232
        print("\nConfigurations:")
233
        for a in dir(self):
234
            if not a.startswith("__") and not callable(getattr(self, a)):
235
                print("{:30} {}".format(a, getattr(self, a)))
236
        print("\n")
237

238
Product

Resources

Company