Merge pull request #32 from pshivraj/master

Training Pipeline
havanagrawal · Mar 9, 2019 · 2060874 · 2060874
2 parents 00ecec8 + 9009c30
commit 2060874
Show file tree

Hide file tree

Showing 9 changed files with 5,458 additions and 0 deletions.
diff --git a/mrcnn/mask_data/id_map.json b/mrcnn/mask_data/id_map.json
@@ -0,0 +1,5 @@
+{
+    "1": "bottles",
+    "2": "boxes",
+    "3": "bags"
+}
diff --git a/mrcnn/scripts/Inference_notebook.ipynb b/mrcnn/scripts/Inference_notebook.ipynb
diff --git a/mrcnn/scripts/config.py b/mrcnn/scripts/config.py
@@ -0,0 +1,330 @@
+"""
+Mask R-CNN Base Configurations class.
+
+Copyright (c) 2017 Matterport, Inc.
+Licensed under the MIT License (see LICENSE for details)
+Written by Waleed Abdulla
+https://github.com/matterport/Mask_RCNN
+
+New classes by team clomask:
+ - ClomaskConfig()
+"""
+
+import os
+import sys
+import time
+import numpy as np
+import model as modellib
+import math
+import utils
+import cv2
+import pandas as pd
+from skimage.color import rgb2hed
+from skimage.exposure import rescale_intensity
+from scipy.ndimage.morphology import binary_fill_holes
+
+
+class Config(object):
+    """Base configuration class. For custom configurations, create a
+    sub-class that inherits from this one and override properties
+    that need to be changed.
+    """
+    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
+    # Useful if your code needs to do things differently depending on which
+    # experiment is running.
+    NAME = None  # Override in sub-classes
+
+    # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1.
+    GPU_COUNT = 1
+
+    # Number of images to train with on each GPU. A 12GB GPU can typically
+    # handle 2 images of 1024x1024px.
+    # Adjust based on your GPU memory and image sizes. Use the highest
+    # number that your GPU can handle for best performance.
+    IMAGES_PER_GPU = 2
+
+    # Number of training steps per epoch
+    # This doesn't need to match the size of the training set. Tensorboard
+    # updates are saved at the end of each epoch, so setting this to a
+    # smaller number means getting more frequent TensorBoard updates.
+    # Validation stats are also calculated at each epoch end and they
+    # might take a while, so don't set this too small to avoid spending
+    # a lot of time on validation stats.
+    STEPS_PER_EPOCH = 1000
+
+    # Number of validation steps to run at the end of every training epoch.
+    # A bigger number improves accuracy of validation stats, but slows
+    # down the training.
+    VALIDATION_STEPS = 50
+
+    # Backbone network architecture
+    # Supported values are: resnet50, resnet101.
+    # You can also provide a callable that should have the signature
+    # of model.resnet_graph. If you do so, you need to supply a callable
+    # to COMPUTE_BACKBONE_SHAPE as well
+    BACKBONE = "resnet50"
+
+    # Only useful if you supply a callable to BACKBONE. Should compute
+    # the shape of each layer of the FPN Pyramid.
+    # See model.compute_backbone_shapes
+    COMPUTE_BACKBONE_SHAPE = None
+
+    # The strides of each layer of the FPN Pyramid. These values
+    # are based on a Resnet101 backbone.
+    BACKBONE_STRIDES = [4, 8, 16, 32, 64]
+
+    # Size of the fully-connected layers in the classification graph
+    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
+
+    # Size of the top-down layers used to build the feature pyramid
+    TOP_DOWN_PYRAMID_SIZE = 256
+
+    # Number of classification classes (including background)
+    NUM_CLASSES = 1  # Override in sub-classes
+
+    # Length of square anchor side in pixels
+    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
+
+    # Ratios of anchors at each cell (width/height)
+    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
+    RPN_ANCHOR_RATIOS = [0.5, 1, 2]
+
+    # Anchor stride
+    # If 1 then anchors are created for each cell in the backbone feature map.
+    # If 2, then anchors are created for every other cell, and so on.
+    RPN_ANCHOR_STRIDE = 1
+
+    # Non-max suppression threshold to filter RPN proposals.
+    # You can increase this during training to generate more proposals.
+    RPN_NMS_THRESHOLD = 0.7
+
+    # How many anchors per image to use for RPN training
+    RPN_TRAIN_ANCHORS_PER_IMAGE = 256
+
+    # ROIs kept after tf.nn.top_k and before non-maximum suppression
+    PRE_NMS_LIMIT = 6000
+
+    # ROIs kept after non-maximum suppression (training and inference)
+    POST_NMS_ROIS_TRAINING = 2000
+    POST_NMS_ROIS_INFERENCE = 1000
+
+    # If enabled, resizes instance masks to a smaller size to reduce
+    # memory load. Recommended when using high-resolution images.
+    USE_MINI_MASK = True
+    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask
+
+    # Input image resizing
+    # Generally, use the "square" resizing mode for training and predicting
+    # and it should work well in most cases. In this mode, images are scaled
+    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
+    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
+    # padded with zeros to make it a square so multiple images can be put
+    # in one batch.
+    # Available resizing modes:
+    # none:   No resizing or padding. Return the image unchanged.
+    # square: Resize and pad with zeros to get a square image
+    #         of size [max_dim, max_dim].
+    # pad64:  Pads width and height with zeros to make them multiples of 64.
+    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
+    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
+    #         The multiple of 64 is needed to ensure smooth scaling of feature
+    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
+    # crop:   Picks random crops from the image. First, scales the image based
+    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
+    #         size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
+    #         IMAGE_MAX_DIM is not used in this mode.
+    IMAGE_RESIZE_MODE = "square"
+    IMAGE_MIN_DIM = 256
+    IMAGE_MAX_DIM = 256
+    # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further
+    # up scaling. For example, if set to 2 then images are scaled up to double
+    # the width and height, or more, even if IMAGE_MIN_DIM doesn't require it.
+    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
+    IMAGE_MIN_SCALE = 0
+    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
+    # Changing this requires other changes in the code. See the WIKI for more
+    # details: https://github.com/matterport/Mask_RCNN/wiki
+    IMAGE_CHANNEL_COUNT = 3
+
+    # Image mean (RGB)
+    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
+
+    # Number of ROIs per image to feed to classifier/mask heads
+    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
+    # enough positive proposals to fill this and keep a positive:negative
+    # ratio of 1:3. You can increase the number of proposals by adjusting
+    # the RPN NMS threshold.
+    TRAIN_ROIS_PER_IMAGE = 200
+
+    # Percent of positive ROIs used to train classifier/mask heads
+    ROI_POSITIVE_RATIO = 0.33
+
+    # Pooled ROIs
+    POOL_SIZE = 7
+    MASK_POOL_SIZE = 14
+
+    # Shape of output mask
+    # To change this you also need to change the neural network mask branch
+    MASK_SHAPE = [28, 28]
+
+    # Maximum number of ground truth instances to use in one image
+    MAX_GT_INSTANCES = 100
+
+    # Bounding box refinement standard deviation for RPN and final detections.
+    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
+    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
+
+    # Max number of final detections
+    DETECTION_MAX_INSTANCES = 100
+
+    # Minimum probability value to accept a detected instance
+    # ROIs below this threshold are skipped
+    DETECTION_MIN_CONFIDENCE = 0.7
+
+    # Non-maximum suppression threshold for detection
+    DETECTION_NMS_THRESHOLD = 0.3
+
+    # Learning rate and momentum
+    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
+    # weights to explode. Likely due to differences in optimizer
+    # implementation.
+    LEARNING_RATE = 0.001
+    LEARNING_MOMENTUM = 0.9
+
+    # Weight decay regularization
+    WEIGHT_DECAY = 0.0001
+
+    # Loss weights for more precise optimization.
+    # Can be used for R-CNN training setup.
+    LOSS_WEIGHTS = {
+        "rpn_class_loss": 20.0,
+        "rpn_bbox_loss": 1.0,
+        "mrcnn_class_loss": 10.0,
+        "mrcnn_bbox_loss": 1.0,
+        "mrcnn_mask_loss": 10.0
+    }
+
+    # Use RPN ROIs or externally generated ROIs for training
+    # Keep this True for most situations. Set to False if you want to train
+    # the head branches on ROI generated by code rather than the ROIs from
+    # the RPN. For example, to debug the classifier head without having to
+    # TRAIN the RPN.
+    USE_RPN_ROIS = True
+
+    # Train or freeze batch normalization layers
+    #     None: Train BN layers. This is the normal mode
+    #     False: Freeze BN layers. Good when using a small batch size
+    #     True: (don't use). Set layer in training mode even when predicting
+    TRAIN_BN = False  # Defaulting to False since batch size is often small
+
+    # Gradient norm clipping
+    GRADIENT_CLIP_NORM = 5.0
+
+    def __init__(self):
+        """Set values of computed attributes."""
+        # Effective batch size
+        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
+
+        # Input image size
+        if self.IMAGE_RESIZE_MODE == "crop":
+            self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
+                self.IMAGE_CHANNEL_COUNT])
+        else:
+            self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
+                self.IMAGE_CHANNEL_COUNT])
+
+        # Image meta data length
+        # See compose_image_meta() for details
+        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
+
+    def display(self):
+        """Display Configuration values."""
+        print("\nConfigurations:")
+        for a in dir(self):
+            if not a.startswith("__") and not callable(getattr(self, a)):
+                print("{:30} {}".format(a, getattr(self, a)))
+        print("\n")
+
+
+class ClomaskConfig(Config):
+    """
+    Mask RCNN configuration for Clomask
+    """
+
+    # Give the configuration a recognizable name
+    NAME = "clomask"
+
+    # Image resize mode ['crop', 'square', 'pad64']
+    IMAGE_RESIZE_MODE = 'crop'
+
+    # Optimizer, default is 'SGD'
+    OPTIMIZER = 'ADAM'
+
+    # Train on 1 GPU and 2 images per GPU.
+    GPU_COUNT = 1
+    IMAGES_PER_GPU = 2
+
+    # Number of classes (including background)
+    NUM_CLASSES = 1 + 3  # background + bottles + candy_boxes + chips_bag
+
+    # Input image resizing
+    # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
+    # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
+    # be satisfied together the IMAGE_MAX_DIM is enforced.
+    IMAGE_MIN_DIM = 512
+    IMAGE_MAX_DIM = 512
+
+    IMAGE_MIN_SCALE = 0
+
+    # Backbone encoder architecture
+    BACKBONE = 'resnet101'
+
+    # Anchor side lengths in pixels; half the base Config defaults (32..512).
+    RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)
+
+    # How many anchors per image to use for RPN training
+    RPN_TRAIN_ANCHORS_PER_IMAGE = 320  #
+
+    # ROIs kept after non-maximum suppression (training and inference)
+    POST_NMS_ROIS_TRAINING = 2048
+    POST_NMS_ROIS_INFERENCE = 2048
+    IMAGE_COLOR = 'RGB'
+
+    # Number of ROIs per image to feed to classifier/mask heads
+    TRAIN_ROIS_PER_IMAGE = 512
+
+    # Non-max suppression threshold to filter RPN proposals.
+    # Can be increased during training to generate more proposals.
+    RPN_NMS_THRESHOLD = 0.7
+    # Maximum number of ground truth instances to use in one image
+    # We set this to 300 as we have control over how many masks we have in an image.
+    MAX_GT_INSTANCES = 300
+
+    # Max number of final detections
+    DETECTION_MAX_INSTANCES = 300
+
+    # Minimum probability value to accept a detected instance
+    # ROIs below this threshold are skipped
+    DETECTION_MIN_CONFIDENCE = 0.85
+
+    # Non-maximum suppression threshold for detection
+    DETECTION_NMS_THRESHOLD = 0.3  # 0.3
+
+    # Threshold number for mask binarization, only used in inference mode
+    DETECTION_MASK_THRESHOLD = 0.35
+
+# Root directory of the project
+ROOT_DIR = '../mask_data/'
+
+# Directory to save logs and trained model weights for tensorboard visualization and prediction.
+MODEL_DIR = '../mask_data/logs'
+
+TRAIN_PATH = '../mask_data/train_image/'
+
+TEST_PATH = '../mask_data/test_image/'
+
+IMAGE_PATH = '/train_image/'
+
+MASK_PATH = '/train_mask/'
+
+COCO_PATH = '../mask_data/mask_rcnn_coco.h5'