upload new code

xiaocheng2 · Jul 1, 2020 · e2d69b6 · e2d69b6
1 parent 2d9cc14
commit e2d69b6
Show file tree

Hide file tree

Showing 28 changed files with 3,085 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -1 +1,103 @@
+# YoloVx(yolov5/yolov4/yolov3/yolo_tiny)
+
+## Introduction
+A tensorflow implementation of YOLOv5 inspired by [https://github.com/ultralytics/yolov5](https://github.com/ultralytics/yolov5).
+
+A tensorflow implementation of YOLOv4 inspired by [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet).
+
+The code framework is based on [https://github.com/YunYang1994/tensorflow-yolov3](https://github.com/YunYang1994/tensorflow-yolov3).
+
+Backbone: Darknet53; CSPDarknet53[[1]](https://arxiv.org/pdf/1911.11929.pdf), Mish[[2]](https://arxiv.org/abs/1908.08681); MobileNetV2
+
+Neck: SPP[[3]](https://arxiv.org/abs/1406.4729), PAN[[4]](https://arxiv.org/abs/1803.01534); 
+
+Head: YOLOv5/YOLOv4(Mish), YOLOv3(Leaky_ReLU)[[10]](https://arxiv.org/abs/1804.02767); 
+
+Loss: DIoU, CIoU[[5]](https://arxiv.org/pdf/1911.08287v1.pdf), Focal_Loss[[6]](https://arxiv.org/abs/1708.02002); Other: Label_Smoothing[[7]](https://arxiv.org/pdf/1906.02629.pdf)
+
+## Environment
+
+Python 3.6.8
+
+Tensorflow 1.13.1
+
+## Quick Start
+
+1. Download YOLOv5 weights from [yolov5.weights](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J).
+2. Download YOLOv4 weights from [yolov4.weights](https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT).
+3. Convert the Darknet YOLOv4 model to a tf model.
+4. Train Yolov5/Yolov4/Yolov3/Yolo_tiny.
+5. Run Yolov5/Yolov4/Yolov3/Yolo_tiny detection.
+
+### Convert weights
+
+Running `from_darknet_weights_to_ckpt.py` produces the TensorFlow YOLOv4 checkpoint file `yolov4_coco.ckpt`.
+
+```
+python scripts/from_darknet_weights_to_ckpt.py
+```
+
+Running `ckpt2pb.py` produces the TensorFlow YOLOv4 model file `yolov4.pb`.
+
+```
+python scripts/ckpt2pb.py
+```
+
+Alternatively, run `from_darknet_weights_to_pb.py` directly.
+
+```
+python scripts/from_darknet_weights_to_pb.py
+```
+
+### Train
+
+Set your own paths in `core/config.py`.
+
+Usage: `python train.py <gpu_id> <net_type>`, where `net_type` is one of `yolov5`, `yolov4`, `yolov3`, `tiny`.
+
+```
+python train.py 0 yolov5
+```
+
+### Usage
+
+Inference
+
+```
+python test.py
+```
+
+```
+python demo.py
+```
+
+## Reference
+
+[[1] Cross Stage Partial Network (CSPNet)](https://arxiv.org/pdf/1911.11929.pdf)
+
+[[2] A Self Regularized Non-Monotonic Neural Activation Function](https://arxiv.org/abs/1908.08681)
+
+[[3] Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition](https://arxiv.org/abs/1406.4729)
+
+[[4] Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534)
+
+[[5] Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression](https://arxiv.org/pdf/1911.08287v1.pdf)
+
+[[6] Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)
+
+[[7] When Does Label Smoothing Help?](https://arxiv.org/pdf/1906.02629.pdf)
+
+[[8] Convolutional Block Attention Module](https://arxiv.org/abs/1807.06521)
+
+[[9] YOLOv4: Optimal Speed and Accuracy of Object Detection](https://arxiv.org/abs/2004.10934)
+
+[[10] YOLOv3: An Incremental Improvement](https://arxiv.org/abs/1804.02767)
+
+[[11] Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431)
+
+### Acknowledgment
+
+keras_yolov3 [https://github.com/qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3).
+
+keras_yolov4 [https://github.com/Ma-Dan/keras-yolo4](https://github.com/Ma-Dan/keras-yolo4).
 
diff --git a/core/__init__.py b/core/__init__.py
diff --git a/core/backbone.py b/core/backbone.py
@@ -0,0 +1,79 @@
+#! /usr/bin/env python
+# coding=utf-8
+import core.common as common
+import tensorflow as tf
+
+
+def darknet53(input_data, trainable):
+    with tf.variable_scope('darknet'):
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 3, 32), trainable=trainable, name='conv0')
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 32, 64), trainable=trainable, name='conv1', downsample=True)
+
+        for i in range(1):
+            input_data = common.residual_block(input_data, 64, 32, 64, trainable=trainable, name='residual%d' % (i + 0))
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 64, 128), trainable=trainable, name='conv4', downsample=True)
+        for i in range(2):
+            input_data = common.residual_block(input_data, 128, 64, 128, trainable=trainable, name='residual%d' % (i + 1))
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 128, 256), trainable=trainable, name='conv9', downsample=True)
+        for i in range(8):
+            input_data = common.residual_block(input_data, 256, 128, 256, trainable=trainable, name='residual%d' % (i + 3))
+
+        route_1 = input_data
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 256, 512), trainable=trainable, name='conv26', downsample=True)
+
+        for i in range(8):
+            input_data = common.residual_block(input_data, 512, 256, 512, trainable=trainable, name='residual%d' % (i + 11))
+
+        route_2 = input_data
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 512, 1024), trainable=trainable, name='conv43', downsample=True)
+
+        for i in range(4):
+            input_data = common.residual_block(input_data, 1024, 512, 1024, trainable=trainable, name='residual%d' % (i + 19))
+
+        return route_1, route_2, input_data
+
+
+def mobilenetv2(input_data, trainable):
+    with tf.variable_scope('mobilenetv2'):
+        #input_data = tf.reshape(input_data, [-1, 608, 608, 3]) # print layer's shape
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 3, 32), trainable=trainable, name='conv0', downsample=True)
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 32, 16), trainable=trainable, name='conv1', downsample=True)        
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 16, 24), trainable=trainable, name='conv2')
+
+        for i in range(1):
+            input_data = common.residual_block(input_data, 24, 24, 24, trainable=trainable, name='residual%d' % (i + 0))
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 24, 32), trainable=trainable, name='conv4', downsample=True)
+
+        for i in range(2):
+            input_data = common.residual_block(input_data, 32, 32, 32, trainable=trainable, name='residual%d' % (i + 1))
+
+        route_1 = input_data
+
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 32, 64), trainable=trainable, name='conv7', downsample=True)
+
+        for i in range(3):
+            input_data = common.residual_block(input_data, 64, 384, 64, trainable=trainable, name='residual%d' % (i + 3))
+
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 64, 96), trainable=trainable, name='conv11')
+
+        for i in range(2):
+            input_data = common.residual_block(input_data, 96, 576, 96, trainable=trainable, name='residual%d' % (i + 6))
+
+        route_2 = input_data
+
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 96, 160), trainable=trainable, name='conv14', downsample=True)
+
+        for i in range(2):
+            input_data = common.residual_block(input_data, 160, 160, 160, trainable=trainable, name='residual%d' % (i + 8))
+
+        input_data = common.convolutional(input_data, filters_shape=(3, 3, 160, 320), trainable=trainable, name='conv17')   
+
+        return route_1, route_2, input_data
+
diff --git a/core/common.py b/core/common.py
@@ -0,0 +1,60 @@
+#! /usr/bin/env python
+# coding=utf-8
+import tensorflow as tf
+
+
+def convolutional(input_data, filters_shape, trainable, name, downsample=False, activate=True, bn=True):
+    with tf.variable_scope(name):
+        if downsample:
+            pad_h, pad_w = (filters_shape[0] - 2) // 2 + 1, (filters_shape[1] - 2) // 2 + 1
+            paddings = tf.constant([[0, 0], [pad_h, pad_h], [pad_w, pad_w], [0, 0]])
+            input_data = tf.pad(input_data, paddings, 'CONSTANT')
+            strides = (1, 2, 2, 1)
+            padding = 'VALID'
+        else:
+            strides = (1, 1, 1, 1)
+            padding = "SAME"
+
+        weight = tf.get_variable(name='weight', dtype=tf.float32, trainable=True, shape=filters_shape, initializer=tf.random_normal_initializer(stddev=0.01))
+        conv = tf.nn.conv2d(input=input_data, filter=weight, strides=strides, padding=padding)
+
+        if bn:
+            conv = tf.layers.batch_normalization(conv, beta_initializer=tf.zeros_initializer(), gamma_initializer=tf.ones_initializer(),
+                                                 moving_mean_initializer=tf.zeros_initializer(), moving_variance_initializer=tf.ones_initializer(), training=trainable)
+        else:
+            bias = tf.get_variable(name='bias', shape=filters_shape[-1], trainable=True, dtype=tf.float32, initializer=tf.constant_initializer(0.0))
+            conv = tf.nn.bias_add(conv, bias)
+
+        if activate == True: 
+            conv = tf.nn.leaky_relu(conv, alpha=0.1)
+    return conv
+
+
+def residual_block(input_data, input_channel, filter_num1, filter_num2, trainable, name):
+    short_cut = input_data
+    with tf.variable_scope(name):
+        input_data = convolutional(input_data, filters_shape=(1, 1, input_channel, filter_num1), trainable=trainable, name='conv1')
+        input_data = convolutional(input_data, filters_shape=(3, 3, filter_num1, filter_num2), trainable=trainable, name='conv2')
+        residual_output = input_data + short_cut
+    return residual_output
+
+
+def route(name, previous_output, current_output):
+    with tf.variable_scope(name):
+        output = tf.concat([current_output, previous_output], axis=-1)
+    return output
+
+
+def upsample(input_data, name, method="deconv"):
+    assert method in ["resize", "deconv"]
+    if method == "resize":
+        with tf.variable_scope(name):
+            input_shape = tf.shape(input_data)
+            output = tf.image.resize_nearest_neighbor(input_data, (input_shape[1] * 2, input_shape[2] * 2))
+
+    if method == "deconv":
+        # replace resize_nearest_neighbor with conv2d_transpose To support TensorRT optimization
+        numm_filter = input_data.shape.as_list()[-1]
+        output = tf.layers.conv2d_transpose(input_data, numm_filter, kernel_size=2, padding='same', strides=(2, 2), 
+                                            kernel_initializer=tf.random_normal_initializer())
+    return output
diff --git a/core/config.py b/core/config.py
@@ -0,0 +1,58 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+from easydict import EasyDict as edict
+
+
+# Root configuration object; nested sections are attached below as attributes.
+__C = edict()
+# Consumers read the config through the public alias `cfg`,
+# e.g. `from core.config import cfg` (this file is core/config.py).
+
+cfg = __C
+
+# YOLO options: model-level settings.
+__C.YOLO = edict()
+
+
+# Backbone selection, class/anchor files and other model settings.
+__C.YOLO.NET_TYPE = 'darknet53' # one of: 'darknet53', 'mobilenetv2'
+__C.YOLO.CLASSES = "./data/classes/coco.names"
+# NOTE(review): the trailing hint names yolo_anchors.txt / yolov4_anchors.txt,
+# but the configured file is coco_anchors.txt -- confirm which one is intended.
+__C.YOLO.ANCHORS = "./data/anchors/coco_anchors.txt" # yolov3/5 : yolo_anchors.txt; yolov4 : yolov4_anchors.txt
+__C.YOLO.MOVING_AVE_DECAY = 0.9995
+__C.YOLO.STRIDES = [8, 16, 32]
+__C.YOLO.STRIDES_TINY = [16, 32]
+__C.YOLO.ANCHOR_PER_SCALE = 3
+__C.YOLO.IOU_LOSS_THRESH = 0.5
+# The upsample helper in core/common.py accepts "resize" or "deconv".
+__C.YOLO.UPSAMPLE_METHOD = "resize"
+# NOTE(review): these two paths name yolov3 checkpoints while
+# TRAIN.INITIAL_WEIGHT below names a yolov4 one -- confirm this is intended.
+__C.YOLO.ORIGINAL_WEIGHT = "./checkpoint/yolov3_coco.ckpt"
+__C.YOLO.DEMO_WEIGHT = "./checkpoint/yolov3_coco_demo.ckpt"
+
+
+# Train options
+__C.TRAIN = edict()
+
+__C.TRAIN.ANNOT_PATH = "./data/COCO/2017/train.txt"
+__C.TRAIN.BATCH_SIZE = 6
+# A list of candidate input sizes (all multiples of 32).
+__C.TRAIN.INPUT_SIZE = [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+__C.TRAIN.DATA_AUG = True
+__C.TRAIN.LEARN_RATE_INIT = 1e-4
+__C.TRAIN.LEARN_RATE_END = 1e-6
+__C.TRAIN.WARMUP_EPOCHS = 2
+# NOTE: 'FISRT' is a misspelling of 'FIRST'; the key is kept as-is because
+# other modules may read it under this exact name.
+__C.TRAIN.FISRT_STAGE_EPOCHS = 20
+__C.TRAIN.SECOND_STAGE_EPOCHS = 30
+__C.TRAIN.INITIAL_WEIGHT = "./weights/yolov4_coco.ckpt"
+__C.TRAIN.CKPT_PATH = "./checkpoint"
+
+
+# Test options
+__C.TEST = edict()
+
+# NOTE(review): the test annotations point at a VOC file while training uses
+# a COCO file -- confirm the paths match your dataset.
+__C.TEST.ANNOT_PATH = "./data/dataset/voc_test.txt"
+__C.TEST.BATCH_SIZE = 2
+__C.TEST.INPUT_SIZE = 544
+__C.TEST.DATA_AUG = False
+__C.TEST.WRITE_IMAGE = True
+__C.TEST.WRITE_IMAGE_PATH = "./data/detection/"
+__C.TEST.WRITE_IMAGE_SHOW_LABEL = True
+__C.TEST.WEIGHT_FILE = "./checkpoint/yolov3_test_loss=9.2099.ckpt-5"
+__C.TEST.SHOW_LABEL = True
+__C.TEST.SCORE_THRESHOLD = 0.3
+__C.TEST.IOU_THRESHOLD = 0.45