-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
82 lines (63 loc) · 3.53 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import numpy as np
from utils import WeightReader, decode_netout, draw_boxes
import Model
import Web
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"] = ""
LABELS = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
IMAGE_H, IMAGE_W = 416, 416
GRID_H, GRID_W = 13 , 13 # grid size
BOX = 5
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')
OBJ_THRESHOLD = 0.3#0.5
NMS_THRESHOLD = 0.3#0.45
ANCHORS = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0
BATCH_SIZE = 16
WARM_UP_BATCHES = 0
TRUE_BOX_BUFFER = 50
wt_path = 'yolov2.weights'
train_image_folder = './COCOdataset/images/train2014/'
train_annot_folder = './voc/train/'
valid_image_folder = './COCOdataset/images/val2014/'
valid_annot_folder = './voc/test/'
wt_path = './pre-trained weights/yolo.weights'
weight_reader = WeightReader(wt_path)
model = Model.define_model(IMAGE_H,IMAGE_W,TRUE_BOX_BUFFER,BOX,CLASS,GRID_H, GRID_W)
weight_reader.reset()
nb_conv = 23
for i in range(1, nb_conv + 1):
conv_layer = model.get_layer('conv_' + str(i))
if i < nb_conv:
norm_layer = model.get_layer('norm_' + str(i))
size = np.prod(norm_layer.get_weights()[0].shape)
beta = weight_reader.read_bytes(size)
gamma = weight_reader.read_bytes(size)
mean = weight_reader.read_bytes(size)
var = weight_reader.read_bytes(size)
weights = norm_layer.set_weights([gamma, beta, mean, var])
if len(conv_layer.get_weights()) > 1:
bias = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
kernel = kernel.transpose([2, 3, 1, 0])
conv_layer.set_weights([kernel, bias])
else:
kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
kernel = kernel.transpose([2, 3, 1, 0])
conv_layer.set_weights([kernel])
layer = model.layers[-4] # the last convolutional layer
weights = layer.get_weights()
new_kernel = np.random.normal(size=weights[0].shape)/(GRID_H*GRID_W)
new_bias = np.random.normal(size=weights[1].shape)/(GRID_H*GRID_W)
layer.set_weights([new_kernel, new_bias])
# perform detection on image
model.load_weights("./pre-trained weights/yolo.h5")
# perform web cam!
dummy_array = np.zeros((1,1,1,1,TRUE_BOX_BUFFER,4))
Web.main(model,dummy_array,decode_netout,NMS_THRESHOLD,ANCHORS,CLASS,LABELS,draw_boxes)