-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathRoiPooling.py
120 lines (94 loc) · 4.81 KB
/
RoiPooling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from keras.engine.topology import Layer
import keras.backend as K
class RoiPooling(Layer):
"""ROI pooling layer for 2D inputs.
See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
K. He, X. Zhang, S. Ren, J. Sun
# Arguments
pool_list: list of int
List of pooling regions to use. The length of the list is the number of pooling regions,
each int in the list is the number of regions in that pool. For example [1,2,4] would be 3
regions with 1, 2x2 and 4x4 max pools, so 21 outputs per feature map
num_rois: number of regions of interest to be used
# Input shape
list of two 4D tensors [X_img,X_roi] with shape:
X_img:
`(1, channels, rows, cols)` if dim_ordering='th'
or 4D tensor with shape:
`(1, rows, cols, channels)` if dim_ordering='tf'.
X_roi:
`(1,num_rois,4)` list of rois, with ordering (x,y,w,h)
# Output shape
3D tensor with shape:
`(1, num_rois, channels * sum([i * i for i in pool_list])`
"""
def __init__(self, pool_list, num_rois, **kwargs):
self.dim_ordering = K.image_dim_ordering()
assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
self.pool_list = pool_list
self.num_rois = num_rois
self.num_outputs_per_channel = sum([i * i for i in pool_list])
super(RoiPooling, self).__init__(**kwargs)
def build(self, input_shape):
if self.dim_ordering == 'th':
self.nb_channels = input_shape[0][1]
elif self.dim_ordering == 'tf':
self.nb_channels = input_shape[0][3]
def compute_output_shape(self, input_shape):
return None, self.num_rois, self.nb_channels * self.num_outputs_per_channel
def get_config(self):
config = {'pool_list': self.pool_list, 'num_rois': self.num_rois}
base_config = super(RoiPooling, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, x, mask=None):
assert (len(x) == 2)
img = x[0]
rois = x[1]
input_shape = K.shape(img)
outputs = []
for roi_idx in range(self.num_rois):
x = rois[0, roi_idx, 0]
y = rois[0, roi_idx, 1]
w = rois[0, roi_idx, 2]
h = rois[0, roi_idx, 3]
row_length = [w / i for i in self.pool_list]
col_length = [h / i for i in self.pool_list]
if self.dim_ordering == 'th':
for pool_num, num_pool_regions in enumerate(self.pool_list):
for ix in range(num_pool_regions):
for jy in range(num_pool_regions):
x1 = x + ix * col_length[pool_num]
x2 = x1 + col_length[pool_num]
y1 = y + jy * row_length[pool_num]
y2 = y1 + row_length[pool_num]
x1 = K.cast(K.round(x1), 'int32')
x2 = K.cast(K.round(x2), 'int32')
y1 = K.cast(K.round(y1), 'int32')
y2 = K.cast(K.round(y2), 'int32')
new_shape = [input_shape[0], input_shape[1],
y2 - y1, x2 - x1]
x_crop = img[:, :, y1:y2, x1:x2]
xm = K.reshape(x_crop, new_shape)
pooled_val = K.max(xm, axis=(2, 3))
outputs.append(pooled_val)
elif self.dim_ordering == 'tf':
for pool_num, num_pool_regions in enumerate(self.pool_list):
for ix in range(num_pool_regions):
for jy in range(num_pool_regions):
x1 = x + ix * col_length[pool_num]
x2 = x1 + col_length[pool_num]
y1 = y + jy * row_length[pool_num]
y2 = y1 + row_length[pool_num]
x1 = K.cast(K.round(x1), 'int32')
x2 = K.cast(K.round(x2), 'int32')
y1 = K.cast(K.round(y1), 'int32')
y2 = K.cast(K.round(y2), 'int32')
new_shape = [input_shape[0], y2 - y1,
x2 - x1, input_shape[3]]
x_crop = img[:, y1:y2, x1:x2, :]
xm = K.reshape(x_crop, new_shape)
pooled_val = K.max(xm, axis=(1, 2))
outputs.append(pooled_val)
final_output = K.concatenate(outputs, axis=0)
final_output = K.reshape(final_output, (1, self.num_rois, self.nb_channels * self.num_outputs_per_channel))
return final_output