-
Notifications
You must be signed in to change notification settings - Fork 17
/
data.py
197 lines (172 loc) · 9.14 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
import imageio
import skimage.transform
"""
This script generate the x_train, x_test, y_train, y_test for the further process from the FLIC dataset.
x_train, x_test is [n, 480, 720, 3], type = float32, pixels are from 0 to 1
y_train, y_test is [n, 60, 90, 10], type = float32
There're 3987 for training and 1016 for testing.
Note, that "torso-joint" is included as well. For the details why we need it, please read the original paper.
"""
def downsample_cube(myarr, factor, ignoredim=0):
"""
Downsample a 3D array by averaging over *factor* pixels on the last two
axes.
"""
if ignoredim > 0:
myarr = myarr.swapaxes(0, ignoredim)
zs, ys, xs = myarr.shape
crarr = myarr[:, :ys - (ys % int(factor)), :xs - (xs % int(factor))]
dsarr = np.mean(np.concatenate([[crarr[:, i::factor, j::factor]
for i in range(factor)]
for j in range(factor)]), axis=0)
if ignoredim > 0: dsarr = dsarr.swapaxes(0, ignoredim)
return dsarr
def flip_backward_poses(flic_coords):
"""
Flip left and right parts of backward facing people. It's used by Tompson et al. 2014 in their evaluation scripts.
Looks like a cheating, but they claim that other people also use (used) this scheme.
"""
hip_left, hip_right = dict['lhip'], dict['rhip']
# if backward facing pose according to hips
if flic_coords[:, hip_left][0] < flic_coords[:, hip_right][0]:
for joint_left, joint_right in zip(['lwri', 'lelb', 'lhip', 'lsho'], ['rwri', 'relb', 'rhip', 'rsho']):
joint_left, joint_right = dict[joint_left], dict[joint_right]
coords_left_joint = flic_coords[:, joint_left]
coords_right_joint = flic_coords[:, joint_right]
flic_coords[:, joint_left] = coords_right_joint
flic_coords[:, joint_right] = coords_left_joint
return flic_coords
def how_many_backward_poses():
"""
How many backward facing poses in the dataset.
"""
# left_id, right_id = dict['lwri'], dict['rwri']
left_id, right_id = dict['lhip'], dict['rhip']
s_frontal = 0
index = train_index
for i in index:
flic_coords = data_FLIC[i][2]
# flic_coords = flip_backward_poses(flic_coords)
coords_left = flic_coords[:, left_id] / 8
coords_right = flic_coords[:, right_id] / 8
s_frontal += coords_left[0] < coords_right[0]
# print(coords_left[0], coords_right[0])
print('frontal:', s_frontal, 'total:', len(index), 'fraction:', s_frontal / len(index))
def distances_hip_sho():
"""
Show the distribution of torso heights.
"""
index = train_index
distances = []
for i in index:
flic_coords = data_FLIC[i][2]
# flic_coords = flip_backward_poses(flic_coords)
rhip = flic_coords[:, dict['rhip']]
rsho = flic_coords[:, dict['rsho']]
dist = np.sum((rhip - rsho) ** 2) ** 0.5
distances.append(dist)
distances = np.array(distances)
print(np.min(distances), np.median(distances), np.max(distances))
plt.hist(distances)
if __name__ == '__main__':
# We tried a similar data preparation mentioned in "Learning human pose estimation features with convolutional
# networks" (ICLR 2014) It unifies the scale on the training data, which seems as a good idea for the
# scale-dependent spatial model.
# However, it does not lead to improvements. So our recommendation is to set iclr_data_preparation = False.
iclr_data_preparation = False
meta_info_file = 'data_FLIC.mat'
images_dir = './images_FLIC/'
data_FLIC = loadmat(meta_info_file)
data_FLIC = data_FLIC['examples'][0]
joint_ids = ['lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip', 'rhip', 'nose'] # , 'leye', 'reye',
dict = {'lsho': 0, 'lelb': 1, 'lwri': 2, 'rsho': 3, 'relb': 4, 'rwri': 5, 'lhip': 6,
'lkne': 7, 'lank': 8, 'rhip': 9, 'rkne': 10, 'rank': 11, 'leye': 12, 'reye': 13,
'lear': 14, 'rear': 15, 'nose': 16, 'msho': 17, 'mhip': 18, 'mear': 19, 'mtorso': 20,
'mluarm': 21, 'mruarm': 22, 'mllarm': 23, 'mrlarm': 24, 'mluleg': 25, 'mruleg': 26,
'mllleg': 27, 'torso': 28}
is_train = [data_FLIC[i][7][0, 0] for i in range(len(data_FLIC))]
is_train = np.array(is_train)
train_index = list(np.where(is_train == 1))[0]
test_index = list(np.array(np.where(is_train == 0)))[0]
print('# train indices:', len(train_index), ' # test indices:', len(test_index))
# coefs = np.array([[1, 8, 28, 56, 70, 56, 28, 8, 1]], dtype=np.float32) / 256
# coefs = np.array([[1, 4, 6, 4, 1]], dtype=np.float32) / 16
coefs = np.array([[1, 2, 1]], dtype=np.float32) / 4 # maximizes performance
# coefs = np.array([[1]])
kernel = coefs.T @ coefs
temp = round((len(kernel) - 1) / 2)
pad = 5 # use padding to avoid the exceeding of the boundary
# This part is for x_train and x_test
orig_h, orig_w = 480, 720
x_train, x_test, y_train_hmap, y_test_hmap = [], [], [], []
for x, x_name, hmaps, hmaps_name, index in zip([x_train, x_test], ['x_train_flic', 'x_test_flic'], [y_train_hmap, y_test_hmap],
['y_train_flic', 'y_test_flic'], [train_index, test_index]):
for i in index:
flic_coords = data_FLIC[i][2]
flic_coords = flip_backward_poses(flic_coords)
img = imageio.imread(images_dir + data_FLIC[i][3][0])
# img = downsample_cube(img, 2, ignoredim=2) # the third dim
img = img.astype(np.float32)
img = img / 255
# if training set
if 'train' in x_name and iclr_data_preparation:
mode_height = 127.0 # roughly mode of the distribution of ||rhip - rsho||_2
# center of torso
center = (flic_coords[:, dict['lsho']] + flic_coords[:, dict['rhip']] + flic_coords[:, dict['rsho']] +
flic_coords[:, dict['lhip']]) / 4
center = (float(center[1]), float(center[0]))
cur_height = np.sum((flic_coords[:, dict['rhip']] - flic_coords[:, dict['rsho']]) ** 2) ** 0.5
scale = float(cur_height / mode_height)
h1 = (1 - scale) / 2 * orig_h
h2 = h1 + scale * orig_h
w1 = (1 - scale) / 2 * orig_w
w2 = w1 + scale * orig_w
diff = [center[0] - orig_h / 2,
center[1] - orig_w / 2] # diff between real center of human and center (480/2, 720/2)
h1, h2 = round(h1 + diff[0]), round(h2 + diff[0])
w1, w2 = round(w1 + diff[1]), round(w2 + diff[1])
# but some coords h1,h2,w1,w2 can be negative => we need padding
pad_h = max(0, -h1), max(h2 - orig_h, 0)
pad_w = max(0, -w1), max(w2 - orig_w, 0)
img_pad = np.pad(img, (pad_h, pad_w, (0, 0)), 'constant', constant_values=0)
print('Before padding:', img.shape, ' after padding:', img_pad.shape)
# changes are needed if we effectively changed our origin (after padding)
h1, h2 = h1 + pad_h[0], h2 + pad_h[0]
w1, w2 = w1 + pad_w[0], w2 + pad_w[0]
img_crop = img_pad[h1:h2, w1:w2]
img_final = skimage.transform.resize(img_crop, (orig_h, orig_w))
x.append(img_final)
else:
x.append(img)
hmap = []
torso = (flic_coords[:, dict['lsho']] + flic_coords[:, dict['rhip']] + flic_coords[:, dict['rsho']] +
flic_coords[:, dict['lhip']]) / 4
flic_coords[:, dict['torso']] = torso
for joint in joint_ids + ['torso']:
coords = np.copy(flic_coords[:, dict[joint]])
# there are some annotation that are outside of the image (annotators did a great job!)
coords[0], coords[1] = max(min(coords[1], orig_h), 0), max(min(coords[0], orig_w), 0)
# Now we need y coordinates also to match
if 'train' in x_name and iclr_data_preparation:
coords[0] = (coords[0] + pad_h[0] - h1) * img_final.shape[0] / img_crop.shape[0]
coords[1] = (coords[1] + pad_w[0] - w1) * img_final.shape[1] / img_crop.shape[1]
coords /= 8
heat_map = np.zeros([60, 90], dtype=np.float32)
heat_map = np.lib.pad(heat_map, ((pad, pad), (pad, pad)), 'constant', constant_values=0)
coords = coords + pad
h1_k, h2_k = int(coords[0] - temp), int(coords[0] + temp + 1)
w1_k, w2_k = int(coords[1] - temp), int(coords[1] + temp + 1)
heat_map[h1_k:h2_k, w1_k:w2_k] = kernel
heat_map = heat_map[pad:pad + 60, pad:pad + 90]
hmap.append(heat_map)
hmap = np.stack(hmap, axis=2)
hmaps.append(hmap)
x = np.array(x, dtype=np.float32)
np.save(x_name, x)
print('Saved:', x_name, x.shape)
hmaps = np.array(hmaps, dtype=np.float32)
np.save(hmaps_name, hmaps)
print('Saved:', hmaps_name, hmaps.shape)