forked from elijahcole/caltech-ee148-spring2020-hw01
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_predictions.py
224 lines (170 loc) · 6.93 KB
/
run_predictions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import os
import numpy as np
import json
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import cv2
import numpy as np
import tensorflow as tf
# In[1870]:
def visualize_bounding_boxes(I, bounding_boxes, bounding_box_labels=None):
    '''
    Display the numpy image array <I> with <bounding_boxes> drawn on top.

    Each element of <bounding_boxes> is a 4-element sequence
    [ul_x, ul_y, br_x, br_y]: the x/y coordinates of the upper-left corner
    followed by the x/y coordinates of the bottom-right corner.

    <bounding_box_labels>, when given, is a sequence whose i-th entry is
    written next to the i-th bounding box. Boxes without a matching label
    are drawn unlabeled.

    The figure is shown with plt.show(); nothing is returned.
    '''
    # Import the submodule explicitly instead of relying on pyplot having
    # loaded matplotlib.patches as a side effect.
    from matplotlib.patches import Rectangle

    fig, ax = plt.subplots()
    ax.imshow(I)
    for idx, (ul_x, ul_y, br_x, br_y) in enumerate(bounding_boxes):
        # Mark the bottom-right corner with a dot.
        ax.plot(br_x, br_y, '.')
        rect = Rectangle((ul_x, ul_y), abs(ul_x - br_x), abs(br_y - ul_y),
                         linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        if bounding_box_labels is not None and idx < len(bounding_box_labels):
            # Anchor the label just above the upper-left corner of its box.
            ax.text(ul_x, ul_y, str(bounding_box_labels[idx]),
                    color='r', fontsize=8, verticalalignment='bottom')
    plt.show()
# In[1921]:
def _load_red_light_kernels(kernel_path):
    '''
    Load the template image at <kernel_path> and return a list of RGB numpy
    arrays: the template itself followed by three resized copies.
    '''
    with Image.open(kernel_path) as kernel_file:
        base_kernel = kernel_file.convert(mode='RGB')
    k_cols, k_rows = base_kernel.size  # PIL reports size as (width, height)
    kernel_images = [base_kernel]
    for maxsize in (50, 30, 20):
        # NOTE(review): width is scaled by width/height and height by
        # height/width, so the resized kernels do not keep the template's
        # aspect ratio. Kept as-is because the thresholds used by
        # detect_red_light were chosen against these kernels -- confirm
        # before changing.
        new_width = int(maxsize * (k_cols / k_rows))
        new_height = int(maxsize * (k_rows / k_cols))
        kernel_images.append(base_kernel.resize((new_width, new_height)))
    return [np.asarray(k) for k in kernel_images]


def _is_near_existing_box(ul_x, ul_y, bounding_boxes, min_dist=100):
    '''
    Return True if the point (ul_x, ul_y) lies closer than <min_dist> pixels
    to the upper-left or the bottom-right corner of any box in
    <bounding_boxes>.
    '''
    # NOTE(review): both corners of an existing box are compared against the
    # candidate's upper-left corner only -- confirm this is intended.
    # Squared distances keep the comparison in exact integer arithmetic.
    limit = min_dist ** 2
    return any(
        (x1 - ul_x) ** 2 + (y1 - ul_y) ** 2 < limit
        or (x2 - ul_x) ** 2 + (y2 - ul_y) ** 2 < limit
        for x1, y1, x2, y2 in bounding_boxes
    )


def detect_red_light(I, *, visualize=True):
    '''
    Detect red lights in the numpy image array <I> by template matching and
    return a list <bounding_boxes> with one element per detection.

    Each element of <bounding_boxes> is a list of four integers
    [ul_x, ul_y, br_x, br_y]: the column and row index of the upper-left
    corner followed by the column and row index of the bottom-right corner.
    This is the order visualize_bounding_boxes expects.

    The template is read from 'redlight_kernel.png' (resolved against the
    current working directory) and matched at four sizes. A patch is accepted
    when the mean absolute difference between the normalized patch and the
    normalized kernel is below that kernel's threshold and the patch is not
    too close to an already accepted box.

    If <visualize> is true (the default), the detections are displayed with
    visualize_bounding_boxes before returning.

    Note that PIL loads images in RGB order, so:
    I[:,:,0] is the red channel
    I[:,:,1] is the green channel
    I[:,:,2] is the blue channel
    '''
    kernels = _load_red_light_kernels('redlight_kernel.png')
    # One acceptance threshold per kernel, in the same order as <kernels>.
    thresholds = [0.2, 0.21, 0.2, 0.19]

    I_height, I_width = I.shape[0], I.shape[1]
    # Normalization is element-wise and depends only on the channel means of
    # the full image, so normalizing once and slicing patches out of the
    # result gives the same values as normalizing every patch separately.
    I_normalized = normalize_image(I, I)

    bounding_boxes = []
    r_vals = []
    for k, threshold in zip(kernels, thresholds):
        k_height, k_width = k.shape[0], k.shape[1]
        if I.shape[2:] != k.shape[2:]:
            # Channel counts differ, so no patch can match this kernel.
            continue
        k_normalized = normalize_image(k, I)
        # Never let the stride drop to 0 for kernels smaller than 5 pixels;
        # range() rejects a zero step.
        stride = max(1, min(k_width, k_height) // 5)
        # "+ 1" so the window flush with the right/bottom edge is visited.
        for ul_x in range(0, I_width - k_width + 1, stride):
            for ul_y in range(0, I_height - k_height + 1, stride):
                br_x, br_y = ul_x + k_width, ul_y + k_height
                I_patch = I_normalized[ul_y:br_y, ul_x:br_x]
                # Mean absolute difference: lower means more similar.
                r = np.mean(np.abs(I_patch - k_normalized))
                if r >= threshold:
                    continue
                if _is_near_existing_box(ul_x, ul_y, bounding_boxes):
                    continue
                r_vals.append(round(r, 5))
                bounding_boxes.append([ul_x, ul_y, br_x, br_y])

    for box in bounding_boxes:
        assert len(box) == 4
    if visualize:
        visualize_bounding_boxes(I, bounding_boxes, r_vals)
    return bounding_boxes
# In[1909]:
def normalize_image(img, I):
    '''
    Normalize an image patch or kernel, <img>, with respect to the full
    image <I>.

    For each of the first three color channels, the mean of that channel
    over the whole of <I> is subtracted from <img> and the result is divided
    by 255. <img> itself is not modified; a new float64 array is returned.
    '''
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    img_normalized = np.array(img, dtype=np.float64)
    # Per-channel means of the full image, one value per color channel.
    channel_means = I[:, :, :3].mean(axis=(0, 1))
    img_normalized[:, :, :3] = (img_normalized[:, :, :3] - channel_means) / 255
    return img_normalized
# In[1924]:
# Location of the input images (read from local disk)
data_path = '/Users/Johanna/Desktop/Computer Vision/hw01/RedLights2011_Medium/'
# get sorted list of files, keeping only names that end in '.jpg'
# (a plain substring test would also accept names such as 'x.jpg.bak'):
file_names = sorted(f for f in os.listdir(data_path) if f.endswith('.jpg'))
# set a path for saving predictions:
preds_path = '../data/hw01_preds'
os.makedirs(preds_path, exist_ok=True)  # create directory if needed
# maps each file name to the list of bounding boxes detected in that image
preds = {}
for i, file_name in enumerate(file_names):
    print("Image: ", i)
    # read image using PIL and convert to numpy array:
    with Image.open(os.path.join(data_path, file_name)) as img:
        I = np.asarray(img)
    preds[file_name] = detect_red_light(I)
# save preds (overwrites any previous predictions!)
with open(os.path.join(preds_path, 'preds.json'), 'w') as f:
    json.dump(preds, f)
# In[1928]:
'''Show easy or "good" image detection examples
These images have red traffic lights that are easy to detect, because (1) there is greater similarity
between the traffic lights and kernel, due to blue sky background, (2) the images do not contain
misleading objects, and (3) the traffic lights are large enough to provide enough pixel-wise
similarity with the kernel.
'''
# Positions (within the sorted file list) of the "easy" example images
easy_indices = [9, 7, 60, 105, 57]
for i in easy_indices:
    # load the example as a numpy array and run (and display) the detector
    easy_image_path = os.path.join(data_path, file_names[i])
    I = np.asarray(Image.open(easy_image_path))
    detect_red_light(I)
# In[1929]:
'''
Show hard or "bad" image examples
These images are hard, because (1) there is a lack of contrast between the traffic light and
background, (2) the traffic lights are too far away (too small) to provide enough similarity
between the kernel and image pixels, and (3) there are similar objects that aren't traffic
lights in the image.
'''
# Positions (within the sorted file list) of the "hard" example images
hard_indices = [19, 20, 167, 191]
for i in hard_indices:
    # load the example as a numpy array and run (and display) the detector
    hard_image_path = os.path.join(data_path, file_names[i])
    I = np.asarray(Image.open(hard_image_path))
    detect_red_light(I)
# In[ ]: