from mvnc import mvncapi as mvnc
import sys
import numpy as np
import cv2
import time
from picamera.array import PiRGBArray
from picamera import PiCamera
import os
import random
import math

# Assume running in examples/caffe/TinyYolo and the graph file is in the current directory.
#input_image_file = '../../data/images/nps_chair.png'
#input_image_file = './dog.jpg'
input_image_file = 'car.jpg'
#input_image_file = 'traffic.jpg'
tiny_yolo_graph_file = './graph'

# Tiny Yolo assumes input images have these dimensions.
NETWORK_IMAGE_WIDTH = 448
NETWORK_IMAGE_HEIGHT = 448

# Interpret the output from a single inference of Tiny Yolo (GetResult)
# and filter out objects/boxes with low probabilities.
# inference_result is the array of floats returned from the API GetResult,
# converted to float32 format.
# input_image_width and input_image_height are the dimensions of the input image.
# Returns a list of lists. Each inner list represents one found object and
# contains the following 6 values:
#    string that is the network classification, e.g. 'car' or 'person'
#    float value for box center X pixel location within the source image
#    float value for box center Y pixel location within the source image
#    float value for box width in pixels within the source image
#    float value for box height in pixels within the source image
#    float value that is the probability of the network classification
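# For example, a single detected car might come back as (values are
# illustrative only):
#    ['car', 224.0, 190.5, 120.3, 80.7, 0.64]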
def filter_objects(inference_result, input_image_width, input_image_height):
    # the raw number of floats returned from the inference (GetResult())
    num_inference_results = len(inference_result)

    # Tiny Yolo's 20 VOC classes, with every class this project does not care
    # about collapsed into "miscellaneous"
    network_classifications = ["miscellaneous", "bicycle", "miscellaneous", "miscellaneous",
                               "miscellaneous", "bus", "car", "miscellaneous", "miscellaneous",
                               "miscellaneous", "miscellaneous", "miscellaneous", "miscellaneous",
                               "motorbike", "person", "miscellaneous", "miscellaneous",
                               "miscellaneous", "train", "miscellaneous"]

    # only keep boxes with probabilities greater than this
    probability_threshold = 0.09

    num_classifications = len(network_classifications)  # should be 20
    grid_size = 7  # the image is a 7x7 grid; each grid cell is 64x64 pixels
    boxes_per_grid_cell = 2  # the number of boxes returned for each grid cell
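
    # The 1470 floats from the network decompose as:
    #   7*7*20  = 980 class probabilities (one set of 20 per grid cell)
    #   7*7*2   =  98 box confidence scale factors (one per box)
    #   7*7*2*4 = 392 box coordinates (x, y, w, h per box)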

    all_probabilities = np.zeros((grid_size, grid_size, boxes_per_grid_cell, num_classifications))

    # classification_probabilities contains a probability for each classification for
    # each 64x64 pixel square of the grid. The source image contains
    # 7x7 of these 64x64 pixel squares and there are 20 possible classifications.
    classification_probabilities = \
        np.reshape(inference_result[0:980], (grid_size, grid_size, num_classifications))

    # the probability scale factor for each box
    box_prob_scale_factor = np.reshape(inference_result[980:1078],
                                       (grid_size, grid_size, boxes_per_grid_cell))

    # get the boxes from the results and adjust them to pixel units
    all_boxes = np.reshape(inference_result[1078:], (grid_size, grid_size, boxes_per_grid_cell, 4))
    boxes_to_pixel_units(all_boxes, input_image_width, input_image_height, grid_size)

    # adjust the probabilities with the scaling factor
    for box_index in range(boxes_per_grid_cell):  # loop over boxes
        for class_index in range(num_classifications):  # loop over classifications
            all_probabilities[:, :, box_index, class_index] = \
                np.multiply(classification_probabilities[:, :, class_index],
                            box_prob_scale_factor[:, :, box_index])

    # mask out everything below the probability threshold
    probability_threshold_mask = np.array(all_probabilities >= probability_threshold, dtype='bool')
    box_threshold_mask = np.nonzero(probability_threshold_mask)
    boxes_above_threshold = all_boxes[box_threshold_mask[0], box_threshold_mask[1], box_threshold_mask[2]]
    classifications_for_boxes_above = np.argmax(all_probabilities, axis=3)[
        box_threshold_mask[0], box_threshold_mask[1], box_threshold_mask[2]]
    probabilities_above_threshold = all_probabilities[probability_threshold_mask]

    # sort the boxes from highest probability to lowest and then
    # sort the probabilities and classifications to match
    argsort = np.array(np.argsort(probabilities_above_threshold))[::-1]
    boxes_above_threshold = boxes_above_threshold[argsort]
    classifications_for_boxes_above = classifications_for_boxes_above[argsort]
    probabilities_above_threshold = probabilities_above_threshold[argsort]

    # get a mask for boxes that seem to be the same object
    duplicate_box_mask = get_duplicate_box_mask(boxes_above_threshold)

    # update the boxes, probabilities and classifications, removing duplicates
    boxes_above_threshold = boxes_above_threshold[duplicate_box_mask]
    classifications_for_boxes_above = classifications_for_boxes_above[duplicate_box_mask]
    probabilities_above_threshold = probabilities_above_threshold[duplicate_box_mask]

    classes_boxes_and_probs = []
    for i in range(len(boxes_above_threshold)):
        classes_boxes_and_probs.append([network_classifications[classifications_for_boxes_above[i]],
                                        boxes_above_threshold[i][0], boxes_above_threshold[i][1],
                                        boxes_above_threshold[i][2], boxes_above_threshold[i][3],
                                        probabilities_above_threshold[i]])
    return classes_boxes_and_probs

# Creates a mask to remove duplicate objects (boxes), and their related
# probabilities and classifications, that should be considered the same object.
# This is determined by how similar the boxes are, based on the
# intersection-over-union metric.
# box_list is a list of boxes (4 floats each: center X, center Y, width, height)
def get_duplicate_box_mask(box_list):
    # The intersection-over-union threshold to use when determining duplicates.
    # Objects/boxes found that are over this threshold will be
    # considered the same object.
    max_iou = 0.25

    box_mask = np.ones(len(box_list))
    for i in range(len(box_list)):
        if box_mask[i] == 0:
            continue
        for j in range(i + 1, len(box_list)):
            if get_intersection_over_union(box_list[i], box_list[j]) > max_iou:
                box_mask[j] = 0.0

    filter_iou_mask = np.array(box_mask > 0.0, dtype='bool')
    return filter_iou_mask

# Converts the boxes in box_list to pixel units.
# Assumes box_list is the box output from the tiny yolo network
# and is [grid_size x grid_size x 2 x 4].
def boxes_to_pixel_units(box_list, image_width, image_height, grid_size):
    # number of boxes per grid cell
    boxes_per_cell = 2

    # set up some offset values to map boxes to pixels
    # box_offset will be [[[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6]] ...repeated for 7]
    box_offset = np.transpose(np.reshape(np.array([np.arange(grid_size)] * (grid_size * 2)),
                                         (boxes_per_cell, grid_size, grid_size)), (1, 2, 0))

    # adjust the box centers from cell-relative to image-relative (0.0-1.0) units
    box_list[:, :, :, 0] += box_offset
    box_list[:, :, :, 1] += np.transpose(box_offset, (1, 0, 2))
    box_list[:, :, :, 0:2] = box_list[:, :, :, 0:2] / (grid_size * 1.0)
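    # e.g. a raw center X of 0.5 in grid column 3 becomes (0.5 + 3) / 7 of the
    # image width, which is 224 px for a 448-pixel-wide image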

    # the network outputs the square roots of the box widths and heights,
    # so square them to recover the actual (0.0-1.0) sizes
    box_list[:, :, :, 2] = np.multiply(box_list[:, :, :, 2], box_list[:, :, :, 2])
    box_list[:, :, :, 3] = np.multiply(box_list[:, :, :, 3], box_list[:, :, :, 3])

    # scale the boxes to the image size in pixels
    box_list[:, :, :, 0] *= image_width
    box_list[:, :, :, 1] *= image_height
    box_list[:, :, :, 2] *= image_width
    box_list[:, :, :, 3] *= image_height

# Evaluate the intersection-over-union for two boxes.
# The intersection-over-union metric determines how close
# two boxes are to being the same box. The closer the boxes
# are to being the same, the closer the metric will be to 1.0.
# box_1 and box_2 are arrays of 4 numbers: the (x, y) point that defines
# the center of the box, followed by the width and height of the box.
# Returns the intersection-over-union (between 0.0 and 1.0)
# for the two boxes specified.
def get_intersection_over_union(box_1, box_2):
    # one dimension of the intersecting box
    intersection_dim_1 = min(box_1[0] + 0.5 * box_1[2], box_2[0] + 0.5 * box_2[2]) - \
                         max(box_1[0] - 0.5 * box_1[2], box_2[0] - 0.5 * box_2[2])

    # the other dimension of the intersecting box
    intersection_dim_2 = min(box_1[1] + 0.5 * box_1[3], box_2[1] + 0.5 * box_2[3]) - \
                         max(box_1[1] - 0.5 * box_1[3], box_2[1] - 0.5 * box_2[3])

    if intersection_dim_1 < 0 or intersection_dim_2 < 0:
        # no intersection area
        intersection_area = 0
    else:
        # intersection area is the product of the intersection dimensions
        intersection_area = intersection_dim_1 * intersection_dim_2

    # The union area is the area of each box added together, minus the
    # intersection area, since the intersection is counted twice
    # (by definition it is in each box).
    union_area = box_1[2] * box_1[3] + box_2[2] * box_2[3] - intersection_area

    # now we can return the intersection over union
    iou = intersection_area / union_area
    return iou
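
# Worked example with made-up boxes: box_1 = (0, 0, 2, 2) and box_2 = (1, 0, 2, 2)
# overlap in a 1x2 strip, so IoU = 2 / (4 + 4 - 2) = 1/3, about 0.33, which is
# above the 0.25 duplicate threshold, so the lower-probability box would be dropped.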

# Displays the source image in a GUI window with boxes, labels and the current
# danger level drawn on it. Returns the last key pressed, the per-class object
# counts, and the numeric danger level.
def display_objects_in_gui(source_image, filtered_objects, play):  # play is currently unused
    # copy the image so we can draw on it. Could draw directly on the source
    # image if not concerned about preserving it.
    display_image = source_image.copy()
    source_image_width = source_image.shape[1]
    source_image_height = source_image.shape[0]

    x_ratio = float(source_image_width) / NETWORK_IMAGE_WIDTH
    y_ratio = float(source_image_height) / NETWORK_IMAGE_HEIGHT

    possible_objects = ["bicycle", "bus", "car", "motorbike", "person", "train", "miscellaneous"]
    num_objects = [0, 0, 0, 0, 0, 0, 0]

    # loop through each box and draw it on the image along with a classification label
    print('Found this many objects in the image: ' + str(len(filtered_objects)))
    for obj_index in range(len(filtered_objects)):
        center_x = int(filtered_objects[obj_index][1] * x_ratio)
        center_y = int(filtered_objects[obj_index][2] * y_ratio)
        half_width = int(filtered_objects[obj_index][3] * x_ratio) // 2
        half_height = int(filtered_objects[obj_index][4] * y_ratio) // 2

        # calculate box (left, top) and (right, bottom) coordinates
        box_left = max(center_x - half_width, 0)
        box_top = max(center_y - half_height, 0)
        box_right = min(center_x + half_width, source_image_width)
        box_bottom = min(center_y + half_height, source_image_height)

        print('box at index ' + str(obj_index) + ' is... left: ' + str(box_left) +
              ', top: ' + str(box_top) + ', right: ' + str(box_right) +
              ', bottom: ' + str(box_bottom))

        # draw the rectangle on the image; this is hopefully around the object
        box_color = (0, 255, 0)  # green box
        box_thickness = 2
        cv2.rectangle(display_image, (box_left, box_top), (box_right, box_bottom),
                      box_color, box_thickness)

        # draw the classification label string just above and to the left of the rectangle
        label_background_color = (70, 120, 70)  # greyish green background for text
        label_text_color = (255, 255, 255)  # white text
        cv2.rectangle(display_image, (box_left, box_top - 20), (box_right, box_top),
                      label_background_color, -1)
        cv2.putText(display_image,
                    filtered_objects[obj_index][0] + ' : %.2f' % filtered_objects[obj_index][5],
                    (box_left + 5, box_top - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)
        num_objects[possible_objects.index(filtered_objects[obj_index][0])] += 1

    # overlay the current danger level in the top-right corner
    dl_text, dl_value = calcDangerLevel(filtered_objects)
    cv2.rectangle(display_image, (source_image_width - 250, 0), (source_image_width - 1, 40),
                  dangerLevel(dl_text), -1)
    cv2.putText(display_image, 'Danger Level: ' + dl_text, (source_image_width - 230, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

    for i in range(len(possible_objects)):
        print("Found:", num_objects[i], "examples of a", possible_objects[i])

    window_name = 'TinyYolo'
    cv2.imshow(window_name, display_image)
    raw_key = cv2.waitKey(1) & 0xFF

    # press 's' to save the annotated frame to disk
    if raw_key == ord('s'):
        filename = 'yolo' + str(random.randint(0, 20000)) + '.jpg'
        cv2.imwrite(os.path.join('imgs4-29_2/', filename), display_image)
    return raw_key, num_objects, dl_value

# Map a danger level string to the BGR color used for its overlay background.
def dangerLevel(level):
    switch = {
        'Low': (0, 255, 0),
        'Medium-Low': (0, 255, 255),
        'Medium': (0, 165, 255),
        'Medium-High': (0, 95, 255),
        'High': (0, 0, 255)
    }
    return switch.get(level)
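
# Note: OpenCV colors are BGR tuples, so the mapping above runs from green
# (low danger) through yellow and orange up to red (high danger).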

# Compute a rough danger level from the detected objects. Each object class
# contributes a fixed weight, and the total is scaled up when several distinct
# classes are present at once. Returns a (text, value) pair.
def calcDangerLevel(filtered_objects):
    value = 0
    num_obj = 0
    possible_objects = {"bicycle": False, "bus": False, "car": False,
                        "motorbike": False, "person": False, "train": False}
    # per-class danger weights
    switcher = {
        'bicycle': .6,
        'bus': .7,
        'car': .5,
        'motorbike': .6,
        'person': .4,
        'train': .7,
        'miscellaneous': .1
    }
    for obj_index in range(len(filtered_objects)):
        item = filtered_objects[obj_index][0]
        value = value + switcher.get(item)
        if item in possible_objects:
            # only the six tracked classes count toward the distinct-class scaling
            possible_objects[item] = True
        num_obj = num_obj + 1

    if num_obj == 0:
        value = 0
    else:
        # boost the total when several distinct classes are present
        scaling_factor = sum(list(possible_objects.values())) ** 1.4
        value = (scaling_factor * value) / 1.4
    print("Danger Value: ", value)

    # clamp, then bucket the value into one of five levels
    value = min(4.999, value)
    value_list = ['Low', 'Medium-Low', 'Medium', 'Medium-High', 'High']
    value = int(math.floor(value))
    text = value_list[value]
    return text, value
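
# Worked example: one car and one person give value = 0.5 + 0.4 = 0.9 with
# two distinct classes, so value = (2 ** 1.4 * 0.9) / 1.4, about 1.70, which
# floors to 1 and reads out as 'Medium-Low'.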

# This function is called from the entry point to do all the work.
def main():
    # set the logging level and initialize/open the first NCS we find
    mvnc.SetGlobalOption(mvnc.GlobalOption.LOG_LEVEL, 0)
    devices = mvnc.EnumerateDevices()
    if len(devices) == 0:
        print('No devices found')
        return 1
    device = mvnc.Device(devices[0])
    device.OpenDevice()

    # load the graph from disk and allocate it on the device via the API
    with open(tiny_yolo_graph_file, mode='rb') as f:
        graph_from_disk = f.read()
    graph = device.AllocateGraph(graph_from_disk)

    # Capture frames from the Pi camera, resize each to the network width and
    # height, keep a copy in display_image for display, then convert to float32,
    # normalize (divide by 255), and finally convert to float16 to pass to
    # LoadTensor as input for an inference.
    camera = PiCamera()
    rawCapture = PiRGBArray(camera)
    time.sleep(0.1)  # give the camera a moment to warm up

    play = True
    possible_objects = ["bicycle", "bus", "car", "motorbike", "person", "train", "miscellaneous"]
    count = 0
    total_objects = [0, 0, 0, 0, 0, 0, 0]
    dl_total = 0

    for frame in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
        input_image = rawCapture.array
        if play:
            display_image = input_image
            input_image = cv2.resize(input_image, (NETWORK_IMAGE_WIDTH, NETWORK_IMAGE_HEIGHT),
                                     interpolation=cv2.INTER_LINEAR)
            input_image = input_image.astype(np.float32)
            input_image = np.divide(input_image, 255.0)
            input_image = input_image[:, :, ::-1]  # convert BGR to RGB

            t0 = time.time()
            # load the tensor and get the result; this executes the inference on the NCS
            graph.LoadTensor(input_image.astype(np.float16), 'user object')
            output, userobj = graph.GetResult()

            # filter out all the objects/boxes that don't meet thresholds; boxes come
            # back in network (448x448) pixel units and display_objects_in_gui rescales them
            filtered_objs = filter_objects(output.astype(np.float32),
                                           input_image.shape[1], input_image.shape[0])  # fc27 instead of fc12 for yolo_small
            print("Computation time:", time.time() - t0)

            raw_key, num_objects, dl_value = display_objects_in_gui(display_image, filtered_objs, True)
        else:
            # still poll the keyboard while paused so 'p' and 'q' keep working
            raw_key = cv2.waitKey(1) & 0xFF
        rawCapture.truncate(0)

        if raw_key == ord("p"):
            play = not play
        elif raw_key == ord("q"):
            # print the per-class averages and average danger level, then quit
            if count > 0:
                avg_objects = [n / count for n in total_objects]
                value_list = ['Low', 'Medium-Low', 'Medium', 'Medium-High', 'High']
                for i in range(len(avg_objects)):
                    print("Found an average of:", avg_objects[i], "examples of a", possible_objects[i])
                av_dl = int(math.floor(dl_total / count))
                print("Average danger level: " + value_list[av_dl])
            break
        elif play:
            # accumulate running totals for the end-of-session averages
            total_objects = [tot + new for tot, new in zip(total_objects, num_objects)]
            dl_total += dl_value
            count += 1

    # clean up
    graph.DeallocateGraph()
    device.CloseDevice()
    print('Finished')

# main entry point for the program; we'll call main() to do what needs to be done
if __name__ == "__main__":
    sys.exit(main())