-
Notifications
You must be signed in to change notification settings - Fork 7
/
percent_matching.py
493 lines (390 loc) · 22.1 KB
/
percent_matching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
import time
import cv2
import numpy as np
# SmashScan Libraries
import position_tools
import timeline
import util
# An object that takes a capture and a number of input parameters and performs
# a number of template matching operations. Parameters include a frame_range
# for the range of frame numbers to be surveyed, gray_flag for a grayscale or
# BGR analysis, show_flag which displays results with cv2.imshow(), and
# wait_flag which waits between frames.
class PercentMatcher:
    def __init__(self, capture, frame_range=None,
                 gray_flag=True, save_flag=False, show_flag=False, wait_flag=False):
        """Store the capture and display flags, set the tuned matching
        parameters, and load the percent-sign template resized to 360p."""
        self.capture = capture
        self.gray_flag = gray_flag
        self.save_flag = save_flag
        self.show_flag = show_flag

        # Predetermined parameters that have been tested to work best.
        self.calib_w_range = (24, 30)    # The possible template width values.
        self.conf_thresh = 0.8           # The cv2 Template Matching conf thresh.
        self.min_match_length_s = 30     # Minimum time of a "match" in seconds.
        self.num_init_frames = 30        # # of frames to init. template size.
        self.num_port_frames = 20        # # of frames to find port each match.
        self.prec_step_size = 2          # Fnum step size during precision sweep.
        self.max_prec_tl_gap_size = 4    # Max size of precise t.l. gaps to fill.
        self.max_tl_gap_size = 4         # Max size of timeline gaps to fill.
        self.roi_y_tolerance = 3         # The size to expand the ROI y-dimensions.
        self.step_size = 60              # Frame number step size during sweep.
        self.template_zero_radius = 2    # Size of match_mat subregion to zero.

        # Parameters that are redefined later on during initialization.
        self.template_roi = None         # A bounding box to search for templates.

        # Set the start/stop frame to the full video if frame_range undefined.
        if frame_range:
            self.start_fnum, self.stop_fnum = frame_range
        else:
            self.start_fnum = 0
            self.stop_fnum = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        # cv2.waitKey delay: 0 blocks until a key press, 1 waits one ms.
        self.wait_length = 0 if wait_flag else 1

        # Read the percentage sign image file and extract a binary mask based
        # off of the alpha channel. Also, resize to the 360p base height.
        self.orig_pct_img, self.orig_pct_mask = util.get_image_and_mask(
            "resources/pct.png", gray_flag)
        self.pct_img = util.resize_img(self.orig_pct_img, 360/480)
        self.pct_mask = util.resize_img(self.orig_pct_mask, 360/480)
#### PERCENT MATCHER TESTS #################################################
# 1. The PM Sweep Test iterates over the entire video, searching for four
# default sized percent sprites within each frame.
def sweep_test(self):
# Iterate through input video range. During each iteration, fetch the
# frame and obtain the percent template confidences and bounding boxes.
start_time = time.time()
for fnum in range(self.start_fnum, self.stop_fnum, self.step_size):
frame = util.get_frame(self.capture, fnum, self.gray_flag)
confidence_list, bbox_list = self.get_tm_results(frame, 4, 0)
# Display and save frame if the respective flags are enabled.
if self.show_flag:
label_list = ["{:0.3f}".format(i) for i in confidence_list]
label = " ".join(label_list)
util.show_frame(frame, bbox_list, label,
self.save_flag, "output/{:07d}.png".format(fnum))
if cv2.waitKey(self.wait_length) & 0xFF == ord('q'):
break
# Display the time taken to complete the test.
frame_count = (self.stop_fnum - self.start_fnum) // self.step_size
util.display_fps(start_time, frame_count, "Sweep")
# 2. The PM Calibrate Test iterates over the entire video, comparing a
# calibrated template size to the default template size. The "calibrated"
# template size is determined by resizing the template and dermining which
# resize-operation yields the highest confidence.
def calibrate_test(self):
# Iterate through input video range. During each iteration, fetch the
# frame and obtain the optimal calibrated template size.
start_time = time.time()
for fnum in range(self.start_fnum, self.stop_fnum, self.step_size):
frame = util.get_frame(self.capture, fnum, self.gray_flag)
bbox, opt_conf, opt_w, opt_h = self.get_calibrate_results(frame)
# Get the percent sign accuracy according to the default (480, 584)
# to (360, 640) rescale change from (24, 32) to (18, 24).
orig_conf_list, _ = self.get_tm_results(frame, 1, 0)
# Display frame with a confidence label if show_flag is enabled.
if self.show_flag:
label = "({}, {}) {:0.3f} -> {:0.3f}".format(
opt_w, opt_h, orig_conf_list[0], opt_conf)
util.show_frame(frame, [bbox], label,
self.save_flag, "output/{:07d}.png".format(fnum))
if cv2.waitKey(self.wait_length) & 0xFF == ord('q'):
break
# Display the time taken to complete the test.
frame_count = (self.stop_fnum - self.start_fnum) // self.step_size
util.display_fps(start_time, frame_count, "Calibrate")
# 3. The PM Initialize Test iterates over a number of random frames to find
# an expected template size and region of interest. The ROI is a horizontal
# of the frame based on the y-values that the templates were found.
def initialize_test(self):
# Generate random frames to search for a proper template size.
start_time, opt_w_list, bbox_list = time.time(), list(), list()
random_fnum_list = np.random.randint(low=self.start_fnum,
high=self.stop_fnum, size=self.num_init_frames)
# Iterate through input video range. During each iteration, fetch the
# frame and obtain the optimal calibrated template size.
print("(opt_w, opt_h), (bbox), random_fnum, opt_conf")
for random_fnum in random_fnum_list:
frame = util.get_frame(self.capture, random_fnum, self.gray_flag)
bbox, opt_conf, opt_w, opt_h = self.get_calibrate_results(frame)
# Store the template width if above a confidence threshold.
if opt_conf > self.conf_thresh:
opt_w_list.append(opt_w)
bbox_list.append(bbox)
print((opt_w, opt_h), bbox, random_fnum, opt_conf)
# Display frame with a confidence label if show_flag is enabled.
if self.show_flag:
orig_conf_list, _ = self.get_tm_results(frame, 1, 0)
label = "({}, {}) {:0.3f} -> {:0.3f}".format(
opt_w, opt_h, orig_conf_list[0], opt_conf)
util.show_frame(frame, [bbox], label,
self.save_flag, "output/{:07d}.png".format(random_fnum))
if cv2.waitKey(self.wait_length) & 0xFF == ord('q'):
break
# Display the optimal dims, ROI, and time taken to complete the test.
opt_w, opt_h = self.get_opt_template_dims(opt_w_list)
self.template_roi = self.get_opt_template_roi(bbox_list)
print("Optimal Template Size: ({}, {})".format(opt_w, opt_h))
print("Optimal ROI bbox: {}".format(self.template_roi))
util.display_fps(start_time, self.num_init_frames, "Initialize")
if self.show_flag:
util.show_frame(frame, [self.template_roi], wait_flag=True)
# 4. The PM timeline Test initializes the template scale, determines a
# rough estimate of the video timeline (when a percent sign is present),
# and then obtains a more precise estimate of the video timeline.
def timeline_test(self):
# Use a random number of frames to calibrate the percent template size.
start_time = time.time()
self.initialize_template_scale()
util.display_fps(start_time, self.num_init_frames, "Initialize")
# Iterate through the video to identify when percent is present.
start_time = time.time()
pct_timeline = self.get_pct_timeline()
frame_count = (self.stop_fnum - self.start_fnum) // self.step_size
util.display_fps(start_time, frame_count, "Initial Sweep")
# Fill holes in the history timeline list, and filter out timeline
# sections that are smaller than a particular size.
clean_timeline = timeline.fill_filter(pct_timeline,
self.max_tl_gap_size)
clean_timeline = timeline.size_filter(clean_timeline,
self.step_size, self.min_match_length_s)
if self.show_flag:
timeline.show_plots(pct_timeline, clean_timeline, ["pct found"])
# Display the frames associated with the calculated match ranges.
timeline_ranges = timeline.get_ranges(clean_timeline)
match_ranges = np.multiply(timeline_ranges, self.step_size)
if self.show_flag:
util.show_frames(self.capture, match_ranges.flatten())
# Display the frames associated with the precise match ranges.
start_time = time.time()
new_match_ranges = self.get_precise_match_ranges(match_ranges)
util.display_total_time(start_time, "Cleaning Sweep")
print("\tMatch Ranges: {:}".format(match_ranges.tolist()))
print("\tPrecise Match Ranges: {:}".format(new_match_ranges.tolist()))
if self.show_flag:
util.show_frames(self.capture, new_match_ranges.flatten())
#### PERCENT MATCHER SWEEP METHODS #########################################
# Given a frame, return a confidence list and bounding box list.
def get_tm_results(self, frame, num_results, conf_thresh=None):
# Only a specific subregion of the frame is analyzed. If the template
# ROI has been initialized, take that frame subregion. Otherwise, take
# the bottom quarter of the frame assuming a W-360p (640x360) format.
if self.template_roi:
frame = frame[self.template_roi[0][1]:self.template_roi[1][1], :]
else:
frame = frame[270:, :]
# Set the confidence threshold to the default, if none was input.
if conf_thresh is None:
conf_thresh = self.conf_thresh
# Match the template using a normalized cross-correlation method and
# retrieve the confidence and top-left points from the result.
match_mat = cv2.matchTemplate(frame, self.pct_img,
cv2.TM_CCORR_NORMED, mask=self.pct_mask)
conf_list, tl_list = self.get_match_results(
match_mat, num_results, conf_thresh)
# Compensate for point location for the used region of interest.
if self.template_roi:
for i, _ in enumerate(tl_list):
tl_list[i] = (tl_list[i][0],
tl_list[i][1] + self.template_roi[0][1])
else:
for i, _ in enumerate(tl_list):
tl_list[i] = (tl_list[i][0], tl_list[i][1] + 270)
# Create a list of bounding boxes (top-left & bottom-right points),
# using the input template_shape given as (width, height).
bbox_list = list()
h, w = self.pct_img.shape[:2]
for tl in tl_list:
br = (tl[0] + w, tl[1] + h)
bbox_list.append((tl, br))
return conf_list, bbox_list
# Take the result of cv2.matchTemplate, and find the most likely locations
# of a template match. To find multiple locations, the region around a
# successful match is zeroed. Return a list of confidences and locations.
def get_match_results(self, match_mat, num_results, conf_thresh):
max_val_list, top_left_list = list(), list()
match_mat_dims = match_mat.shape
# Find multiple max locations in the input matrix using cv2.minMaxLoc
# and then zeroing the surrounding region to find the next match.
for i in range(0, num_results):
_, max_val, _, top_left = cv2.minMaxLoc(match_mat)
set_subregion_to_zeros(match_mat, match_mat_dims,
top_left, radius=self.template_zero_radius)
max_val_list.append(max_val)
top_left_list.append(top_left)
# Remove results that do not meet the confidence threshold.
conf_list, tl_list = list(), list()
for i, conf in enumerate(max_val_list):
if conf > conf_thresh:
conf_list.append(conf)
tl_list.append(top_left_list[i])
return (conf_list, tl_list)
#### PERCENT MATCHER CALIBRATION METHODS ###################################
# Resize the original template a number of times to find the dimensions
# of the template that yield the highest (optimal) confidence. Return the
# bounding box, confidence value, and optimal template dimensions.
def get_calibrate_results(self, frame):
h, w = self.orig_pct_img.shape[:2]
opt_max_val, opt_top_left, opt_w, opt_h = 0, 0, 0, 0
# Assuming W-360p (640×360), only search the bottom of the frame.
frame = frame[270:, :]
# Iterate over a num. of widths, and rescale the img/mask accordingly.
for new_w in range(self.calib_w_range[0], self.calib_w_range[1]):
new_h = int(new_w * h / w)
pct_img = cv2.resize(self.orig_pct_img, (new_w, new_h))
pct_mask = cv2.resize(self.orig_pct_mask, (new_w, new_h))
# Calculate the confidence and location of the current rescale.
match_mat = cv2.matchTemplate(frame, pct_img,
cv2.TM_CCORR_NORMED, mask=pct_mask)
_, max_val, _, top_left = cv2.minMaxLoc(match_mat)
# Store the results if the confidence is larger than the previous.
if max_val > opt_max_val:
opt_max_val, opt_top_left = max_val, top_left
opt_w, opt_h = new_w, new_h
# Compensate for point location for the ROI that was used.
opt_top_left = (opt_top_left[0], opt_top_left[1] + 270)
# Format the bounding box and return.
bbox = (opt_top_left, (opt_top_left[0]+opt_w, opt_top_left[1]+opt_h))
return bbox, opt_max_val, opt_w, opt_h
# Given a list of expected widths, return the optimal dimensions of the
# template bounding box by calculating the median of the list.
def get_opt_template_dims(self, opt_w_list):
opt_w = int(np.median(opt_w_list))
h, w = self.orig_pct_img.shape[:2]
return (opt_w, round(h*opt_w/w))
# Given a list of expected bounding boxes, return the optimal region of
# interest bounding box, that covers a horizontal line over the entire 360p
# frame. The bounding box must not surpass the boundaries of the frame.
def get_opt_template_roi(self, bbox_list):
y_min_list, y_max_list = list(), list()
for bbox in bbox_list:
y_min_list.append(bbox[0][1])
y_max_list.append(bbox[1][1])
y_min = max(0, min(y_min_list) - self.roi_y_tolerance)
y_max = min(359, max(y_max_list) + self.roi_y_tolerance)
return ((0, y_min), (639, y_max))
#### PERCENT MATCHER INITIALIZE METHODS ####################################
# Selects a random number of frames to calibrate the percent template size.
def initialize_template_scale(self):
# Generate random frames to search for a proper template size.
random_fnum_list = np.random.randint(low=self.start_fnum,
high=self.stop_fnum, size=self.num_init_frames)
opt_w_list, bbox_list = list(), list()
# Iterate through input video range. During each iteration, fetch the
# frame and obtain the optimal calibrated template size.
for random_fnum in random_fnum_list:
frame = util.get_frame(self.capture, random_fnum, self.gray_flag)
bbox, opt_conf, opt_w, _ = self.get_calibrate_results(frame)
# Store template info if confidence above an input threshold.
if opt_conf > self.conf_thresh:
opt_w_list.append(opt_w)
bbox_list.append(bbox)
# Calculate the median of the optimal widths and rescale accordingly.
opt_w, opt_h = self.get_opt_template_dims(opt_w_list)
self.pct_img = cv2.resize(self.orig_pct_img, (opt_w, opt_h))
self.pct_mask = cv2.resize(self.orig_pct_mask, (opt_w, opt_h))
# Calculate the region of interest to search for the template.
self.template_roi = self.get_opt_template_roi(bbox_list)
#### PERCENT MATCHER TIMELINE METHODS ######################################
# Iterate through the video to identify when the percent sprite is present.
def get_pct_timeline(self):
pct_timeline = list()
for fnum in range(self.start_fnum, self.stop_fnum, self.step_size):
# Obtain the frame and get the template confidences and locations.
frame = util.get_frame(self.capture, fnum, self.gray_flag)
confidence_list, _ = self.get_tm_results(frame, 1)
# Append to the percent timeline according to if percent was found.
if confidence_list:
pct_timeline.append(0)
else:
pct_timeline.append(-1)
return pct_timeline
# Given an initial guess of match ranges, make a more precise estimate.
def get_precise_match_ranges(self, init_match_ranges):
# Iterate through the match ranges, going backwards if at the start
# of a match, and going forward if at the end of a match.
prec_match_ranges_flat = list()
init_match_ranges_flat = init_match_ranges.flatten()
for i, fnum_prediction in enumerate(init_match_ranges_flat):
fnum = fnum_prediction
if i % 2 == 0:
current_step_size = -self.prec_step_size
else:
current_step_size = self.prec_step_size
# Iterate through the video using fnum until no percent has been
# found for a specified number of frames.
while True:
frame = util.get_frame(self.capture, fnum, self.gray_flag)
confidence_list, _ = self.get_tm_results(frame, 1)
# Increment the precise counter if no pct was found.
if confidence_list:
prec_counter = 0
else:
prec_counter += 1
# Exit if there has been no percent found over multiple frames.
if prec_counter == self.max_prec_tl_gap_size:
prec_match_ranges_flat.append(
fnum - current_step_size*(prec_counter+1))
break
elif fnum == 0 or fnum >= self.stop_fnum - self.prec_step_size:
prec_match_ranges_flat.append(fnum)
break
fnum = fnum + current_step_size
# Return the match ranges as a list of pairs.
return np.reshape(prec_match_ranges_flat, (-1, 2))
#### PERCENT MATCHER EXTERNAL METHODS ######################################
# Run the timeline template test over a video range.
def get_match_ranges(self):
# Use a random number of frames to calibrate the percent template size.
self.initialize_template_scale()
# Iterate through the video to identify when percent is present.
pct_timeline = self.get_pct_timeline()
# Fill holes in the history timeline list, and filter out timeline
# sections that are smaller than a particular size.
clean_timeline = timeline.fill_filter(pct_timeline,
self.max_tl_gap_size)
clean_timeline = timeline.size_filter(clean_timeline,
self.step_size, self.min_match_length_s)
# Display the frames associated with the calculated match ranges.
timeline_ranges = timeline.get_ranges(clean_timeline)
match_ranges = np.multiply(timeline_ranges, self.step_size)
# Display the frames associated with the precise match ranges.
new_match_ranges = self.get_precise_match_ranges(match_ranges)
return new_match_ranges.tolist()
# Given a list of match ranges and bboxes, return the ports in use.
def get_match_ports(self, match_ranges, match_bboxes):
# Iterate over all matches, and generate random frame nums to check.
match_ports = list()
for i, match_range in enumerate(match_ranges):
random_fnum_list = np.random.randint(low=match_range[0],
high=match_range[1], size=self.num_port_frames)
# Iterate over the random frames, and store percent x-positions.
x_pos_list = list()
for fnum in random_fnum_list:
frame = util.get_frame(self.capture, fnum, self.gray_flag)
_, bbox_list = self.get_tm_results(frame, 4)
for bbox in bbox_list:
x_pos_list.append(bbox[0][0])
# Get the ports used for current match and append to total list.
port_pos_list = position_tools.get_port_pos_list(x_pos_list)
port_num_list = position_tools.get_port_num_list(
port_pos_list, match_bboxes[i])
match_ports.append(port_num_list)
return match_ports
#### Functions not inherited by PercentMatcher Object #########################

# Take a matrix and coordinate, and zero the region around that coordinate.
# This also prevents matrix out of bound errors if the input coordinate is
# near the border. Also, the input coordinate is organized as (x, y). Matrices
# are pass by reference, so the input can be directly modified.
def set_subregion_to_zeros(input_mat, mat_dims, center_pt, radius):
    """Zero a (2*radius+1)-square region of input_mat around center_pt.

    mat_dims is organized as (height, width) while center_pt is (x, y).
    The region is clipped to the matrix bounds and input_mat is modified
    in place.
    """
    # Convert the (x, y) center into clipped (row, col) corner points.
    tl = (max(center_pt[1]-radius, 0),
        max(center_pt[0]-radius, 0))
    br = (min(center_pt[1]+radius+1, mat_dims[0]),
        min(center_pt[0]+radius+1, mat_dims[1]))
    # Broadcast-assign a scalar zero instead of allocating a temporary
    # np.zeros array. (This also removes the original x_size/y_size locals,
    # whose names were swapped: "x_size" was actually the region height.)
    input_mat[tl[0]:br[0], tl[1]:br[1]] = 0