-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdetectvideo.py
101 lines (84 loc) · 3.47 KB
/
detectvideo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import time
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
tf.config.experimental.set_memory_growth(physical_devices[0], True)
from absl import app, flags, logging
from absl.flags import FLAGS
import core.utils as utils
from tensorflow.python.saved_model import tag_constants
from PIL import Image
import cv2
import numpy as np
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from core.config import cfg
flags.DEFINE_string('video', './data/road.mp4', 'path to input video')
flags.DEFINE_string('output', 'result.avi', 'path to output video')
def main(_argv):
# Setup configs
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)
STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config()
input_size = cfg.size
video_path = FLAGS.video
print("Video from: ", video_path )
vid = cv2.VideoCapture(video_path)
saved_model_loaded = tf.saved_model.load(cfg.weights, tags=[tag_constants.SERVING])
infer = saved_model_loaded.signatures['serving_default']
# by default VideoCapture returns float instead of int
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*cfg.output_format)
out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
# Loop for applying object detection on every frame
frame_id = 0
while True:
return_value, frame = vid.read()
if return_value:
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
image = Image.fromarray(frame)
else:
if frame_id == vid.get(cv2.CAP_PROP_FRAME_COUNT):
print("Video processing complete")
break
raise ValueError("No image! Try with another video format")
frame_size = frame.shape[:2]
image_data = cv2.resize(frame, (input_size, input_size))
image_data = image_data / 255.
image_data = image_data[np.newaxis, ...].astype(np.float32)
prev_time = time.time()
# Load pre-trained weights and apply object detection
batch_data = tf.constant(image_data)
pred_bbox = infer(batch_data)
for key, value in pred_bbox.items():
boxes = value[:, :, 0:4]
pred_conf = value[:, :, 4:]
# Perform NMS after YOLOv4 inference
boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
scores=tf.reshape(
pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
max_output_size_per_class=50,
max_total_size=50,
iou_threshold = cfg.iou,
score_threshold = cfg.score
)
pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
image = utils.draw_bbox(frame, pred_bbox)
curr_time = time.time()
exec_time = curr_time - prev_time
result = np.asarray(image)
info = "time: %.2f ms" %(1000*exec_time)
print(info)
result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
# Output result
out.write(result)
frame_id += 1
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass