-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrealTime_detection.py
144 lines (111 loc) · 5.09 KB
/
realTime_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import cv2
from ultralytics import YOLO
import numpy as np
import albumentations as A
import time
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
# Contrast-enhancement pipeline shared by every call to improve_frame().
# Built lazily on first use so the pipeline is constructed once instead of
# once per video frame.
_CLAHE_PIPELINE = None


def improve_frame(frame):
    """Return a contrast-enhanced copy of ``frame`` for detection.

    The frame is passed through an albumentations pipeline containing a
    single ``A.CLAHE`` step (``clip_limit=4.0``, applied with ``p=1``).

    Args:
        frame: Image array as returned by ``cv2.VideoCapture.read()``.
            NOTE(review): presumably an 8-bit 3-channel image -- confirm
            against the albumentations CLAHE input requirements.

    Returns:
        The transformed image taken from the pipeline's ``'image'`` entry.
    """
    global _CLAHE_PIPELINE
    if _CLAHE_PIPELINE is None:
        _CLAHE_PIPELINE = A.Compose([
            A.CLAHE(clip_limit=4.0, p=1),
        ])
    return _CLAHE_PIPELINE(image=frame)['image']
def _third_label(position, extent, labels):
    """Return the label of the third of ``[0, extent)`` containing ``position``.

    The two boundaries are ``extent // 3`` and ``2 * extent // 3``, the same
    expressions (``x * width // 3`` / ``y * height // 3``) used to draw the
    on-screen grid lines, so a point is always labelled with the cell it is
    visually drawn in -- including when ``extent`` is not divisible by 3.
    """
    first, second, third = labels
    if position < extent // 3:
        return first
    if position < 2 * extent // 3:
        return second
    return third


def identify_grid_loc(xmin, ymin, xmax, ymax, width, height):
    """Name the cell of a 3x3 grid that contains a bounding box's center.

    Args:
        xmin, ymin, xmax, ymax: Bounding-box corners in pixel coordinates.
        width, height: Size of the frame the box belongs to, in pixels.

    Returns:
        A string ``"<row>-<col>"`` where row is ``Top``/``Mid``/``Bottom``
        and col is ``Right``/``Center``/``Left``.
    """
    # Classify by the box center rather than by any single corner.
    center_x = (xmin + xmax) / 2
    center_y = (ymin + ymax) / 2

    # NOTE(review): the column with the smallest x values is labelled 'Right'
    # and the one with the largest x values 'Left'. Presumably intentional
    # (e.g. naming sides from the subject's point of view) -- confirm.
    col = _third_label(center_x, width, ('Right', 'Center', 'Left'))
    row = _third_label(center_y, height, ('Top', 'Mid', 'Bottom'))

    return f"{row}-{col}"
def calculate_metrics(true_labels, pred_labels):
    """Score predicted labels against reference labels.

    Args:
        true_labels: Sequence of reference labels.
        pred_labels: Sequence of predicted labels, aligned with ``true_labels``.

    Returns:
        Tuple ``(accuracy, f1, precision, recall)``. F1, precision and recall
        are computed with ``average='weighted'``.
    """
    accuracy = accuracy_score(true_labels, pred_labels)

    # The three remaining scorers share the same call signature, so evaluate
    # them in one pass (in f1, precision, recall order).
    f1, precision, recall = (
        scorer(true_labels, pred_labels, average='weighted')
        for scorer in (f1_score, precision_score, recall_score)
    )

    return accuracy, f1, precision, recall
# Function for real-time detection
def run_real_time_detection():
    """Run YOLO detection on capture device 0 and show annotated frames.

    For every frame read from the capture device the function:

    1. enhances the frame with ``improve_frame`` and runs the model on it,
    2. keeps, per result, the three highest-confidence detections and draws
       those with confidence >= 0.5 (box, class name, score and 3x3 grid
       cell from ``identify_grid_loc``) onto the original frame,
    3. overlays the 3x3 grid and an FPS counter, prints the same information
       to stdout, and shows the frame in an OpenCV window.

    The loop ends when a frame cannot be read or when 'q' is pressed. The
    capture device and all OpenCV windows are released on exit, including
    when an exception interrupts the loop.
    """
    max_detections = 3      # per result, only the top-N by confidence are considered
    min_confidence = 0.5    # detections scoring below this are skipped
    box_color = (255, 0, 0)
    grid_color = (0, 0, 255)
    fps_color = (0, 255, 0)
    grid_thickness = 2

    # Load the trained model
    model = YOLO("yolov8n.pt")

    # Start video capture (0 for default webcam, or specify video file path)
    cap = cv2.VideoCapture(0)

    # Timestamp of the previous frame; 0 makes the first FPS value ~0.
    prev_frame_time = 0

    # Defined before the loop so the metrics report after it never touches an
    # unbound name when the very first read fails.
    true_labels = []
    pred_labels = []

    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                break

            height, width = frame.shape[:2]
            new_frame_time = time.time()

            # Detection runs on the enhanced frame; drawing happens on the
            # original one.
            improved_frame = improve_frame(frame)  # type: ignore
            results = model(improved_frame)

            # FPS from the time between consecutive frame captures. Guard the
            # division: two timestamps can coincide on coarse clocks.
            elapsed = new_frame_time - prev_frame_time
            fps = 1 / elapsed if elapsed > 0 else 0.0
            prev_frame_time = new_frame_time

            # Labels are collected per frame (reset on every iteration).
            true_labels = []
            pred_labels = []

            for result in results:
                boxes = result.boxes.xyxy.cpu().numpy()    # bounding boxes
                scores = result.boxes.conf.cpu().numpy()   # confidence scores
                labels = result.boxes.cls.cpu().numpy()    # class indices

                # Highest confidence first, then keep only the top few.
                detections = sorted(
                    zip(boxes, scores, labels),
                    key=lambda det: det[1],
                    reverse=True,
                )

                for box, score, label in detections[:max_detections]:
                    if score < min_confidence:
                        continue

                    xmin, ymin, xmax, ymax = map(int, box)
                    label_name = model.names[int(label)]
                    grid_location = identify_grid_loc(
                        xmin, ymin, xmax, ymax, width, height
                    )

                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, 2)
                    cv2.putText(
                        frame,
                        f"{label_name}: {score:.2f} ({grid_location})",
                        (xmin, ymin - 10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.9,
                        box_color,
                        2,
                    )

                    # NOTE(review): no ground truth is available here, so the
                    # predicted label is stored in both lists; the metrics
                    # computed from them can only compare predictions with
                    # themselves.
                    true_labels.append(label_name)
                    pred_labels.append(label_name)

                    # Print out confidence score and grid location
                    print(f"{label_name}: {score:.2f}")
                    print(f"{label_name}: {grid_location}")

            # Draw the two vertical and two horizontal lines of the 3x3 grid.
            for x in range(1, 3):
                cv2.line(frame, (x * width // 3, 0), (x * width // 3, height),
                         grid_color, grid_thickness)
            for y in range(1, 3):
                cv2.line(frame, (0, y * height // 3), (width, y * height // 3),
                         grid_color, grid_thickness)

            # Report FPS on stdout and on the frame
            print(f"\nFPS: {fps:.2f}")
            cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, fps_color, 2)

            # Display the resulting frame
            cv2.imshow('Real-time YOLO Detection', frame)

            # Break loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # NOTE(review): reported once after the loop ends, using the labels of
        # the last processed frame only -- confirm this placement is intended.
        if true_labels and pred_labels:
            accuracy, f1, precision, recall = calculate_metrics(true_labels, pred_labels)
            print(f"Accuracy: {accuracy:.2f}, F1 Score: {f1:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}")
    finally:
        # Release the capture and close windows, even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()
# Run real-time detection.
# NOTE(review): this is a top-level statement, so the webcam loop starts
# whenever the module is loaded -- on import as well as when run as a script.
# Consider guarding it with `if __name__ == "__main__":` if the helpers above
# are ever meant to be imported.
run_real_time_detection()