Skip to content

Commit fae597d

Browse files
author
dehobitto
committed
Done.
Fixed smoothing of faces (each face is now smoothed independently). Added an FPS counter. Fixed text issues. A lot of refactoring. Added comments for newbies.
1 parent 88b6a88 commit fae597d

File tree

3 files changed

+195
-59
lines changed

3 files changed

+195
-59
lines changed

Python/face_detector/README.md

Whitespace-only changes.

Python/face_detector/main.py

Lines changed: 193 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,216 @@
1-
import cv2 as cv
21
import sys
2+
import time
3+
from dataclasses import dataclass
4+
import numpy as np
5+
import cv2 as cv
6+
from numpy import ndarray
7+
8+
# === CONSTANTS ===
9+
CAMERA_SOURCE: int = 0
310

4-
def get_camera(camera_source_idx : int) -> cv.VideoCapture:
5-
cap = cv.VideoCapture(camera_source_idx)
11+
FD_MIN_NEIGHBORS: int = 7
12+
FD_SCALE_FACTOR: float = 1.2
613

7-
if not cap.isOpened():
14+
FACEBOX_COLOR: tuple[int, int, int] = (255, 0, 0)
15+
FACEBOX_THICKNESS: int = 2
16+
17+
TEXT_DEFAULT_SIZE = 25 # don't touch: presumably the text size in pixels at font scale 1 (FPS_POSITION multiplies it by the font scale) - TODO confirm
18+
TEXT_COEFF: int = 200
19+
TEXT_MARGIN = 5
20+
TEXT_FONT = cv.FONT_HERSHEY_SIMPLEX
21+
22+
SMOOTH_ALPHA: float = 0.2
23+
SMOOTH_THRESHOLD: int = 2
24+
25+
MATCH_MAX_DISTANCE: int = 150
26+
27+
FPS_COLOR: tuple[int, int, int] = (0, 255, 0)
28+
FPS_FONT_SCALE: float = 0.8
29+
FPS_THICKNESS: int = 1
30+
FPS_POSITION: tuple[int, int] = (0, int(TEXT_DEFAULT_SIZE * FPS_FONT_SCALE)) # top-left corner
31+
FPS_FONT = cv.FONT_HERSHEY_SIMPLEX
32+
33+
# === DATA CLASS ===
34+
@dataclass
class FaceBox:
    """Rectangle describing one face: a corner (x, y) plus a size (w, h).

    All fields default to 0, so ``FaceBox()`` is an empty box at the origin.
    """

    x: int = 0
    y: int = 0
    w: int = 0
    h: int = 0

    def get_center(self) -> tuple[int, int]:
        """Return the center of the box as ``(cx, cy)``.

        Half sizes are computed with floor division, so the result stays
        integral when the fields are integers.
        """
        return self.x + self.w // 2, self.y + self.h // 2

    def get_vars(self) -> tuple[int, int, int, int]:
        """Return the box as an ``(x, y, w, h)`` tuple, handy for unpacking."""
        return self.x, self.y, self.w, self.h
46+
47+
48+
# === FACE DETECTOR CLASS ===
49+
class FaceDetector:
    """Detect frontal faces in a frame and smooth every face box between frames."""

    def __init__(self):
        """
        face_cascade: HAAR cascade for frontal face detection
        tracked_faces: list of faces from previous frame to smooth positions
        """
        self.face_cascade = cv.CascadeClassifier(
            cv.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.tracked_faces: list[FaceBox] = []

    def detect_faces(self, frame: ndarray) -> ndarray:
        """
        Detect faces, smooth their positions using EMA, and draw rectangles with labels.

        :param frame: color frame; it is converted with COLOR_BGR2GRAY for detection
        :return: the frame with a rectangle and a "Face" label drawn for every face
        """
        frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
        detected_raw = self.face_cascade.detectMultiScale(
            frame_gray, scaleFactor=FD_SCALE_FACTOR, minNeighbors=FD_MIN_NEIGHBORS
        )

        if len(detected_raw) == 0:
            # Nothing was seen in this frame, so there is no "previous frame" face
            # left to smooth against. Forget old boxes, otherwise a face appearing
            # later would be dragged towards a stale position.
            self.tracked_faces = []
            return frame

        # The detector may hand back NumPy integer scalars; convert them so that
        # FaceBox really holds the plain ints its annotations promise.
        detected_faces = [
            FaceBox(int(x), int(y), int(w), int(h)) for (x, y, w, h) in detected_raw
        ]

        # Work on a copy: every matched face is popped from it so that one
        # tracked face cannot be matched to two detections.
        prev_tracked = self.tracked_faces.copy()
        render_faces: list[FaceBox] = []

        # Match & smooth each detected face
        for det_face in detected_faces:
            render_faces.append(self._match_and_smooth_face(det_face, prev_tracked))

        # Draw faces
        for face in render_faces:
            x, y, w, h = face.get_vars()
            frame = cv.rectangle(frame, (x, y), (x + w, y + h), FACEBOX_COLOR, FACEBOX_THICKNESS)
            # The label is placed next to the box and scales with the face height
            frame = add_text(
                frame,
                "Face",
                (x + w + TEXT_MARGIN, y - TEXT_MARGIN),
                font_scale=h / TEXT_COEFF,
            )

        # Smoothed boxes become the reference for the next frame
        self.tracked_faces = render_faces
        return frame

    def _match_and_smooth_face(self, det_face: FaceBox, prev_tracked: list[FaceBox]) -> FaceBox:
        """
        Match a detected face with tracked faces from previous frame and smooth its position.
        If no match found, returns detected face as is.

        The match is the tracked face whose center is closest to the detected
        center, provided the distance is below MATCH_MAX_DISTANCE.

        :param det_face: face detected in the current frame
        :param prev_tracked: candidate faces from the previous frame; the matched
            face (if any) is REMOVED from this list
        :return: smoothed box, or ``det_face`` itself when nothing matched
        """
        det_center = np.array(det_face.get_center())
        best_match_idx = -1
        # Starting at the limit makes "closer than the best so far" also mean
        # "closer than MATCH_MAX_DISTANCE".
        best_match_dist = float(MATCH_MAX_DISTANCE)

        for i, tracked_face in enumerate(prev_tracked):
            dist = np.linalg.norm(det_center - np.array(tracked_face.get_center()))
            if dist < best_match_dist:
                best_match_dist = dist
                best_match_idx = i

        if best_match_idx == -1:
            return det_face

        matched_face = prev_tracked.pop(best_match_idx)
        # Position and size are smoothed separately with the same filter
        sx, sy = self.smooth((matched_face.x, matched_face.y), (det_face.x, det_face.y))
        sw, sh = self.smooth((matched_face.w, matched_face.h), (det_face.w, det_face.h))
        return FaceBox(sx, sy, sw, sh)

    @staticmethod
    def smooth(
        last_pos: tuple[int, int],
        cur_pos: tuple[int, int],
        alpha: float = SMOOTH_ALPHA,
        threshold: int = SMOOTH_THRESHOLD
    ) -> tuple[int, int]:
        """
        Apply Exponential Moving Average (EMA) smoothing to positions.

        Each of the two components is handled independently:
        - a change smaller than ``threshold`` is ignored (the old value is kept);
        - otherwise the result is ``alpha * current + (1 - alpha) * last``,
          rounded to the nearest integer.

        :param last_pos: pair of values from the previous frame
        :param cur_pos: pair of values from the current frame
        :param alpha: weight of the current value
        :param threshold: minimal change that is not treated as jitter
        :return: pair of smoothed integer values
        """
        smoothed = []
        for last, cur in zip(last_pos, cur_pos):
            if abs(cur - last) < threshold:
                smoothed.append(int(last))
            else:
                # round() instead of int(): truncation always rounds down, which
                # lets the box follow small moves towards 0 but not away from it.
                smoothed.append(int(round(alpha * cur + (1 - alpha) * last)))

        return smoothed[0], smoothed[1]
137+
138+
# === FPSCounter CLASS ===
139+
class FPSCounter:
    """Frames-per-second counter whose value is refreshed about once per second."""

    def __init__(self):
        # perf_counter() is a monotonic clock, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        self.start_time = time.perf_counter()  # start of the current measuring window
        self.frame_count = 0  # frames seen in the current window
        self.fps = 0  # last computed value; 0 until the first window ends

    def update(self) -> int:
        """
        Register one processed frame and return the current FPS value.

        Call it once per frame. The value is recalculated when at least one
        second has passed since the window started; until then the previously
        computed value is returned.
        """
        self.frame_count += 1
        now = time.perf_counter()
        elapsed = now - self.start_time

        if elapsed >= 1.0:
            self.fps = int(self.frame_count / elapsed)
            # Start a new measuring window
            self.frame_count = 0
            self.start_time = now

        return self.fps
156+
157+
158+
# === HELPERS ===
159+
def get_camera(idx: int = CAMERA_SOURCE) -> cv.VideoCapture:
    """
    Open the camera with the given index.

    :param idx: camera index passed to cv.VideoCapture
    :return: opened capture object
    :raises SystemExit: when the camera cannot be opened
    """
    cam = cv.VideoCapture(idx)

    if not cam.isOpened():
        cam.release()
        # A string argument makes sys.exit() print the message to stderr and
        # finish with exit status 1, so the failure is visible to the caller
        # (a bare sys.exit() would report success).
        sys.exit("Cannot open camera")

    return cam
167+
168+
def add_text(
    frame: ndarray,
    text: str,
    point: tuple[int, int] = (0, 0),
    font_scale: float = 1,
    font_face = TEXT_FONT,
    color: tuple[int, int, int]=(255, 0, 0),
    thickness: int = 1
) -> np.ndarray:
    """
    Draw anti-aliased text on the frame and return the frame.

    :param frame: image to draw on
    :param text: string to draw
    :param point: text position passed to cv.putText
    :param font_scale: font scale factor
    :param font_face: OpenCV font identifier
    :param color: text color as a 3-tuple
    :param thickness: thickness of the text strokes
    :return: whatever cv.putText returns for the given frame
    """
    return cv.putText(
        img=frame,
        text=text,
        org=point,
        fontFace=font_face,
        fontScale=font_scale,
        color=color,
        thickness=thickness,
        lineType=cv.LINE_AA,
    )
16179

17-
TEXT_MARGIN = 5
18-
last_face = (0, 0)
19-
last_face_w, last_face_h = (100, 100)
20-
21-
def smooth(last_pos, cur_pos, alpha=0.2, threshold=2):
22-
dx = cur_pos[0] - last_pos[0]
23-
dy = cur_pos[1] - last_pos[1]
24-
25-
if abs(dx) < threshold:
26-
cur_pos = (last_pos[0], cur_pos[1])
27-
if abs(dy) < threshold:
28-
cur_pos = (cur_pos[0], last_pos[1])
29-
30-
x = int(alpha * cur_pos[0] + (1 - alpha) * last_pos[0])
31-
y = int(alpha * cur_pos[1] + (1 - alpha) * last_pos[1])
32-
return x, y
33-
34-
35-
def detect_faces(frame):
36-
global last_face
37-
global last_face_w, last_face_h
38-
frame_gray = cv.cvtColor(src=frame, code=cv.COLOR_BGR2GRAY)
39-
faces = face_cascade.detectMultiScale(image=frame_gray, scaleFactor=1.2, minNeighbors=7)
40-
41-
for (x, y, w, h) in faces:
42-
x, y = smooth(last_face, (x,y))
43-
w, h = smooth((last_face_w, last_face_h), (w, h))
44-
frame = cv.rectangle(
45-
img=frame,
46-
pt1=(x, y), pt2=(x + w, y + h),
47-
color=(255, 0, 0), thickness=2)
48-
add_text(frame, "Face", (x + w + TEXT_MARGIN, y - TEXT_MARGIN), h / 200)
49-
last_face = (x, y)
50-
last_face_w, last_face_h = w, h
51-
52-
def add_text(frame, text, point, font_scale = 1, font_face = cv.FONT_HERSHEY_DUPLEX, color = (255, 0, 0), thickness = 1):
53-
cv.putText(frame, text, point, font_face, font_scale, color, thickness)
54-
return frame
55180

56-
# TODO: List all cameras let him choose
181+
# === MAIN ===
57182
def main():
    """
    Run the capture loop: read a frame, detect faces, draw the FPS value, show the frame.

    The loop stops when a frame cannot be read or when 'q' is pressed.
    """
    face_detector = FaceDetector()
    fps_counter = FPSCounter()
    cam = get_camera()

    # try/finally guarantees that the camera and the window are released even
    # when the loop is interrupted by an exception (for example Ctrl+C).
    try:
        while True:
            res, frame = cam.read()
            if not res:
                print("Cannot read frame")
                break

            fps = fps_counter.update()

            # If more processing steps are added to a frame (other features or
            # recognitions), move this part to a separate function such as
            # process_frame().
            frame = face_detector.detect_faces(frame)
            frame = add_text(
                frame=frame,
                text=f"FPS: {fps}",
                point=FPS_POSITION,
                color=FPS_COLOR,
                font_scale=FPS_FONT_SCALE,
                font_face=FPS_FONT,
                thickness=FPS_THICKNESS
            )

            cv.imshow("Face detector", frame)

            # waitKey also gives the window time to redraw; 'q' quits
            if cv.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cam.release()
        cv.destroyAllWindows()
79215

80216

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
numpy~=2.2.6
2-
opencv-python~=4.12.0.88
1+
opencv-python~=4.12.0.88
2+
numpy~=2.2.6

0 commit comments

Comments
 (0)