-
Notifications
You must be signed in to change notification settings - Fork 0
/
lane_detection.py
425 lines (351 loc) · 18.5 KB
/
lane_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
#<=============================== Imports ===================================>#
import cv2
import sys
import numpy as np
from os import system
from video_handler import VideoHandler
from sklearn.linear_model import RANSACRegressor
from sklearn.preprocessing import PolynomialFeatures
#<=============================== LaneDetectionHandler Class Definition ===================================>#
class LaneDetectionHandler(VideoHandler):
"""
Lane Detection Handler
Parent: Video Handler
args: (video name, output vid name, frame scale factor, cam cal matrix, cam distortion vector, lane output shape)
Handles all processes for the lane detection (e.g. frame preprocessing, etc.) except for computer vision pipeline.
"""
def __init__(self, video_name, video_out_name, scale_percent, K, D, lane_output_shape=None) -> None:
super().__init__(video_name, video_out_name, scale_percent=scale_percent)
self.K = K
self.D = D
h,w = super().get_img_shape()
if lane_output_shape == None:
self.lane_output_shape = (int(.5*w),int(2.5*h/2))
elif isinstance(lane_output_shape, tuple):
self.lane_output_shape = (int(lane_output_shape[0]*w),int(lane_output_shape[1]*h))
else:
self.lane_output_shape = lane_output_shape
self.ideal_cam_mat, _ = cv2.getOptimalNewCameraMatrix(self.K, self.D,(w,h), 1, (w,h))
# Calculates the homography matrix
def set_projective_transform(self, selected_corners, bounds_x, bounds_y):
h, w = self.lane_output_shape
bounds_x = (int(bounds_x[0]*w), int(bounds_x[1]*w))
bounds_y = (int(bounds_y[0]*h), int(bounds_y[1]*h))
selected_corners = np.float32(selected_corners)
birds_eye_corners = np.float32([[bounds_x[0],bounds_y[1]], [bounds_x[1], bounds_y[1]], [bounds_x[1], bounds_y[0]], [bounds_x[0], bounds_y[0]]])
self.M = cv2.getPerspectiveTransform(selected_corners, birds_eye_corners)
# Applies the homography matrix on a frame
def apply_projective_transform(self, img):
return cv2.warpPerspective(img, self.M, self.lane_output_shape)[::-1,:]
# Undistorts an image based on its distortion parameters
def undistort(self, img):
return cv2.undistort(img, self.K, self.D, None, self.ideal_cam_mat)
# Gets the nth frame
def get_frame_n(self, frame_num):
return self.undistort(super().get_frame_n(frame_num))
# Gets the next frame in the video buffer
def get_next_frame(self):
ret, frame = super().get_next_frame()
frame_cp = None
try:
frame = self.undistort(frame)
frame_cp = frame.copy()
frame = self.apply_projective_transform(frame)
# frame = self.resize_frame(frame, 50)
except TypeError:
pass
finally:
return ret, (frame, frame_cp, self.M)
# Runs program to select custom lane corners.
def get_lane_corners(self, scale=200, n=0):
print("Please select four corners for homography in the clockwise direction.")
print("Use W/A/S/D to move the circle and set the four corners.")
print("Use E/R to lower/upper the pixel movement per key press.")
print("Select V to set the image to a new image and x to reset selection.")
print("Lastly select Q to quit. If four corners are chosen, the program will print the selected corners.")
img = self.resize_frame(self.get_frame_n(n), scale)
h, w = img.shape[:2]
circle_x = w//2
circle_y = h//2
res = 5
corners = []
pt_scale = scale/100
try:
while True:
img_cp = img.copy()
if corners:
for coord in corners:
from_x, to_x = coord[0]-200, coord[0]+200
from_y, to_y = coord[1]-200, coord[1]+200
cv2.line(img_cp, (from_x, coord[1]), (to_x, coord[1]), (255,0,0), 1)
cv2.line(img_cp, (coord[0], from_y), (coord[0], to_y), (255,0,0), 1)
cv2.circle(img_cp, coord, 3, (0,255,0), -1)
cv2.circle(img_cp, (circle_x, circle_y), 1, (0,0,255), -1)
cv2.imshow("corner finder", img_cp)
key = cv2.waitKey(1) & 0xff
if key == ord('w'):
circle_y -= res
elif key == ord('s'):
circle_y += res
elif key == ord('a'):
circle_x -= res
elif key == ord('d'):
circle_x += res
elif key == ord('r'):
res += 1
elif key == ord('e'):
res -= 1
if res < 1:
res = 1
elif key == ord('v'):
n += 1
img = self.resize_frame(self.get_frame_n(n), scale)
elif key == ord('x'):
corners = []
elif key == 32:
corners.append((circle_x, circle_y))
circle_x = w//2
circle_y = h//2
res = 5
elif key == ord('q'):
break
if len(corners) != 4:
print("Please input exactly 4 points")
return
corners = [(int(round(corner[0]/pt_scale)), int(round(corner[1]/pt_scale))) for corner in corners]
print(f"The four corners in clockwise are: {corners}")
return corners
except KeyboardInterrupt:
return
#<=============================== LaneDetection Class Definition ===================================>#
class LaneDetection:
"""
Lane Detection Class
args(desired number of y-lines to take from histogram, color threshold, approximate middle of the lane, whether or not there's grass)
Computer vision/lane detection algorithm
"""
def __init__(self, desired_maxes = 50, threshold=220, mid_per=.6, grass=False) -> None:
self.approx_perp_dist = None
self.desired_maxes = desired_maxes
self.threshold = threshold
self.mid_per = mid_per
self.grass = grass
def detect_lanes(self, frame_info):
# Parse passed in parameters
M = frame_info[-1]
orig_frame = frame_info[1]
frame = frame_info[0]
h, w = frame.shape[:2]
# Convert img to HSV
frame_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# Threshold img to get lane candidates
frame_t_white = cv2.inRange(frame_hsv, (0, 0, self.threshold), (255, 255, 255))
frame_t = None
if not self.grass:
frame_t_yellow = cv2.inRange(frame_hsv, (10, 50, 50), (50,255,200))
frame_t = cv2.bitwise_or(frame_t_white, frame_t_yellow)
else:
frame_t = frame_t_white
# cv2.imshow("frmae", frame)
# cv2.imshow("thresh", frame_t)
# cv2.waitKey(0)
# Generate historgram of lane candidates among vertical lines
white_px = np.argwhere(frame_t)
unique, counts = np.unique(white_px[:,1], return_counts=True)
arg_maxes = np.argsort(-counts, 0)
# Select the desired_maxes vertical lines with the most lane candidates
sorted_white_px_bool = np.isin(white_px[:, 1], unique[arg_maxes[:self.desired_maxes]])
sorted_white_px = white_px[sorted_white_px_bool]
# Differentiate between left lane pixels and right lane pixels
right_sorted_white_px_bool = sorted_white_px[:,1] > self.mid_per*w
left_sorted_white_px_bool = np.invert(right_sorted_white_px_bool)
right_sorted_white_px = sorted_white_px[right_sorted_white_px_bool]
left_sorted_white_px = sorted_white_px[left_sorted_white_px_bool]
frame[right_sorted_white_px[:,0], right_sorted_white_px[:,1]] = [0,0,255]
frame[left_sorted_white_px[:,0], left_sorted_white_px[:,1]] = [255,0,0]
# Apply polynomial regression
poly_ft = PolynomialFeatures(2)
all_y = np.array(range(h))
X_all_y = poly_ft.fit_transform(all_y.reshape(-1,1))
right_avg_x = 5000
left_avg_x = 5000
try:
# Run parabolic RANSAC estimator on right side pixels
right_poly = poly_ft.fit_transform(right_sorted_white_px[:, 0].reshape(-1,1))
right_fit = RANSACRegressor(residual_threshold=5).fit(right_poly, right_sorted_white_px[:, 1])
right_predict = right_fit.predict(X_all_y)
right_predict = np.round(right_predict).astype(int, copy=False)
right_avg_x = np.mean(right_predict)
except ValueError:
pass
try:
# Run parabolic RANSAC estimator on left side pixels
left_poly = poly_ft.fit_transform(left_sorted_white_px[:, 0].reshape(-1,1))
left_fit = RANSACRegressor(residual_threshold=5).fit(left_poly, left_sorted_white_px[:, 1])
left_predict = left_fit.predict(X_all_y)
left_predict = np.round(left_predict).astype(int, copy=False)
left_avg_x = np.mean(left_predict)
except ValueError:
pass
# To ensure that car doesn't follow false lane estimation, threshold the distance between 2 lanes.
dist = abs(left_avg_x - right_avg_x)
if self.approx_perp_dist == None:
self.approx_perp_dist = dist
text = "Cannot Detect Lanes"
text_x = w//2-170
if abs(dist - self.approx_perp_dist) < 80:
left_worked = False
right_worked = False
# Color lane pixels
try:
frame[all_y, right_predict] = [0,0,255]
right_worked = True
except IndexError:
pass
try:
frame[all_y, left_predict] = [255,0,0]
left_worked = True
except IndexError:
pass
if left_worked and right_worked:
# Estimate lane center
left_right = np.vstack((left_predict, right_predict))
center_predict = np.mean(left_right, axis=0).astype(np.uint16, copy=False)
# Determine text
text_x = w//2-40
curve = int(center_predict[0]) - int(center_predict[h//2])
if abs(curve) < 20:
text = "Going straight"
elif curve > 0:
text = "Turning right"
else:
text = "Turning left"
# Backpropogation onto image
prev_arrow = False
arr_res = 10
inv_M = np.linalg.pinv(M)
start = 5
overlay = np.zeros_like(frame)
frame = cv2.UMat(frame)
partial_h = h//4
pts_ls = [[[left_predict[0], 0],
[left_predict[partial_h], partial_h],
[left_predict[h-1], h-1],
[right_predict[h-1], h-1],
[right_predict[partial_h], partial_h],
[right_predict[0], 0]]]
# Polygon
pts = np.array(pts_ls, dtype=np.int32)
cv2.fillPoly(overlay, np.array(pts_ls, dtype=np.int32), (255,255,0))
frame = cv2.addWeighted(overlay, .2, frame, 1 - .2, 0)
overlay2 = np.zeros_like(orig_frame)
pts = cv2.perspectiveTransform(np.array(pts_ls, dtype=np.float32), inv_M).astype(np.int32, copy=False)
cv2.fillPoly(overlay2, pts, (255,255,0))
orig_frame = cv2.addWeighted(overlay2, .2, orig_frame, 1 - .2, 0)
# Arrows
for arrow_ind in range(20):
if start + arr_res >= h:
break
if not prev_arrow:
cv2.arrowedLine(frame, (center_predict[start+arr_res], start+arr_res), (center_predict[start], start), (0,255,0), 5, tipLength=.5)
pts_c = cv2.perspectiveTransform(np.array([[[center_predict[start+arr_res], start+arr_res]],
[[center_predict[start], start]]], dtype=np.float32), inv_M)
pts_c = np.array(np.round(pts_c), dtype=int)
cv2.arrowedLine(orig_frame, tuple(np.round(pts_c[1][0])), tuple(np.round(pts_c[0][0])), (0,255,0), 5, tipLength=.5)
start += arr_res
arr_res += 10
else:
start += 10
prev_arrow = not prev_arrow
# Text
cv2.putText(orig_frame, text, (text_x, 150), cv2.FONT_HERSHEY_SIMPLEX, 3, (150, 0, 150), 2)
cv2.imshow("lane", frame)
return orig_frame
#<=============================== Main ===================================>#
if __name__ == "__main__":
# system('cls')
# Run easier video, vid1
if len(sys.argv) == 1:
print("Starting the vid1 lane detection.")
vid_name = 'data_1/data_vid1.avi'
video_out_name = 'data_vid1_lanes'
D = np.array([-3.639558e-01, 1.788651e-01, 6.029694e-04, -3.922424e-04, -5.382460e-02])
K = np.array([[9.037596e+02, 0.000000e+00, 6.957519e+02],
[0.000000e+00, 9.019653e+02, 2.242509e+02],
[0.000000e+00, 0.000000e+00, 1.000000e+00]])
selected_corners = [(647, 246), (692, 246), (798, 421), (388, 421)]
lane_output_shape = (.6, 2.5/2)
bounds_x = (.5, .95)
bounds_y = (0, 1.5)
desired_maxes = 80
threshold = 220
mid_per = .6
grass = True
lane_detector = LaneDetection(desired_maxes, threshold, mid_per, grass)
handle = LaneDetectionHandler(vid_name, video_out_name, 100, K, D, lane_output_shape=lane_output_shape)
handle.set_projective_transform(selected_corners, bounds_x, bounds_y)
def body(frame):
frame = lane_detector.detect_lanes(frame)
cv2.imshow("img", frame)
return frame
handle.run(body)
elif len(sys.argv) == 2:
# Run the define lane corners script
if sys.argv[-1] == "def_lane_corners":
vid_name = 'data_1/data_vid1.avi'
video_out_name = 'data_vid1_lanes'
D = np.array([-3.639558e-01, 1.788651e-01, 6.029694e-04, -3.922424e-04, -5.382460e-02])
K = np.array([[9.037596e+02, 0.000000e+00, 6.957519e+02],
[0.000000e+00, 9.019653e+02, 2.242509e+02],
[0.000000e+00, 0.000000e+00, 1.000000e+00]])
lane_output_shape = (.6, 2.5/2)
handle = LaneDetectionHandler(vid_name, video_out_name, 100, K, D, lane_output_shape=lane_output_shape)
# These two lines bring up a prompt that allows the user to pick coordinates for the corners
handle.get_lane_corners(scale=100)
# Note that these corners must be copy/pasted into the "selected_corners" var
# Generate the video from images
elif sys.argv[-1] == "gen_video":
print("Generate video from images.")
# This function takes all of the images and patches them into a video. It then saves the resulting video. Therefore,
# this function must only be run if the video has not been generated yet
vid_name = 'data_1/data_vid1.avi'
VideoHandler.gen_video_frm_imgs(vid_name, 'data_1/data', 10, 302, '.png', 10)
# Run the challenge video lane detection
else:
print("Starting the challenge video lane detection.")
# Harder video
vid_name = 'data_2/challenge_video.mp4'
video_out_name = 'challenge_video_lanes'
D = np.array([-2.42565104e-01, -4.77893070e-02, -1.31388084e-03, -8.79107779e-05, 2.20573263e-02])
K = np.array([[1.15422732e+03, 0.00000000e+00, 6.71627794e+02],
[0.00000000e+00, 1.14818221e+03, 3.86046312e+02],
[0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
selected_corners = [(657, 455), (713, 455), (963, 645), (381, 645)]
lane_output_shape = (.3, .6)
bounds_x = (.1, .50)
bounds_y = (0, 1)
desired_maxes = 140
threshold = 175
mid_per = .4
grass = False
lane_detector = LaneDetection(desired_maxes, threshold, mid_per, grass)
handle = LaneDetectionHandler(vid_name, video_out_name, 100, K, D, lane_output_shape=lane_output_shape)
handle.set_projective_transform(selected_corners, bounds_x, bounds_y)
def body(frame):
frame = lane_detector.detect_lanes(frame)
cv2.imshow("img", frame)
return frame
handle.run(body)
# Define challenge video lane corners
elif np.any(np.array(sys.argv) == 'challenge') and np.any(np.array(sys.argv) == 'def_lane_corners'):
vid_name = 'data_2/challenge_video.mp4'
video_out_name = 'challenge_video_lanes'
D = np.array([-2.42565104e-01, -4.77893070e-02, -1.31388084e-03, -8.79107779e-05, 2.20573263e-02])
K = np.array([[1.15422732e+03, 0.00000000e+00, 6.71627794e+02],
[0.00000000e+00, 1.14818221e+03, 3.86046312e+02],
[0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
lane_output_shape = (.3, .6)
handle = LaneDetectionHandler(vid_name, video_out_name, 100, K, D, lane_output_shape=lane_output_shape)
# These two lines bring up a prompt that allows the user to pick coordinates for the corners
handle.get_lane_corners(scale=100)
# Note that these corners must be copy/pasted into the "selected_corners" var