CV_Final.py
'''
Final demo script for joint and seam detection for autonomous welding
Using OpenCV for inference
'''
#------------------------------------------------------------------
# Import required modules
import sys
import time
import math
import argparse
import cv2                       # state of the art computer vision algorithms library
import numpy as np               # fundamental package for scientific computing
import matplotlib.pyplot as plt  # 2D plotting library producing publication quality figures
import pyrealsense2 as rs        # Intel RealSense cross-platform open-source API
import paho.mqtt.client as mqtt  # MQTT client used to publish detections
import scipy.interpolate
from scipy.spatial import distance
#import open3d as o3d            # only needed for the calib_pointCloud branch
#------------------------------------------------------------------
# Set some constants
# HSV threshold values
dark_hsv= np.array([0.302*180,0.164*255,.1*255])
light_hsv= np.array([.487*180,1*255,1.00*255])
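# (Note, an added observation: OpenCV HSV uses H in [0, 180) and S, V in [0, 255];
# the fractions above presumably come from a tool that reports HSV normalized to [0, 1].)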
# colors for showing the bounding boxes
colors = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 0)]
# Number of points for interpolation
NUM_POINTS= 40
# Model input images need to be 640 x 640
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
# NMS and thresholds for Yolo output (OpenCV version)
SCORE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4
CONFIDENCE_THRESHOLD = 0.8 #only detections with confidence > this are valid
#------------------------------------------------------------------
# Set some flags
calib_depth = False # set true if testing with depth stuff
calib_pointCloud = False # set true if testing with point clouds
# set Cuda flag and get model
is_cuda = True
# True for camera feed, false for bag file
LIVE_FEED=True
#------------------------------------------------------------------
# MQTT Setup
# broker address: IP of the arm team's Nano for the demo; replace as needed
mqttBroker ="10.155.61.199"
client = mqtt.Client("Label Detection")
client.connect(mqttBroker)
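# A minimal sketch, not part of the original pipeline: a throwaway subscriber for
# eyeballing the topics this script publishes. It mirrors the paho-mqtt 1.x-style
# constructor used above; the helper name is hypothetical and nothing calls it.
def debug_subscribe(broker_ip=mqttBroker):
    def on_message(cl, userdata, msg):
        print(msg.topic, msg.payload.decode())
    sub = mqtt.Client("Label Detection Debug")
    sub.on_message = on_message
    sub.connect(broker_ip)
    sub.subscribe([("joint_found", 0), ("coord Start", 0), ("coord_list", 0)])
    sub.loop_forever()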
#------------------------------------------------------------------
# if running on a pre-recorded rosbag:
if not LIVE_FEED:
    pipeline = rs.pipeline()
    cfg = rs.config()
    # change to the ROS bag to use as needed
    cfg.enable_device_from_file("/home/learning/librealsense-jupyter/object_detect.bag")
    profile = pipeline.start(cfg)
# Live feed from camera set-up
elif LIVE_FEED:
    pipeline = rs.pipeline()
    config = rs.config()
    # Get device product line for setting a supporting resolution
    pipeline_wrapper = rs.pipeline_wrapper(pipeline)
    pipeline_profile = config.resolve(pipeline_wrapper)
    device = pipeline_profile.get_device()
    device_product_line = str(device.get_info(rs.camera_info.product_line))
    found_rgb = False
    for s in device.sensors:
        if s.get_info(rs.camera_info.name) == 'RGB Camera':
            found_rgb = True
            s.set_option(rs.option.exposure, 150)
            break
    if not found_rgb:
        print("The demo requires a depth camera with a color sensor")
        exit(0)
    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    if device_product_line == 'L500':
        config.enable_stream(rs.stream.color, 960, 540, rs.format.bgr8, 30)
    else:
        config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    # Start streaming
    profile = pipeline.start(config)
else:
    print("Unknown video source!")
    sys.exit(-1)
print("Environment Ready")
#------------------------------------------------------------------
# Function Defs
# Defining the functions used by the main driver loop
#From:https://github.com/doleron/yolov5-opencv-cpp-python
def build_model(is_cuda):
    # select onnx file to read weights/model from
    net = cv2.dnn.readNet("best_from_scratch.onnx")
    if is_cuda:
        print("Attempting to use CUDA")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    else:
        print("Running on CPU")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    return net
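# A minimal sketch (an addition, not from the original script): cv2.dnn falls back
# to the default CPU backend when OpenCV was not built with CUDA, so a quick device
# count makes the is_cuda flag easier to trust. Uncalled helper; the name
# cuda_device_count is hypothetical.
def cuda_device_count():
    try:
        return cv2.cuda.getCudaEnabledDeviceCount()
    except (AttributeError, cv2.error):
        return 0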
#From:https://github.com/doleron/yolov5-opencv-cpp-python
def detect(image, net):
    blob = cv2.dnn.blobFromImage(image, 1/255.0, size=(INPUT_WIDTH, INPUT_HEIGHT), swapRB=False, crop=False)
    net.setInput(blob)
    preds = net.forward()
    return preds
#From:https://github.com/doleron/yolov5-opencv-cpp-python
def load_capture():
    capture = cv2.VideoCapture("tjoint.mp4")
    return capture
#From:https://github.com/doleron/yolov5-opencv-cpp-python
def load_classes():
    class_list = []
    with open("classes.txt", "r") as f:
        class_list = [cname.strip() for cname in f.readlines()]
    return class_list
class_list = load_classes()
#From:https://github.com/doleron/yolov5-opencv-cpp-python
# returns our bounding boxes:
def wrap_detection(input_image, output_data):
    class_ids = []
    confidences = []
    boxes = []
    rows = output_data.shape[0]
    # shape is (rows, cols, channels); the padded square input from
    # format_yolov5() makes the two scale factors equal
    image_height, image_width, _ = input_image.shape
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT
    for r in range(rows):
        row = output_data[r]
        confidence = row[4]
        if confidence >= CONFIDENCE_THRESHOLD:
            classes_scores = row[5:]
            _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
            class_id = max_indx[1]
            if (classes_scores[class_id] > .25):
                confidences.append(confidence)
                class_ids.append(class_id)
                x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
                left = int((x - 0.5 * w) * x_factor)
                top = int((y - 0.5 * h) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)
    result_class_ids = []
    result_confidences = []
    result_boxes = []
    for i in indexes:
        result_confidences.append(confidences[i])
        result_class_ids.append(class_ids[i])
        result_boxes.append(boxes[i])
    return result_class_ids, result_confidences, result_boxes
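# Note (added observation): NMSBoxes above is called with hard-coded thresholds
# (0.25, 0.45) rather than the SCORE_THRESHOLD / NMS_THRESHOLD constants defined
# at the top of the file; the constants appear to be leftovers from an earlier revision.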
#From:https://github.com/doleron/yolov5-opencv-cpp-python
def format_yolov5(frame):
    row, col, _ = frame.shape
    _max = max(col, row)
    result = np.zeros((_max, _max, 3), np.uint8)
    result[0:row, 0:col] = frame
    return result
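# Illustrative sketch (not called anywhere; the helper name is hypothetical):
# because format_yolov5() pads the frame into a square before detect() resizes it
# to 640 x 640, the x and y scale factors computed in wrap_detection() are equal,
# so box coordinates map back to the original frame with a single factor.
def letterbox_scale(frame):
    rows, cols, _ = frame.shape
    side = max(rows, cols)
    return side / INPUT_WIDTH, side / INPUT_HEIGHT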
net = build_model(is_cuda)
#start some counters
start = time.time()*10**9
frame_count = 0
total_frames = 0
fps = -1
jointCount = 0 # how many consecutive frames have we seen the joint
def transform_coordinates(original_x, original_y):
    # Shilpa's function, not currently in use
    new_x, new_y = None, None
    # If original x is less than or equal to 320 and
    # y is less than or equal to 240, new coordinate is (-x, +y)
    if (original_x <= 320) and (original_y <= 240):
        new_x = (original_x - 320) / 320
        new_y = (original_y + 240) / 240
    # If original x is less than or equal to 320 and
    # y is greater than 240, new coordinate is (-x, -y)
    elif (original_x <= 320) and (original_y > 240):
        new_x = (original_x - 320) / 320
        new_y = (original_y - 240) / 240
    # If original x is greater than 320 and
    # y is less than or equal to 240, new coordinate is (+x, +y)
    elif (original_x > 320) and (original_y <= 240):
        new_x = (original_x + 320) / 320
        new_y = (original_y + 240) / 240
    # If original x is greater than 320 and
    # y is greater than 240, new coordinate is (+x, -y)
    elif (original_x > 320) and (original_y > 240):
        new_x = (original_x + 320) / 320
        new_y = (original_y - 240) / 240
    return new_x, new_y
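# Sketch of the normalization applied inside the main loop below (an illustrative
# helper, not wired in; the name is hypothetical): pixel coordinates from the
# 640 x 480 color stream are mapped so the image center is (0, 0), x grows to the
# right and y grows upward, each roughly in [-1, 1].
def normalize_pixel(px, py, width=640, height=480):
    return (px - width / 2) / (width / 2), (py - height / 2) / -(height / 2)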
while True:
    pipeline.wait_for_frames()  # first frameset is discarded; it just blocks until frames arrive
    # store frame
    frameset = pipeline.wait_for_frames()
    color_frame = frameset.get_color_frame()
    depth_frame = frameset.get_depth_frame()
    frame = np.asanyarray(color_frame.get_data())
    # Create alignment primitive with color as its target stream:
    align = rs.align(rs.stream.color)
    frameset = align.process(frameset)
    # Update color and depth frames:
    colorizer = rs.colorizer()
    aligned_depth_frame = frameset.get_depth_frame()
    colorized_depth = np.asanyarray(colorizer.colorize(aligned_depth_frame).get_data())
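    # Note (added observation): depth and color come from different imagers, so the
    # raw depth frame is not pixel-aligned with the RGB image; rs.align re-projects
    # depth into the color viewpoint. The depth lookups further down use depth_frame
    # (the unaligned frame), which appears to assume the two streams line up closely
    # enough at 640 x 480.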
    # Standard OpenCV boilerplate for running the net:
    inputImage = format_yolov5(frame)
    outs = detect(inputImage, net)
    class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])
    frame_count += 1
    total_frames += 1
    # MQTT/INTEGRATION STUFF:
    # if no detection, just publish 0 and reset the joint counter;
    # otherwise grab the bbox values
    if len(class_ids) == 0:
        jointCount = 0
        client.publish("joint_found", str(0))
    else:
        jointCount += 1
        boxLeft = boxes[0][0]
        boxTop = boxes[0][1]
        boxWidth = boxes[0][2]
        boxHeight = boxes[0][3]
        print('saw a joint for ', jointCount, ' frames')
    # iterate through the bounding boxes/detections
    for (classid, confidence, box) in zip(class_ids, confidences, boxes):
        color = colors[int(classid) % len(colors)]
        print(confidence)
        # crop the camera frame to the bbox coords
        img = frame[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]
        lines = None
        if all(dim > 0 for dim in img.shape):
            # create HSV image -> threshold mask -> Canny edge detect:
            boxh, boxw, _ = img.shape
            min_size = max(boxh, boxw)
            hsvIMG = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            mask = cv2.inRange(hsvIMG, dark_hsv, light_hsv)
            edges = cv2.Canny(mask, 50, 150, apertureSize=3)
            # grab lines
            lines = cv2.HoughLinesP(
                edges,                               # Input edge image
                1,                                   # Distance resolution in pixels
                np.pi / 180,                         # Angle resolution in radians
                threshold=50,                        # Min number of votes for a valid line
                minLineLength=int(.88 * min_size),   # Min allowed length of a line
                maxLineGap=int(.25 * min_size)       # Max allowed gap between segments when joining them
            )
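            # Scaling minLineLength and maxLineGap to the bbox size presumably keeps
            # only lines that span most of the joint crop, i.e. the seam, and discards
            # short edge fragments.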
        # Iterate over points
        lines_list = []
        lengths_list = []
        if lines is not None:
            for points in lines[0:4]:
                # Extracted points nested in the list
                x1, y1, x2, y2 = points[0]
                lineLength = distance.euclidean((x1, y1), (x2, y2))
                lines_list.append([(x1, y1), (x2, y2)])
                lengths_list.append(lineLength)
            # grab the longest detected line
            # and grab (x,y) for its two points
            lengths_list = np.array(lengths_list)
            finalLine = lines_list[np.argmax(lengths_list)]
            # offset the crop-relative endpoints back into full-frame pixels
            # using this box's origin
            x1 = finalLine[0][0] + box[0]
            y1 = finalLine[0][1] + box[1]
            x2 = finalLine[1][0] + box[0]
            y2 = finalLine[1][1] + box[1]
            cv2.line(frame, (x1, y1), (x2, y2), (255, 0, 0), 10)  # adds line to frame
            # get depth values at line points:
            depth1 = depth_frame.get_distance(x1, y1)
            depth2 = depth_frame.get_distance(x2, y2)
            # normalize detection coords for arm team
            #x1final, y1final = transform_coordinates(x1, y1)
            #x2final, y2final = transform_coordinates(x2, y2)
            x1final = (x1 - 320) / 320
            x2final = (x2 - 320) / 320   # normalizing coordinates
            y1final = (y1 - 240) / -240  # so that center of image is 0,0
            y2final = (y2 - 240) / -240
            # Just for calibration; can delete if you want:
            print("ORIG X:", x1, ",", x2)
            print("ORIG Y:", y1, ",", y2)
            print("TOP:", x1final, y1final)
            print("BOT:", x2final, y2final)
            if calib_depth:
                #print("DEPTHS:",depth1,depth2)
                # adds circles to the colorized depth frame if plotting
                cv2.circle(colorized_depth, (x1, y1), radius=10, color=(0, 0, 0), thickness=-1)
                cv2.circle(colorized_depth, (x2, y2), radius=10, color=(0, 255, 0), thickness=-1)
                cv2.namedWindow("Realsense_dist", cv2.WINDOW_AUTOSIZE)
                cv2.imshow('Realsense_dist', colorized_depth)
            # interpolate NUM_POINTS points to send to the arm team, serialized as a string list
            xlist = np.linspace(x2final, x1final, NUM_POINTS)
            ylist = np.linspace(y2final, y1final, NUM_POINTS)
            dlist = np.linspace(depth2, depth1, NUM_POINTS)
            finalLine = [[round(x, 2), round(y, 2), round(z, 2)] for x, y, z in zip(xlist, ylist, dlist)]
            finalLine = ','.join(str(i) for i in finalLine)
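            # The payload below is NUM_POINTS waypoints, each "[x, y, depth]", with
            # x/y in the normalized image frame above and depth in meters
            # (get_distance() returns meters), joined into one comma-separated string
            # for the arm side to parse.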
            # publish if a seam was detected and the joint has been in view for at least 5 consecutive frames
            if jointCount >= 5:
                client.publish("joint_found", str(1))
                client.publish("coord Start", str(x1final) + ',' + str(y1final))  # MQTT pub start & end
                client.publish("coord_list", finalLine)
                print("Just Published Coords: ", finalLine)
            else:
                client.publish("joint_found", str(0))
            if calib_pointCloud:
                # Note: this branch needs open3d; the "import open3d as o3d" at the
                # top is currently commented out, so uncomment it before enabling.
                pc = rs.pointcloud()
                pc.map_to(color_frame)
                pointcloud = pc.calculate(depth_frame)
                w = rs.video_frame(depth_frame).width
                h = rs.video_frame(depth_frame).height
                verts = np.asanyarray(pointcloud.get_vertices(dims=2)).view(np.float32).reshape(h, w, 3)
                roi = verts[200:600, 100:400, :].reshape(-1, 3)
                #pointcloud.export_to_ply("pointcloud.ply", color_frame)
                pcd = o3d.geometry.PointCloud()
                pcd.points = o3d.utility.Vector3dVector(roi)
                o3d.io.write_point_cloud("croppedCloud.ply", pcd)
        else:
            client.publish("joint_found", str(0))
        cv2.rectangle(frame, box, color, 2)
        cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1)
        cv2.putText(frame, class_list[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0))
    if frame_count >= 30:
        end = time.time() * 10**9
        fps = 1000000000 * frame_count / (end - start)
        frame_count = 0
        start = time.time() * 10**9
    if fps > 0:
        fps_label = "FPS: %.2f" % fps
        cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    #cv2.resizeWindow("Resized_window",480,640)
    cv2.imshow("output", frame)
    if cv2.waitKey(1) > -1:
        print("finished by user")
        break
print("Total frames: " + str(total_frames))