diff --git a/deep_sort/utils/ds_custom_funtions.py b/deep_sort/utils/ds_custom_funtions.py
deleted file mode 100644
index 8b13789..0000000
--- a/deep_sort/utils/ds_custom_funtions.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/detection_and_tracking_yolov7.py b/detection_and_tracking_yolov7.py
deleted file mode 100644
index 43c3dcd..0000000
--- a/detection_and_tracking_yolov7.py
+++ /dev/null
@@ -1,308 +0,0 @@
-
-# https://learnopencv.com/yolov7-object-detection-paper-explanation-and-inference/
-
-import torch  # Docker implemented
-import torchvision  # Docker implemented
-from torchvision import transforms
-
-import cv2
-
-# Custom functions
-from utils.custom_functions import detect, draw_roi, load_roi
-
-from tracker.tracking_function import tracking, load_deepsort
-
-
-"""
-To run
-(Host terminal)
-xhost +
-docker run --gpus all --rm -it -e DISPLAY=$DISPLAY -v $PWD:/workspace -v /tmp/.X11-unix:/tmp/.X11-unix:rw --device="/dev/video0:/dev/video0" yolov7-deep_sort_general:latest
-
-"""
-
-
-def detect_and_track(video_path=0,
-                     show_img=True, inv_h_frame=False, hold_img=False,
-                     save_vid=False, save_loc="result",
-                     model_path='yolov7.pt', class_ids=[], img_sz=640, color=(0, 255, 0), conf_thres=0.25, iou_thres=0.65,
-                     roi=[0, 0, 1, 1], roi_color=(255, 255, 255),
-                     deep_sort_model="osnet_x1_0", ds_max_dist=0.1, ds_max_iou_distance=0.7, ds_max_age=30, ds_n_init=3, ds_nn_budget=100, ds_color=(0, 0, 255)):
-    """
-    WHAT IT DOES:
-        1. Load the detection model.
-        2. Load the tracking model.
-        3. Load the video capture.
-        4. Select the ROI.
-        5. Detection.
-        6. Tracking.
-        7. Counting.
-        8. Plots.
-        9. Close the capture.
-        10. Return the results.
-
-    WARNINGS:
-        - This function only works with CUDA enabled.
-        - If you want to run a custom detection, you can train and obtain the weights with the YOLOv7 repository and then replace the .pt file.
-            * https://github.com/WongKinYiu/yolov7
-            * https://www.youtube.com/watch?v=wMMu_ReIaHk&list=LL&index=1
-
-    INPUTS:
-        > SOURCE PARAMETERS:
-        video_path = 0 -> Path of the video or image; it can be an RTSP or HTTP stream, a local video or a web camera.
-
-        > OUT PARAMETERS:
-        show_img = True -> Show every result on the screen.
-        inv_h_frame = False -> Horizontally flip the frame (useful for web cameras).
-        hold_img = False -> Pause the video at every frame, advancing it with the spacebar.
-        color = (0, 255, 0) -> Color of the labels and bounding boxes generated by the detection module.
-
-        > ROI PARAMETERS:
-        roi = [0, 0, 1, 1] -> Sample ROI.
-        roi_color = (255, 255, 255) -> Color of the ROI.
-
-        > SAVE PARAMETERS:
-        save_vid = False -> Save the results.
-        save_loc = "result" -> Path of the results, e.g. ./results.
-
-        > DETECTION PARAMETERS (YOLOv7x):
-        model_path = 'yolov7.pt' -> Path of the .pt YOLOv7x model.
-        class_ids = [] -> List of class IDs to detect. An empty list means 'every class'.
-        img_sz = 640 -> Image size required by the model.
-        conf_thres = 0.25 -> Confidence threshold; a detection is kept only if its confidence is greater than conf_thres.
-        iou_thres = 0.65 -> Intersection over Union threshold used by non-max suppression.
-
-        > DEEP SORT MODEL PARAMETERS:
-        deep_sort_model = "osnet_x1_0" -> Deep SORT model used. You can leave this empty and the program will recommend one.
-        ds_max_dist = 0.1 -> The matching threshold. Samples with larger distance are considered an invalid match.
-        ds_max_iou_distance = 0.7 -> Gating threshold. Associations with cost larger than this value are disregarded.
-        ds_max_age = 30 -> Maximum number of missed detections before a track is deleted.
-        ds_n_init = 3 -> Number of frames that a track remains in the initialization phase.
-        ds_nn_budget = 100 -> Maximum size of the appearance descriptors gallery.
-        ds_color = (0, 0, 255) -> Color of the tracking annotations.
-
-    OUTPUTS:
-        classes_after_ds = {class_id_1: count_1, ..., class_id_n: count_n} -> Number of objects counted for each class.
-        names_detected = [[class_id_1, name_1], ..., [class_id_n, name_n]] -> Name of each class detected.
-        roi = [(roi_xmin, roi_ymin), (roi_xmax, roi_ymax)] -> ROI selected.
-        avg_fps = float -> Average FPS of the detection model plus the tracking model.
-        orig_w = int -> Original width of the frame.
-        orig_h = int -> Original height of the frame.
-        orig_fps = int -> Original FPS of the source.
-        stopped = bool -> True if the program was stopped manually, False if the video ended.
-    """
-
-    # Graphic card setup
-    if not torch.cuda.is_available():
-        raise TypeError(
-            'Error while trying to use Graphic Card. Please check that it is available.')
-
-    # This function only accepts ONE graphic card, located at 0; change the number after ':' to select another card.
-    # You can list the graphic cards with the command 'nvidia-smi'.
-    device = torch.device("cuda:0")
-
-    # Load all characteristics of the YOLOv7x model
-    weights = torch.load(model_path)
-
-    # Send the model characteristics to the graphic card
-    model = weights['model']
-    model = model.half().to(device)
-    _ = model.eval()
-
-    # Get the model class names
-    names = model.module.names if hasattr(model, 'module') else model.names
-
-    # Load the Deep SORT model
-    deepsort = load_deepsort(deep_sort_model=deep_sort_model, max_dist=ds_max_dist, max_iou_distance=ds_max_iou_distance,
-                             max_age=ds_max_age, n_init=ds_n_init, nn_budget=ds_nn_budget)
-
-    # Capture the frames, handling opening errors
-    cap = cv2.VideoCapture(video_path)
-
-    if not cap.isOpened():
-        raise TypeError(
-            'Error while trying to read video. Please check the path again')
-
-    # Get the properties of the original capture
-    orig_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    orig_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    orig_fps = cap.get(cv2.CAP_PROP_FPS) % 100
-
-    # To save the video
-    if save_vid:
-        result = cv2.VideoWriter(save_loc + '.avi',
-                                 cv2.VideoWriter_fourcc(*'MJPG'),
-                                 10, (orig_w, orig_h))
-
-    # Start the FPS counting
-    frame_count = 0  # To count total frames.
-    total_fps = 0  # To get the final frames per second.
-
-    # Load the ROI
-    if show_img:
-        roi = load_roi(cap, roi_color, inv_h_frame)
-
-    else:
-        if roi[0] < 0:
-            raise TypeError(
-                'Error while trying to load ROI. xmin must be greater than 0')
-
-        if roi[1] < 0:
-            raise TypeError(
-                'Error while trying to load ROI. ymin must be greater than 0')
-
-        if roi[2] > orig_w:
-            raise TypeError(
-                f'Error while trying to load ROI. xmax must be smaller than {orig_w}')
-
-        if roi[3] > orig_h:
-            raise TypeError(
-                f'Error while trying to load ROI. ymax must be smaller than {orig_h}')
-
-    # Count
-    counted = []
-    classes_after_ds = {}
-
-    # Main detection loop
-    while cap.isOpened():
-
-        # Get the frame
-        ret, frame = cap.read()
-
-        # To show the image correctly (e.g. web camera)
-        if inv_h_frame:
-            frame = cv2.flip(frame, 1)
-
-        # If the video has not finished yet
-        if ret:
-
-            try:
-                # Detection (YOLOv7)
-                coords, classes_detected, exec_time_yolo = detect(
-                    frame, model, device, names, show_img=show_img, color=color, img_sz=img_sz, class_ids=class_ids, conf_thres=conf_thres, iou_thres=iou_thres)
-            except Exception as exc:
-                raise TypeError("Error while running the detection model.") from exc
-
-            if coords != []:
-
-                try:
-                    # Tracking (Deep SORT, osnet_x1_0)
-                    output_ds, exec_time_sort = tracking(
-                        coords, classes_detected, deepsort, names, frame, ds_color=ds_color, show_img=show_img)
-                except Exception as exc:
-                    raise TypeError("Error while running the tracking model.") from exc
-
-                # Count
-                for detection in output_ds:
-
-                    # Get the variables
-                    ds_cpoint = detection[0]
-                    ds_id = detection[1]
-                    ds_class = detection[2]
-
-                    # Check whether ds_cpoint is inside the ROI
-                    is_into_roi = (roi[0][0] < ds_cpoint[0] < roi[1][0]) and (
-                        roi[0][1] < ds_cpoint[1] < roi[1][1])
-
-                    # If it is inside the ROI
-                    if is_into_roi:
-
-                        # Fill the empty list
-                        if len(counted) == 0:
-                            counted.append([ds_id, ds_class])
-
-                            # Get the classes detected
-                            classes_after_ds = dict.fromkeys(
-                                [elem[1] for elem in counted], 0)
-
-                            # Count per class
-                            for elem in counted:
-                                classes_after_ds[elem[1]] += 1
-
-                        else:
-                            # If the id is not in the list
-                            if ds_id not in [elem[0] for elem in counted]:
-                                # Count the object
-                                counted.append([ds_id, ds_class])
-
-                                # Get the classes detected
-                                classes_after_ds = dict.fromkeys(
-                                    [elem[1] for elem in counted], 0)
-
-                                # Count per class
-                                for elem in counted:
-                                    classes_after_ds[elem[1]] += 1
-            else:
-                exec_time_sort = 0
-
-            # Calculate FPS (approximately 25 FPS on a GeForce 1060 Max-Q Design)
-            fps = 1 / (exec_time_yolo + exec_time_sort)
-
-            total_fps += fps
-            frame_count += 1
-
-            # Show the processed frame
-            if show_img:
-
-                # Draw the ROI
-                draw_roi(roi, roi_color, frame)
-
-                # Draw the FPS
-                cv2.putText(frame, f"{fps:.3f} FPS (YOLO + SORT)", (15, 30), cv2.FONT_HERSHEY_SIMPLEX,
-                            0.5, color, 1)
-
-                # Draw the counter
-                counter_text = [[key, names[key], classes_after_ds[key]]
-                                for key in classes_after_ds.keys()]
-                cv2.putText(frame, f"COUNTER = {counter_text}", (15, 50), cv2.FONT_HERSHEY_SIMPLEX,
-                            0.5, color, 1)
-
-                # Show the frame
-                cv2.imshow('PROCESSED FRAME', frame)
-
-                # Wait for 'q' to exit
-                if hold_img:
-                    if cv2.waitKey(0) & 0xFF == ord('q'):
-                        stopped = True
-                        break
-                else:
-                    if cv2.waitKey(1) & 0xFF == ord('q'):
-                        stopped = True
-                        break
-
-        else:
-            stopped = False
-            break
-
-        if save_vid:
-            # Save the frame
-            result.write(frame)
-
-    # Close the video capture
-    cap.release()
-
-    # To save the video
-    if save_vid:
-        result.release()
-
-    # Compute additional results
-    avg_fps = 0
-    if frame_count > 0:
-        avg_fps = total_fps / frame_count
-
-    # Close all windows
-    if show_img:
-        cv2.destroyAllWindows()
-
-    return classes_after_ds, [[key, names[key]] for key in classes_after_ds.keys()], roi, round(avg_fps, 2), orig_w, orig_h, orig_fps, stopped
-
-
-# Test
-print(detect_and_track(video_path=0,
-                       show_img=True, inv_h_frame=True, hold_img=False,
-                       save_vid=False, save_loc="results/result_3",
-                       model_path='pretrained_weights/yolov7.pt', class_ids=[], img_sz=640, color=(0, 255, 0), conf_thres=0.4, iou_thres=0.65,
-                       roi=[0, 0, 1, 1], roi_color=(255, 255, 255),
-                       deep_sort_model="osnet_x1_0", ds_max_dist=0.1, ds_max_iou_distance=0.7, ds_max_age=30, ds_n_init=3, ds_nn_budget=100, ds_color=(0, 0, 255)))
diff --git a/test_oop.py b/test_oop.py
index 966269f..5b02ee3 100644
--- a/test_oop.py
+++ b/test_oop.py
@@ -1,6 +1,5 @@
 from yolov7_sort_count_oop import YoloSortCount
-import time
-import cv2
+
 
 # Test
 test = YoloSortCount()
@@ -30,11 +29,11 @@ test.conf_thres = 0.5
 
 # Frame
-test.inv_h_frame = False
+test.inv_h_frame = True
 
 
 # Save
 test.save_loc = "results/test_test"
-test.save_vid = True
+test.save_vid = False
 
 # Run
 test.run()
\ No newline at end of file
diff --git a/tracker/tracking_function.py b/tracker/tracking_function.py
deleted file mode 100644
index 1c24900..0000000
--- a/tracker/tracking_function.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# https://github.com/dongdv95/yolov5/blob/master/Yolov5_DeepSort_Pytorch/track.py
-
-from deep_sort.deep_sort import DeepSort
-import time
-import numpy as np
-import cv2
-
-
-def xyxy2xywh(x):
-    """
-    WHAT IT DOES:
-        Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h], where xy1 = top-left and xy2 = bottom-right.
-
-        Some boxes arrive with ymin and ymax inverted (or with the same value), which produces a negative height
-        and makes the _resize method fail when it tries to resize the bboxes. This function also fixes that problem.
-
-    INPUTS:
-        x = [xmin, ymin, xmax, ymax] -> List of coordinates of a bounding box.
-
-    OUTPUTS:
-        y = [x_center, y_center, width, height] -> List of converted bounding boxes.
-    """
-
-    y = np.copy(x)
-
-    for i in range(len(x)):
-        if x[i][3] <= x[i][1]:
-            x[i][3] = y[i][1] + 1
-            x[i][1] = y[i][3]
-
-    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
-    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
-    y[:, 2] = x[:, 2] - x[:, 0]  # width
-    y[:, 3] = x[:, 3] - x[:, 1]  # height
-
-    return y
-
-
-def xyxy2cxcy(x):
-    """
-    WHAT IT DOES:
-        Convert a box from [x1, y1, x2, y2] to its centroid [cx, cy], where xy1 = top-left and xy2 = bottom-right.
-
-    INPUTS:
-        x = [x1, y1, x2, y2] -> xy1 = top-left, xy2 = bottom-right.
-
-    OUTPUTS:
-        y = [cx, cy] -> Centroid of the bounding box.
-    """
-    y = np.copy(x[:2])
-    y[0] = (x[2] - x[0]) / 2 + x[0]  # x center
-    y[1] = (x[3] - x[1]) / 2 + x[1]  # y center
-
-    return y
-
-
-def load_deepsort(deep_sort_model="osnet_x1_0", max_dist=0.1, max_iou_distance=0.7, max_age=30, n_init=3, nn_budget=100):
-    """
-    WHAT IT DOES:
-        Load the Deep SORT tracker used to track the detections.
-
-    INPUTS:
-        deep_sort_model = "osnet_x1_0" -> Deep SORT model used. You can leave this empty and the program will recommend one.
-        max_dist = 0.1 -> The matching threshold. Samples with larger distance are considered an invalid match.
-        max_iou_distance = 0.7 -> Gating threshold. Associations with cost larger than this value are disregarded.
-        max_age = 30 -> Maximum number of missed detections before a track is deleted.
-        n_init = 3 -> Number of frames that a track remains in the initialization phase.
-        nn_budget = 100 -> Maximum size of the appearance descriptors gallery.
-
-    OUTPUTS:
-        deepsort -> The initialized DeepSort tracker.
-    """
-
-    deepsort = DeepSort(deep_sort_model,
-                        max_dist=max_dist,
-                        max_iou_distance=max_iou_distance,
-                        max_age=max_age, n_init=n_init, nn_budget=nn_budget,
-                        use_cuda=True)
-    return deepsort
-
-
-def tracking(coords, classes_detected, deepsort, names, frame, ds_color=(0, 0, 255), show_img=True):
-    """
-    WHAT IT DOES:
-        Generate the tracking of the detections.
-
-    OUTPUTS:
-        ds_output = [[ds_cpoint, id, cls], ..., [ds_cpoint_n, id_n, cls_n]] -> List of lists with the centroid, detection id and class of each track.
-        delta_time -> Processing time of the tracking model.
-    """
-
-    xywhs = xyxy2xywh(np.array(coords))
-    confs = np.array([[elem[2]] for elem in classes_detected])
-    clss = np.array([[elem[1]] for elem in classes_detected])
-
-    if coords != []:
-
-        # Pass the detections to Deep SORT
-        start_time = time.time()
-        outputs = list(deepsort.update(xywhs, confs, clss, frame))
-        end_time = time.time()
-
-        delta_time = end_time - start_time
-
-        ds_output = []
-        # Draw boxes for visualization
-        if len(outputs) > 0:
-            for j, (output, conf) in enumerate(zip(outputs, confs)):
-                ds_cpoint = tuple(xyxy2cxcy(output[0:4]))
-                id = output[4]
-                cls = output[5]
-
-                ds_output.append([ds_cpoint, id, cls])
-
-                if show_img:
-                    cv2.circle(frame, (ds_cpoint[0], ds_cpoint[1]), radius=0, color=ds_color, thickness=3)
-                    cv2.putText(frame, f"{names[cls]}: {id}", (ds_cpoint[0] - 10, ds_cpoint[1] - 7), cv2.FONT_HERSHEY_SIMPLEX,
-                                0.5, ds_color, 1)
-
-    else:
-        start_time = time.time()
-        deepsort.increment_ages()
-        ds_output = []
-        end_time = time.time()
-
-        delta_time = end_time - start_time
-
-    return ds_output, delta_time
diff --git a/utils/custom_functions.py b/utils/custom_functions.py
deleted file mode 100644
index ad56bdd..0000000
--- a/utils/custom_functions.py
+++ /dev/null
@@ -1,315 +0,0 @@
-# YOLOR general utils -> Custom
-
-import torch  # Docker implemented
-import torchvision  # Docker implemented
-from torchvision import transforms
-
-import cv2
-import numpy as np
-import time
-
-# Utilities
-from utils.general import non_max_suppression
-from utils.datasets import letterbox
-from utils.plots import output_to_keypoint
-
-
-def scale_coords_custom(img1_shape, coords, img0_shape):
-    """
-    WHAT IT DOES:
-        Rescale coords (xyxy) from img1_shape to img0_shape.
-
-    INPUTS:
-        img1_shape = img1.shape -> Shape of the resized image.
-        coords = [xmin, ymin, xmax, ymax] -> Coords of the raw bounding boxes.
-        img0_shape = img0.shape -> Shape of the original image.
-
-    OUTPUTS:
-        coords = [xmin, ymin, xmax, ymax] -> Rescaled coords of the bounding boxes.
-    """
-
-    gain = min(img1_shape[0] / img0_shape[0],
-               img1_shape[1] / img0_shape[1])  # gain = old / new
-    pad = (img1_shape[1] - img0_shape[1] * gain) / \
-        2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
-
-    coords[0] -= pad[0]  # x padding
-    coords[2] -= pad[0]  # x padding
-    coords[1] -= pad[1]  # y padding
-    coords[3] -= pad[1]  # y padding
-    coords[:] /= gain
-
-    return coords
-
-
-def draw_bbox(frame, coords, color, names, confidence):
-    """
-    WHAT IT DOES:
-        Draw a bbox on the frame with its class name and confidence.
-
-    INPUTS:
-        frame -> Frame to draw on.
-        coords = [xmin, ymin, xmax, ymax] -> Bounding box coords of the detection.
-        color = (B, G, R) -> Color of the bounding box.
-        names -> Name of the detection (from the model's class list).
-        confidence -> Confidence of the detection.
-
-    OUTPUTS:
-        Only returns a boolean confirmation: True.
-    """
-
-    # Draw the bounding box
-    frame = cv2.rectangle(
-        frame,
-        (int(coords[0]), int(coords[1])),
-        (int(coords[2]), int(coords[3])),
-        color=color,
-        thickness=1,
-        lineType=cv2.LINE_AA
-    )
-
-    # Write the confidence and class name
-    cv2.putText(frame, f"{names}: {confidence}", (int(coords[0]), int(coords[1]) - 5), cv2.FONT_HERSHEY_SIMPLEX,
-                0.5, color, 1)
-
-    return True
-
-
-def draw_roi(roi, roi_color, frame):
-    """
-    WHAT IT DOES:
-        Draw the ROI rectangle on the frame.
-
-    INPUTS:
-        roi = [(roi_xmin, roi_ymin), (roi_xmax, roi_ymax)] -> ROI bounding box.
-        roi_color = (B, G, R) -> Color of the ROI.
-        frame -> Frame to draw on.
-
-    OUTPUTS:
-        Only returns a boolean confirmation: True.
-    """
-    # Extract the values
-    roi_xmin, roi_ymin, roi_xmax, roi_ymax = roi[0][0], roi[0][1], roi[1][0], roi[1][1]
-
-    # Draw the ROI
-    frame = cv2.rectangle(
-        frame,
-        (int(roi_xmin), int(roi_ymin)),
-        (int(roi_xmax), int(roi_ymax)),
-        color=roi_color,
-        thickness=2,
-        lineType=cv2.LINE_AA
-    )
-
-    return True
-
-
-class coordinateStore:
-    """
-    Class to capture the click event and get the coordinates.
-
-    1. Instantiate an object of the class.
-    2. Capture the event with self.select_point().
-    3. Use the self.point variable.
-    """
-
-    def __init__(self):
-        self.point = False
-
-    def select_point(self, event, x, y, flags, param):
-        if event == cv2.EVENT_LBUTTONDBLCLK:
-            self.point = (x, y)
-
-
-def load_roi(cap, roi_color=(255, 255, 255), inv_h_frame=False):
-    """
-    WHAT IT DOES:
-        Ask the user for the ROI.
-
-    INPUTS:
-        cap -> Capture of the frame or image with cv2.VideoCapture().
-        roi_color = (B, G, R) -> Color of the ROI.
-        inv_h_frame = False -> To flip the image horizontally.
-
-    OUTPUTS:
-        roi = [(roi_xmin, roi_ymin), (roi_xmax, roi_ymax)] -> ROI bounding box.
-    """
-
-    ret, frame = cap.read()
-
-    # To show the image correctly (e.g. web camera)
-    if inv_h_frame:
-        frame = cv2.flip(frame, 1)
-
-    if ret:
-
-        for i in range(4):
-
-            # Instantiate the class
-            coordinates = coordinateStore()
-
-            if i == 1:
-                while True:
-                    text_frame = frame.copy()
-                    cv2.putText(text_frame, "No points selected. Please choose your 'top-left' point by double-clicking on the image. Press 'q' to continue after you have selected the point.", (15, 30), cv2.FONT_HERSHEY_SIMPLEX,
-                                0.5, roi_color, 1)
-
-                    selected_min = coordinates.point
-                    if selected_min:
-                        cv2.circle(text_frame, selected_min, radius=0,
-                                   color=roi_color, thickness=10)
-
-                    cv2.imshow('Selecting ROI', text_frame)
-
-                    # Capture the click event
-                    cv2.setMouseCallback(
-                        'Selecting ROI', coordinates.select_point)
-
-                    if cv2.waitKey(22) & 0xFF == ord('q'):
-                        break
-
-                cv2.destroyAllWindows()
-
-            if i == 2:
-                while True:
-                    text_frame = frame.copy()
-                    cv2.putText(text_frame, "The 'top-left' point has already been selected. Please choose your 'bottom-right' point by double-clicking on the image. Press 'q' to continue after you have selected the point.", (15, 30), cv2.FONT_HERSHEY_SIMPLEX,
-                                0.5, roi_color, 1)
-
-                    selected_max = coordinates.point
-                    # Only if the point is defined
-                    if selected_max:
-                        cv2.circle(text_frame, selected_max, radius=0,
-                                   color=roi_color, thickness=10)
-
-                    cv2.imshow('Selecting ROI', text_frame)
-
-                    # Capture the click event
-                    cv2.setMouseCallback(
-                        'Selecting ROI', coordinates.select_point)
-
-                    if cv2.waitKey(22) & 0xFF == ord('q'):
-                        break
-
-                cv2.destroyAllWindows()
-
-            if i == 3:
-                # [(xmin, ymin), (xmax, ymax)]
-                roi = [selected_min, selected_max]
-
-                while True:
-                    text_frame = frame.copy()
-                    cv2.putText(text_frame, "The ROI has been selected. Please press 'q' to continue with the detection.", (15, 30), cv2.FONT_HERSHEY_SIMPLEX,
-                                0.5, roi_color, 1)
-
-                    # Draw the ROI
-                    text_frame = cv2.rectangle(
-                        text_frame,
-                        roi[0],
-                        roi[1],
-                        color=roi_color,
-                        thickness=2,
-                        lineType=cv2.LINE_AA
-                    )
-
-                    cv2.imshow('Selecting ROI', text_frame)
-                    if cv2.waitKey(22) & 0xFF == ord('q'):
-                        break
-
-                cv2.destroyAllWindows()
-
-    return roi
-
-
-def detect(frame, model, device, names, show_img=True, color=(0, 255, 0), img_sz=640, class_ids=[], conf_thres=0.25, iou_thres=0.65):
-    """
-    WHAT IT DOES:
-        Detect objects in a frame.
-
-    INPUTS:
-        frame -> Original frame.
-        model -> PyTorch object: model used for detection.
-        device -> PyTorch object: device selected, e.g. the graphic card.
-        names -> List of classes.
-        show_img = True -> To show images.
-        color = (0, 255, 0) -> Color of the bounding boxes in BGR format.
-        img_sz = 640 -> Image size accepted by the model.
-        class_ids = [] -> Selected class IDs; an empty list means 'all classes'.
-        conf_thres = 0.25 -> Confidence threshold; a detection is kept only if its confidence is greater than the threshold.
-        iou_thres = 0.65 -> Intersection over Union threshold used by non-max suppression.
-
-    OUTPUTS:
-        coords_bb = [[xmin, ymin, xmax, ymax], ..., [coords_n]] -> List of bounding box coordinates of each detection.
-        classes_detected = [[name, class_id, confidence], ..., [classes_detected_n]] -> List of lists with the name, class id and confidence of each detection.
-        delta_time -> Processing time of the detection model.
-    """
-
-    # Initialize the output lists
-    coords_bb = []
-    classes_detected = []
-
-    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-    # Reshape the frame to the adequate width and height
-    img = letterbox(img, img_sz, stride=64, auto=True)[0]
-
-    # Keep a copy of the resized image to use for rescaling
-    img0 = img.copy()
-
-    # Transform the image to a tensor and send it to the device
-    img = transforms.ToTensor()(img)
-    img = torch.tensor(np.array([img.numpy()]))
-    img = img.to(device)
-    img = img.half()
-
-    # Start timing to count FPS
-    start_time = time.time()
-
-    # Get the output of the model
-    with torch.no_grad():
-        pred, _ = model(img)
-
-    # Stop timing
-    end_time = time.time()
-
-    delta_time = end_time - start_time
-
-    # Remove the noise from the output (NMS: a technique to filter the predictions of object detectors)
-    pred = non_max_suppression(pred, conf_thres, iou_thres)
-
-    # Process the filtered output and return the main characteristics [batch_id, class_id, x, y, w, h, conf]
-    output = output_to_keypoint(pred)
-
-    # For each detection in the frame
-    for idx in range(output.shape[0]):
-
-        # Separate by class id
-        if (int(output[idx][1]) in class_ids) or (class_ids == []):
-
-            # Rescale boxes (rescale coords (xyxy) from img0 to frame)
-            output[idx][2:6] = scale_coords_custom(
-                img0.shape[0:2], output[idx][2:6], frame.shape).round()
-
-            # Generate the coords of the bounding box
-            xmin, ymin = (output[idx, 2] - output[idx, 4] /
-                          2), (output[idx, 3] - output[idx, 5] / 2)
-            xmax, ymax = (output[idx, 2] + output[idx, 4] /
-                          2), (output[idx, 3] + output[idx, 5] / 2)
-
-            # xyxy
-            coord_bb = [xmin, ymin, xmax, ymax]
-
-            # [class name, class id, confidence]
-            class_detected = [names[int(output[idx][1])], int(
-                output[idx][1]), round(output[idx][6], 2)]
-
-            # Fill the output lists
-            coords_bb.append(coord_bb)
-            classes_detected.append(class_detected)
-
-            # Draw bounding boxes, class names and confidence
-            if show_img:
-                draw_bbox(frame, coord_bb, color,
-                          class_detected[0], class_detected[2])
-
-    return coords_bb, classes_detected, delta_time