From 56b3abd084b82143af678e7da874e99e3bbef769 Mon Sep 17 00:00:00 2001 From: HonzaCuhel Date: Thu, 29 Aug 2024 16:30:25 +0200 Subject: [PATCH] Format code --- depthai_nodes/ml/messages/__init__.py | 2 +- .../ml/messages/creators/__init__.py | 2 +- .../ml/messages/creators/detection.py | 8 +- depthai_nodes/ml/messages/img_detections.py | 17 ++- depthai_nodes/ml/messages/segmentation.py | 1 - depthai_nodes/ml/parsers/fastsam.py | 82 ++++++++--- depthai_nodes/ml/parsers/utils/fastsam.py | 60 ++++++-- depthai_nodes/ml/parsers/utils/yolo.py | 138 ++++++++++++------ depthai_nodes/ml/parsers/yolo.py | 86 +++++++---- 9 files changed, 274 insertions(+), 122 deletions(-) diff --git a/depthai_nodes/ml/messages/__init__.py b/depthai_nodes/ml/messages/__init__.py index 8a5ee1b..b354d3b 100644 --- a/depthai_nodes/ml/messages/__init__.py +++ b/depthai_nodes/ml/messages/__init__.py @@ -5,8 +5,8 @@ ) from .keypoints import HandKeypoints, Keypoints from .lines import Line, Lines -from .segmentation import SegmentationMasks from .misc import AgeGender +from .segmentation import SegmentationMasks __all__ = [ "ImgDetectionWithAdditionalOutput", diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py index f9a8ebc..bdcdad2 100644 --- a/depthai_nodes/ml/messages/creators/__init__.py +++ b/depthai_nodes/ml/messages/creators/__init__.py @@ -3,8 +3,8 @@ from .detection import create_detection_message, create_line_detection_message from .image import create_image_message from .keypoints import create_hand_keypoints_message, create_keypoints_message -from .segmentation import create_sam_message, create_segmentation_message from .misc import create_age_gender_message +from .segmentation import create_sam_message, create_segmentation_message from .thermal import create_thermal_message from .tracked_features import create_tracked_features_message diff --git a/depthai_nodes/ml/messages/creators/detection.py b/depthai_nodes/ml/messages/creators/detection.py index 359deb1..f9ba408 100644 --- a/depthai_nodes/ml/messages/creators/detection.py +++ b/depthai_nodes/ml/messages/creators/detection.py @@ -15,7 +15,9 @@ def create_detection_message( bboxes: np.ndarray, scores: np.ndarray, labels: List[int] = None, - keypoints: Union[List[Tuple[float, float]], List[Tuple[float, float, float]]] = None, + keypoints: Union[ + List[Tuple[float, float]], List[Tuple[float, float, float]] + ] = None, masks: List[np.ndarray] = None, ) -> dai.ImgDetections: """Create a DepthAI message for an object detection. @@ -118,7 +120,9 @@ def create_detection_message( if not isinstance(mask, np.ndarray): raise ValueError(f"mask should be numpy array, got {type(mask)}.") if len(mask.shape) != 2: - raise ValueError(f"mask should be of shape (H/4, W/4), got {mask.shape}.") + raise ValueError( + f"mask should be of shape (H/4, W/4), got {mask.shape}." + ) if len(masks) != bboxes.shape[0]: raise ValueError( diff --git a/depthai_nodes/ml/messages/img_detections.py b/depthai_nodes/ml/messages/img_detections.py index 5f69058..b8e6a2a 100644 --- a/depthai_nodes/ml/messages/img_detections.py +++ b/depthai_nodes/ml/messages/img_detections.py @@ -1,4 +1,3 @@ - from typing import List, Tuple, Union import depthai as dai @@ -19,11 +18,15 @@ class ImgDetectionWithAdditionalOutput(dai.ImgDetection): def __init__(self): """Initializes the ImgDetectionWithAdditionalOutput object.""" dai.ImgDetection.__init__(self) # TODO: change to super().__init__()? 
- self._keypoints: Union[List[Tuple[float, float]], List[Tuple[float, float, float]]] = [] + self._keypoints: Union[ + List[Tuple[float, float]], List[Tuple[float, float, float]] + ] = [] self._mask: np.ndarray = np.array([]) @property - def keypoints(self) -> Union[List[Tuple[float, float]], List[Tuple[float, float, float]]]: + def keypoints( + self, + ) -> Union[List[Tuple[float, float]], List[Tuple[float, float, float]]]: """Returns the keypoints. @return: List of keypoints. @@ -32,7 +35,13 @@ def keypoints(self) -> Union[List[Tuple[float, float]], List[Tuple[float, float, return self._keypoints @keypoints.setter - def keypoints(self, value: Union[List[Tuple[Union[int, float], Union[int, float]]], List[Tuple[Union[int, float, float], Union[int, float, float]]]]): + def keypoints( + self, + value: Union[ + List[Tuple[Union[int, float], Union[int, float]]], + List[Tuple[Union[int, float, float], Union[int, float, float]]], + ], + ): """Sets the keypoints. @param value: List of keypoints. diff --git a/depthai_nodes/ml/messages/segmentation.py b/depthai_nodes/ml/messages/segmentation.py index b708dec..92e5cf3 100644 --- a/depthai_nodes/ml/messages/segmentation.py +++ b/depthai_nodes/ml/messages/segmentation.py @@ -1,4 +1,3 @@ - import depthai as dai import numpy as np diff --git a/depthai_nodes/ml/parsers/fastsam.py b/depthai_nodes/ml/parsers/fastsam.py index f5e9591..9bb79ca 100644 --- a/depthai_nodes/ml/parsers/fastsam.py +++ b/depthai_nodes/ml/parsers/fastsam.py @@ -15,17 +15,17 @@ class FastSAMParser(YOLOParser): def __init__( - self, - confidence_threshold: int = 0.5, - num_classes: int = 1, - iou_threshold: int = 0.5, - mask_conf: float = 0.5, - input_shape: Tuple[int, int] = (640, 640), - prompt: str = "everything", - points: Optional[Tuple[int, int]] = None, - point_label: Optional[int] = None, - bbox: Optional[Tuple[int, int, int, int]] = None - ): + self, + confidence_threshold: int = 0.5, + num_classes: int = 1, + iou_threshold: int = 0.5, + mask_conf: float = 0.5, + input_shape: Tuple[int, int] = (640, 640), + prompt: str = "everything", + points: Optional[Tuple[int, int]] = None, + point_label: Optional[int] = None, + bbox: Optional[Tuple[int, int, int, int]] = None, + ): """Initialize the YOLOParser node. 
@param confidence_threshold: The confidence threshold for the detections @@ -47,7 +47,9 @@ def __init__( @param bbox: The bounding box @type bbox: Optional[Tuple[int, int, int, int]] """ - YOLOParser.__init__(self, confidence_threshold, num_classes, iou_threshold, mask_conf) + YOLOParser.__init__( + self, confidence_threshold, num_classes, iou_threshold, mask_conf + ) self.input_shape = input_shape self.prompt = prompt self.points = points @@ -99,21 +101,36 @@ def setBoundingBox(self, bbox): def run(self): while self.isRunning(): try: - nnDataIn : dai.NNData = self.input.get() + nnDataIn: dai.NNData = self.input.get() except dai.MessageQueue.QueueException: - break # Pipeline was stopped, no more data + break # Pipeline was stopped, no more data # Get all the layer names layer_names = nnDataIn.getAllLayerNames() outputs_names = sorted([name for name in layer_names if "_yolo" in name]) - outputs_values = [nnDataIn.getTensor(o, dequantize=True).astype(np.float32) for o in outputs_names] + outputs_values = [ + nnDataIn.getTensor(o, dequantize=True).astype(np.float32) + for o in outputs_names + ] # Get the segmentation outputs - masks_outputs_values, protos_output, protos_len = self._get_segmentation_outputs(nnDataIn) + ( + masks_outputs_values, + protos_output, + protos_len, + ) = self._get_segmentation_outputs(nnDataIn) if len(outputs_values[0].shape) != 4: # RVC4 - outputs_values = [o.transpose((2, 0, 1))[np.newaxis, ...] for o in outputs_values] - protos_output, protos_len, masks_outputs_values = self._reshape_seg_outputs(protos_output, protos_len, masks_outputs_values) + outputs_values = [ + o.transpose((2, 0, 1))[np.newaxis, ...] for o in outputs_values + ] + ( + protos_output, + protos_len, + masks_outputs_values, + ) = self._reshape_seg_outputs( + protos_output, protos_len, masks_outputs_values + ) # Decode the outputs results = decode_fastsam_output( @@ -123,25 +140,42 @@ def run(self): img_shape=self.input_shape[::-1], conf_thres=self.confidence_threshold, iou_thres=self.iou_threshold, - num_classes=self.num_classes + num_classes=self.num_classes, ) bboxes, masks = [], [] for i in range(results.shape[0]): - bbox, conf, label, seg_coeff = results[i, :4].astype(int), results[i, 4], results[i, 5].astype(int), results[i, 6:].astype(int) + bbox, conf, label, seg_coeff = ( + results[i, :4].astype(int), + results[i, 4], + results[i, 5].astype(int), + results[i, 6:].astype(int), + ) bboxes.append(bbox.tolist() + [conf, int(label)]) hi, ai, xi, yi = seg_coeff - mask_coeff = masks_outputs_values[hi][0, ai*protos_len:(ai+1)*protos_len, yi, xi] - mask = process_single_mask(protos_output[0], mask_coeff, self.mask_conf, self.input_shape, bbox) + mask_coeff = masks_outputs_values[hi][ + 0, ai * protos_len : (ai + 1) * protos_len, yi, xi + ] + mask = process_single_mask( + protos_output[0], mask_coeff, self.mask_conf, self.input_shape, bbox + ) masks.append(mask) results_bboxes = np.array(bboxes) results_masks = np.array(masks) if self.prompt == "bbox": - results_masks = box_prompt(results_masks, bbox=self.bbox, orig_shape=self.input_shape[::-1]) + results_masks = box_prompt( + results_masks, bbox=self.bbox, orig_shape=self.input_shape[::-1] + ) elif self.prompt == "point": - results_masks = point_prompt(results_bboxes, results_masks, points=self.points, pointlabel=self.point_label, orig_shape=self.input_shape[::-1]) + results_masks = point_prompt( + results_bboxes, + results_masks, + points=self.points, + pointlabel=self.point_label, + orig_shape=self.input_shape[::-1], + ) segmentation_message = 
create_sam_message(results_masks) self.out.send(segmentation_message) diff --git a/depthai_nodes/ml/parsers/utils/fastsam.py b/depthai_nodes/ml/parsers/utils/fastsam.py index 2f91627..1fab9bd 100644 --- a/depthai_nodes/ml/parsers/utils/fastsam.py +++ b/depthai_nodes/ml/parsers/utils/fastsam.py @@ -75,10 +75,17 @@ def point_prompt(bboxes, masks, points, pointlabel, orig_shape): # numpy h = masks[0]["segmentation"].shape[0] w = masks[0]["segmentation"].shape[1] if h != target_height or w != target_width: - points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points] + points = [ + [int(point[0] * w / target_width), int(point[1] * h / target_height)] + for point in points + ] onemask = np.zeros((h, w)) for annotation in masks: - mask = annotation["segmentation"] if isinstance(annotation, dict) else annotation + mask = ( + annotation["segmentation"] + if isinstance(annotation, dict) + else annotation + ) for i, point in enumerate(points): if mask[point[1], point[0]] == 1 and pointlabel[i] == 1: onemask += mask @@ -89,7 +96,9 @@ def point_prompt(bboxes, masks, points, pointlabel, orig_shape): # numpy return masks -def adjust_bboxes_to_image_border(boxes: np.ndarray, image_shape: Tuple[int, int], threshold: int = 20) -> np.ndarray: +def adjust_bboxes_to_image_border( + boxes: np.ndarray, image_shape: Tuple[int, int], threshold: int = 20 +) -> np.ndarray: """ Source: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/utils.py#L6 (Ultralytics) Adjust bounding boxes to stick to image border if they are within a certain threshold. @@ -114,12 +123,12 @@ def adjust_bboxes_to_image_border(boxes: np.ndarray, image_shape: Tuple[int, int def bbox_iou( - box1: np.ndarray, - boxes: np.ndarray, - iou_thres: float = 0.9, - image_shape: Tuple[int, int] = (640, 640), - raw_output: bool = False - ) -> np.ndarray: + box1: np.ndarray, + boxes: np.ndarray, + iou_thres: float = 0.9, + image_shape: Tuple[int, int] = (640, 640), + raw_output: bool = False, +) -> np.ndarray: """ Source: https://github.com/ultralytics/ultralytics/blob/main/ultralytics/models/fastsam/utils.py#L30 (Ultralytics - rewritten to numpy) Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes. 
@@ -161,7 +170,15 @@ def bbox_iou( return np.flatnonzero(iou > iou_thres) -def decode_fastsam_output(yolo_outputs, strides, anchors, img_shape: Tuple[int, int], conf_thres=0.5, iou_thres=0.45, num_classes=1): +def decode_fastsam_output( + yolo_outputs, + strides, + anchors, + img_shape: Tuple[int, int], + conf_thres=0.5, + iou_thres=0.45, + num_classes=1, +): """ Decode the bounding boxes @@ -182,13 +199,20 @@ def decode_fastsam_output(yolo_outputs, strides, anchors, img_shape: Tuple[int, conf_thres=conf_thres, iou_thres=iou_thres, num_classes=num_classes, - kpts_mode=False + kpts_mode=False, )[0] full_box = np.zeros(output_nms.shape[1]) - full_box[2], full_box[3], full_box[4], full_box[6:] = img_shape[1], img_shape[0], 1.0, 1.0 + full_box[2], full_box[3], full_box[4], full_box[6:] = ( + img_shape[1], + img_shape[0], + 1.0, + 1.0, + ) full_box = full_box.reshape((1, -1)) - critical_iou_index = bbox_iou(full_box[0][:4], output_nms[:, :4], iou_thres=0.9, image_shape=img_shape) + critical_iou_index = bbox_iou( + full_box[0][:4], output_nms[:, :4], iou_thres=0.9, image_shape=img_shape + ) if critical_iou_index.size > 0: full_box[0][4] = output_nms[critical_iou_index][:, 4] @@ -216,8 +240,14 @@ def crop_mask(masks, box): return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2)) -def process_single_mask(protos, mask_coeff, mask_conf, img_shape: Tuple[int, int], bbox: Tuple[int, int, int, int]) -> np.ndarray: - mask = sigmoid(np.sum(protos * mask_coeff[..., np.newaxis, np.newaxis], axis = 0)) +def process_single_mask( + protos, + mask_coeff, + mask_conf, + img_shape: Tuple[int, int], + bbox: Tuple[int, int, int, int], +) -> np.ndarray: + mask = sigmoid(np.sum(protos * mask_coeff[..., np.newaxis, np.newaxis], axis=0)) mask = cv2.resize(mask, img_shape, interpolation=cv2.INTER_NEAREST) mask = crop_mask(mask, np.array(bbox)) return (mask > mask_conf).astype(np.uint8) diff --git a/depthai_nodes/ml/parsers/utils/yolo.py b/depthai_nodes/ml/parsers/utils/yolo.py index fbb432f..bddf6c8 100644 --- a/depthai_nodes/ml/parsers/utils/yolo.py +++ b/depthai_nodes/ml/parsers/utils/yolo.py @@ -7,7 +7,7 @@ def make_grid_numpy(ny: int, nx: int, na: int) -> np.ndarray: - yv, xv = np.meshgrid(np.arange(ny), np.arange(nx), indexing='ij') + yv, xv = np.meshgrid(np.arange(ny), np.arange(nx), indexing="ij") return np.stack((xv, yv), 2).reshape(1, na, ny, nx, 2) @@ -21,14 +21,20 @@ def xywh2xyxy(x: np.ndarray) -> np.ndarray: def non_max_suppression( - prediction: np.ndarray, conf_thres: float = 0.5, - iou_thres: float = 0.45, classes: list = None, - num_classes: int = 1, agnostic: bool = False, - multi_label: bool = False, max_det: int = 300, - max_time_img: float = 0.05, max_nms: int = 30000, - max_wh: int = 7680, kpts_mode: bool = False + prediction: np.ndarray, + conf_thres: float = 0.5, + iou_thres: float = 0.45, + classes: list = None, + num_classes: int = 1, + agnostic: bool = False, + multi_label: bool = False, + max_det: int = 300, + max_time_img: float = 0.05, + max_nms: int = 30000, + max_wh: int = 7680, + kpts_mode: bool = False, ): - ''' + """ :param prediction: - prediction from the model, shape = (batch_size, boxes, xy+wh+...) :param conf_thres: - confidence threshold :param iou_thres: - intersection over union threshold @@ -42,18 +48,26 @@ def non_max_suppression( :param max_wh: - maximum width and height :param kpts_mode: - keypoints mode :return: - an array of detections per each batch = [...[...[xyxy, conf, label]...]...] 
- ''' + """ bs = prediction.shape[0] # batch size # Keypoints: 4 (bbox) + 1 (objectness) + 51 (kpts) = 56 # Segmentation: 4 (bbox) + 1 (objectness) + 4 (pos) = 9 - num_classes_check = prediction.shape[2] - (56 if kpts_mode else 9) # number of classes + num_classes_check = prediction.shape[2] - ( + 56 if kpts_mode else 9 + ) # number of classes nm = prediction.shape[2] - num_classes - 5 pred_candidates = prediction[..., 4] > conf_thres # candidates # Check the parameters. - assert num_classes == num_classes_check, f"Number of classes {num_classes} does not match the model {num_classes_check}" - assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" - assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" + assert ( + num_classes == num_classes_check + ), f"Number of classes {num_classes} does not match the model {num_classes_check}" + assert ( + 0 <= conf_thres <= 1 + ), f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" + assert ( + 0 <= iou_thres <= 1 + ), f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" # Function settings. time_limit = 2.0 + max_time_img * bs # seconds to quit after @@ -63,7 +77,7 @@ def non_max_suppression( output = [np.zeros((0, 6 + nm))] * prediction.shape[0] for img_idx, x in enumerate(prediction): # image index, image inference - x = x[pred_candidates[img_idx]] # confidence + x = x[pred_candidates[img_idx]] # confidence # If no box remains, skip the next process. if not x.shape[0]: @@ -71,45 +85,62 @@ def non_max_suppression( # (center x, center y, width, height) to (x1, y1, x2, y2) box = xywh2xyxy(x[:, :4]) - cls = x[:, 5:5+num_classes] - other = x[:, 5+num_classes:] # Either kpts or pos + cls = x[:, 5 : 5 + num_classes] + other = x[:, 5 + num_classes :] # Either kpts or pos if multi_label: box_idx, class_idx = (cls > conf_thres).nonzero(as_tuple=False).T - x = np.concatenate((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None], other[box_idx, :]), 1) + x = np.concatenate( + ( + box[box_idx], + x[box_idx, class_idx + 5, None], + class_idx[:, None], + other[box_idx, :], + ), + 1, + ) else: # Only keep the class with highest scores. class_idx = np.expand_dims(cls.argmax(1), 1) conf = cls.max(1, keepdims=True) - x = np.concatenate((box, conf, class_idx, other), 1)[conf.flatten() > conf_thres] + x = np.concatenate((box, conf, class_idx, other), 1)[ + conf.flatten() > conf_thres + ] # Filter by class, only keep boxes whose category is in classes. if classes is not None: x = x[(x[:, 5:6] == np.array(classes)).any(1)] # Check shape - num_box = x.shape[0] # number of boxes - if not num_box: # no boxes kept. + num_box = x.shape[0] # number of boxes + if not num_box: # no boxes kept. continue elif num_box > max_nms: # excess max boxes' number. 
-            x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
+            x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence
 
         # Batched NMS
         class_offset = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
-        boxes, scores = x[:, :4] + class_offset, x[:, 4][..., np.newaxis] # boxes (offset by class), scores
-        keep_box_idx = np.array(nms(np.hstack((boxes, scores)).astype(np.float32, copy=False), iou_thres))
-
-        if keep_box_idx.shape[0] > max_det: # limit detections
+        boxes, scores = (
+            x[:, :4] + class_offset,
+            x[:, 4][..., np.newaxis],
+        )  # boxes (offset by class), scores
+        keep_box_idx = np.array(
+            nms(np.hstack((boxes, scores)).astype(np.float32, copy=False), iou_thres)
+        )
+
+        if keep_box_idx.shape[0] > max_det:  # limit detections
             keep_box_idx = keep_box_idx[:max_det]
 
         output[img_idx] = x[keep_box_idx]
         if (time.time() - tik) > time_limit:
-            print(f'WARNING: NMS cost time exceed the limited {time_limit}s.')
+            print(f"WARNING: NMS time cost exceeded the {time_limit}s limit.")
             break  # time limit exceeded
 
     return output
 
 
-def parse_yolo_outputs(outputs: list, strides: list, anchors: np.ndarray, kpts = None) -> np.ndarray:
+def parse_yolo_outputs(
+    outputs: list, strides: list, anchors: np.ndarray, kpts=None
+) -> np.ndarray:
     output = None
 
     for i, (x, s, a) in enumerate(zip(outputs, strides, anchors)):
@@ -121,14 +152,14 @@ def parse_yolo_outputs(outputs: list, strides: list, anchors: np.ndarray, kpts =
 
 
 def parse_yolo_output(
-    out: np.ndarray,
-    stride: int,
-    anchors: np.ndarray,
-    head_id: int = -1,
-    kpts: Optional[np.ndarray] = None
-    ) -> np.ndarray:
-    na = 1 if anchors is None else len(anchors) # number of anchors per head
-    bs, _, ny, nx = out.shape # bs - batch size, ny|nx - y and x of grid cells
+    out: np.ndarray,
+    stride: int,
+    anchors: np.ndarray,
+    head_id: int = -1,
+    kpts: Optional[np.ndarray] = None,
+) -> np.ndarray:
+    na = 1 if anchors is None else len(anchors)  # number of anchors per head
+    bs, _, ny, nx = out.shape  # bs - batch size, ny|nx - y and x of grid cells
 
     grid = make_grid_numpy(ny, nx, na)
 
@@ -144,24 +175,29 @@ def parse_yolo_output(
 
     if kpts is None:
         # Segmentation
-        x_coors = np.tile(np.arange(0,nx), (ny, 1))
-        x_coors = np.repeat(x_coors[np.newaxis, np.newaxis, ..., np.newaxis], 1, axis = 1)
+        x_coors = np.tile(np.arange(0, nx), (ny, 1))
+        x_coors = np.repeat(x_coors[np.newaxis, np.newaxis, ..., np.newaxis], 1, axis=1)
 
-        y_coors = np.tile(np.arange(0,ny)[np.newaxis, ...].T, (1, nx))
-        y_coors = np.repeat(y_coors[np.newaxis, np.newaxis, ..., np.newaxis], 1, axis = 1)
+        y_coors = np.tile(np.arange(0, ny)[np.newaxis, ...].T, (1, nx))
+        y_coors = np.repeat(y_coors[np.newaxis, np.newaxis, ..., np.newaxis], 1, axis=1)
 
-        ai = np.ones((bs,na,ny,nx)) * np.arange(na)[np.newaxis,..., np.newaxis,np.newaxis]
+        ai = (
+            np.ones((bs, na, ny, nx))
+            * np.arange(na)[np.newaxis, ..., np.newaxis, np.newaxis]
+        )
         ai = ai[..., np.newaxis]
-        hi = np.ones((bs,na,ny,nx,1)) * head_id
+        hi = np.ones((bs, na, ny, nx, 1)) * head_id
 
-        out = np.concatenate((out, hi, ai, x_coors, y_coors), axis=4).reshape(bs, na * ny * nx, -1)
+        out = np.concatenate((out, hi, ai, x_coors, y_coors), axis=4).reshape(
+            bs, na * ny * nx, -1
+        )
     else:
         # Keypoints
         if (kpts.shape[1] // 17) == 3:
             kpts[:, 2::3] = sigmoid(kpts[:, 2::3])
         kpts_out = kpts.transpose(0, 2, 1)
         out = out.reshape(bs, ny * nx, -1)
-        out = np.concatenate((out, kpts_out), axis = 2)
+        out = np.concatenate((out, kpts_out), axis=2)
 
     return out
 
 
@@ -171,26 +207,34 @@ def sigmoid(x: np.ndarray) -> np.ndarray:
 
 
 def 
process_single_mask(protos, mask_coeff, mask_conf): - mask = sigmoid(np.sum(protos * mask_coeff[..., np.newaxis, np.newaxis], axis = 0)) + mask = sigmoid(np.sum(protos * mask_coeff[..., np.newaxis, np.newaxis], axis=0)) return (mask > mask_conf).astype(np.uint8) def parse_kpts(kpts): kps = [] for idx in range(0, kpts.shape[0], 3): - x, y, conf = kpts[idx], kpts[idx+1], kpts[idx+2] + x, y, conf = kpts[idx], kpts[idx + 1], kpts[idx + 2] kps.append((int(x), int(y), conf)) return kps -def decode_yolo_output(yolo_outputs, strides, anchors, kpts=None, conf_thres=0.5, iou_thres=0.45, num_classes=1): +def decode_yolo_output( + yolo_outputs, + strides, + anchors, + kpts=None, + conf_thres=0.5, + iou_thres=0.45, + num_classes=1, +): output = parse_yolo_outputs(yolo_outputs, strides, anchors, kpts) output_nms = non_max_suppression( output, conf_thres=conf_thres, iou_thres=iou_thres, num_classes=num_classes, - kpts_mode=kpts is not None + kpts_mode=kpts is not None, )[0] return output_nms diff --git a/depthai_nodes/ml/parsers/yolo.py b/depthai_nodes/ml/parsers/yolo.py index d1d5a00..c57654d 100644 --- a/depthai_nodes/ml/parsers/yolo.py +++ b/depthai_nodes/ml/parsers/yolo.py @@ -1,4 +1,3 @@ - import depthai as dai import numpy as np @@ -11,12 +10,12 @@ class YOLOParser(dai.node.ThreadedHostNode): def __init__( - self, - confidence_threshold: int = 0.5, - num_classes: int = 1, - iou_threshold: int = 0.5, - mask_conf: float = 0.5 - ): + self, + confidence_threshold: int = 0.5, + num_classes: int = 1, + iou_threshold: int = 0.5, + mask_conf: float = 0.5, + ): """Initialize the YOLOParser node. @param confidence_threshold: The confidence threshold for the detections @@ -74,8 +73,13 @@ def _get_segmentation_outputs(self, nnDataIn): # Get all the layer names layer_names = nnDataIn.getAllLayerNames() mask_outputs = sorted([name for name in layer_names if "_masks" in name]) - masks_outputs_values = [nnDataIn.getTensor(o, dequantize=True).astype(np.float32) for o in mask_outputs] - protos_output = nnDataIn.getTensor("protos_output", dequantize=True).astype(np.float32) + masks_outputs_values = [ + nnDataIn.getTensor(o, dequantize=True).astype(np.float32) + for o in mask_outputs + ] + protos_output = nnDataIn.getTensor("protos_output", dequantize=True).astype( + np.float32 + ) protos_len = protos_output.shape[1] return masks_outputs_values, protos_output, protos_len @@ -83,38 +87,60 @@ def _reshape_seg_outputs(self, protos_output, protos_len, masks_outputs_values): """Reshape the segmentation outputs.""" protos_output = protos_output.transpose((2, 0, 1))[np.newaxis, ...] protos_len = protos_output.shape[1] - masks_outputs_values = [o.transpose((2, 0, 1))[np.newaxis, ...] for o in masks_outputs_values] + masks_outputs_values = [ + o.transpose((2, 0, 1))[np.newaxis, ...] 
for o in masks_outputs_values + ] return protos_output, protos_len, masks_outputs_values def run(self): while self.isRunning(): try: - nnDataIn : dai.NNData = self.input.get() + nnDataIn: dai.NNData = self.input.get() except dai.MessageQueue.QueueException: - break # Pipeline was stopped, no more data + break # Pipeline was stopped, no more data # Get all the layer names layer_names = nnDataIn.getAllLayerNames() outputs_names = sorted([name for name in layer_names if "_yolo" in name]) - outputs_values = [nnDataIn.getTensor(o, dequantize=True).astype(np.float32) for o in outputs_names] + outputs_values = [ + nnDataIn.getTensor(o, dequantize=True).astype(np.float32) + for o in outputs_names + ] if any("kpt_output" in name for name in layer_names): mode = KPTS_MODE # Get the keypoint outputs - kpts_output_names = sorted([name for name in layer_names if "kpt_output" in name]) - kpts_outputs = [nnDataIn.getTensor(o, dequantize=True).astype(np.float32) for o in kpts_output_names] + kpts_output_names = sorted( + [name for name in layer_names if "kpt_output" in name] + ) + kpts_outputs = [ + nnDataIn.getTensor(o, dequantize=True).astype(np.float32) + for o in kpts_output_names + ] elif any("_masks" in name for name in layer_names): mode = SEG_MODE # Get the segmentation outputs - masks_outputs_values, protos_output, protos_len = self._get_segmentation_outputs(nnDataIn) + ( + masks_outputs_values, + protos_output, + protos_len, + ) = self._get_segmentation_outputs(nnDataIn) if len(outputs_values[0].shape) != 4: # RVC4 - outputs_values = [o.transpose((2, 0, 1))[np.newaxis, ...] for o in outputs_values] + outputs_values = [ + o.transpose((2, 0, 1))[np.newaxis, ...] for o in outputs_values + ] if mode == KPTS_MODE: kpts_outputs = [o[np.newaxis, ...] for o in kpts_outputs] elif mode == SEG_MODE: - protos_output, protos_len, masks_outputs_values = self._reshape_seg_outputs(protos_output, protos_len, masks_outputs_values) + ( + protos_output, + protos_len, + masks_outputs_values, + ) = self._reshape_seg_outputs( + protos_output, protos_len, masks_outputs_values + ) # Decode the outputs results = decode_yolo_output( @@ -124,12 +150,17 @@ def run(self): kpts=kpts_outputs if mode == KPTS_MODE else None, conf_thres=self.confidence_threshold, iou_thres=self.iou_threshold, - num_classes=self.num_classes + num_classes=self.num_classes, ) bboxes, labels, scores, additional_output = [], [], [], [] for i in range(results.shape[0]): - bbox, conf, label, other = results[i, :4].astype(int), results[i, 4], results[i, 5].astype(int), results[i, 6:] + bbox, conf, label, other = ( + results[i, :4].astype(int), + results[i, 4], + results[i, 5].astype(int), + results[i, 6:], + ) bboxes.append(bbox) labels.append(int(label)) @@ -141,8 +172,12 @@ def run(self): elif mode == SEG_MODE: seg_coeff = other.astype(int) hi, ai, xi, yi = seg_coeff - mask_coeff = masks_outputs_values[hi][0, ai*protos_len:(ai+1)*protos_len, yi, xi] - mask = process_single_mask(protos_output[0], mask_coeff, self.mask_conf) + mask_coeff = masks_outputs_values[hi][ + 0, ai * protos_len : (ai + 1) * protos_len, yi, xi + ] + mask = process_single_mask( + protos_output[0], mask_coeff, self.mask_conf + ) additional_output.append(mask) if mode == KPTS_MODE: @@ -150,13 +185,10 @@ def run(self): np.array(bboxes), np.array(scores), labels, - keypoints=additional_output + keypoints=additional_output, ) elif mode == SEG_MODE: detections_message = create_detection_message( - np.array(bboxes), - np.array(scores), - labels, - masks=additional_output + 
np.array(bboxes), np.array(scores), labels, masks=additional_output ) self.out.send(detections_message)
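
For reference, create_detection_message (whose validation is reformatted above) takes parallel per-detection arrays. A minimal sketch with illustrative values; as the checks above show, each optional mask must be a 2D numpy array and the number of masks must match the number of boxes, otherwise the creator raises ValueError:

import numpy as np

from depthai_nodes.ml.messages.creators import create_detection_message

bboxes = np.array([[10, 20, 110, 220], [30, 40, 130, 240]], dtype=np.float32)  # xyxy
scores = np.array([0.91, 0.62], dtype=np.float32)
labels = [0, 1]
masks = [np.zeros((160, 160), dtype=np.uint8) for _ in range(len(bboxes))]  # one (H/4, W/4) mask per box

detections_msg = create_detection_message(bboxes, scores, labels, masks=masks)  # -> dai.ImgDetections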
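
The FastSAM variant of process_single_mask (parsers/utils/fastsam.py above) combines the prototype tensor with one detection's mask coefficients, upsamples the result to the network input size, crops it to the bounding box, and thresholds it at mask_conf. A self-contained sketch on random data, with shapes assumed from the calling code above (32 prototypes at a quarter of a 640x640 input):

import numpy as np

from depthai_nodes.ml.parsers.utils.fastsam import process_single_mask

rng = np.random.default_rng(0)
protos = rng.standard_normal((32, 160, 160)).astype(np.float32)  # (protos_len, H/4, W/4)
coeff = rng.standard_normal(32).astype(np.float32)  # coefficients for one detection

mask = process_single_mask(protos, coeff, 0.5, (640, 640), (100, 120, 300, 360))
assert mask.shape == (640, 640) and mask.dtype == np.uint8  # binary mask, zeroed outside the box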
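
parse_kpts (parsers/utils/yolo.py above) simply regroups a flat (x, y, conf) array into per-keypoint tuples, so a 17-keypoint pose head yields 51 values in and 17 tuples out. A quick sketch with illustrative values:

import numpy as np

from depthai_nodes.ml.parsers.utils.yolo import parse_kpts

flat = np.zeros(51, dtype=np.float32)  # 17 keypoints flattened as (x, y, conf)
flat[0::3], flat[1::3], flat[2::3] = 320, 240, 0.9

kps = parse_kpts(flat)
assert len(kps) == 17 and kps[0][:2] == (320, 240)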
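
Finally, a rough sketch of how one of these parsers might sit in a pipeline. Nothing here is part of the patch: it assumes a DepthAI v3-style pipeline, that FastSAMParser is exported from depthai_nodes.ml.parsers, that pipeline.create accepts host-node subclasses, and that createOutputQueue is available on node outputs; the model setup is omitted entirely, and its output layers must follow the "*_yolo", "*_masks" and "protos_output" naming the parser queries:

import depthai as dai

from depthai_nodes.ml.parsers import FastSAMParser  # assumed export path

with dai.Pipeline() as pipeline:
    nn = pipeline.create(dai.node.NeuralNetwork)
    # ... camera / model / archive configuration omitted ...

    parser = pipeline.create(FastSAMParser)
    parser.prompt = "bbox"  # plain attribute, set in __init__ above
    parser.setBoundingBox((100, 120, 300, 360))

    nn.out.link(parser.input)
    masks_q = parser.out.createOutputQueue()  # assumed v3 host-side queue API

    pipeline.start()
    while pipeline.isRunning():
        masks_msg = masks_q.get()  # SegmentationMasks built via create_sam_message
        print(masks_msg)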