From d6dc43d4f7ffc8a3497d9b9fd5b9016ccad92833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:07:00 +0200 Subject: [PATCH 1/3] XFeat parser improvement. (#30) * Supporting no matches. * Add max_keypoints parameter. --- depthai_nodes/ml/parsers/utils/xfeat.py | 3 +++ depthai_nodes/ml/parsers/xfeat.py | 35 +++++++++++++++++++++---- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/depthai_nodes/ml/parsers/utils/xfeat.py b/depthai_nodes/ml/parsers/utils/xfeat.py index 5d2d62e..364d9b6 100644 --- a/depthai_nodes/ml/parsers/utils/xfeat.py +++ b/depthai_nodes/ml/parsers/utils/xfeat.py @@ -225,6 +225,9 @@ def detect_and_compute( grid = 2.0 * (mkpts / div_array) - 1.0 grid = np.expand_dims(grid, axis=2) + if grid.size == 0: + return None + # Numpy implementation of F.grid_sample map_x = grid[..., 0].reshape(-1).astype(np.float32) map_y = grid[..., 1].reshape(-1).astype(np.float32) diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py index 7a061fc..72db15e 100644 --- a/depthai_nodes/ml/parsers/xfeat.py +++ b/depthai_nodes/ml/parsers/xfeat.py @@ -20,6 +20,8 @@ class XFeatParser(dai.node.ThreadedHostNode): Original image size. input_size : Tuple[float, float] Input image size. + max_keypoints : int + Maximum number of keypoints to keep. previous_results : np.ndarray Previous results from the model. Previous results are used to match keypoints between two frames. @@ -38,6 +40,7 @@ def __init__( self, original_size: Tuple[float, float] = None, input_size: Tuple[float, float] = (640, 352), + max_keypoints: int = 4096, ): """Initializes the XFeatParser node. 
@@ -51,6 +54,7 @@ def __init__( self.out = self.createOutput() self.original_size = original_size self.input_size = input_size + self.max_keypoints = max_keypoints self.previous_results = None def setOriginalSize(self, original_size): @@ -69,6 +73,14 @@ def setInputSize(self, input_size): """ self.input_size = input_size + def setMaxKeypoints(self, max_keypoints): + """Sets the maximum number of keypoints to keep. + + @param max_keypoints: Maximum number of keypoints. + @type max_keypoints: int + """ + self.max_keypoints = max_keypoints + def run(self): if self.original_size is None: raise ValueError("Original image size must be specified!") @@ -95,14 +107,27 @@ def run(self): ) result = detect_and_compute( - feats, keypoints, resize_rate_w, resize_rate_h, self.input_size - )[0] + feats, + keypoints, + resize_rate_w, + resize_rate_h, + self.input_size, + self.max_keypoints, + ) + + if result is not None: + result = result[0] + else: + matched_points = dai.TrackedFeatures() + matched_points.setTimestamp(output.getTimestamp()) + self.out.send(matched_points) + continue if self.previous_results is not None: mkpts0, mkpts1 = match(self.previous_results, result) matched_points = create_tracked_features_message(mkpts0, mkpts1) matched_points.setTimestamp(output.getTimestamp()) self.out.send(matched_points) - else: - # save the result from first frame - self.previous_results = result + + # save the current result for matching against the next frame + self.previous_results = result From 1043366a97bf81aa3ddcbafdaedcdfd163ada435 Mon Sep 17 00:00:00 2001 From: jkbmrz <74824974+jkbmrz@users.noreply.github.com> Date: Wed, 28 Aug 2024 08:11:03 +0200 Subject: [PATCH 2/3] Add parsers for HRNet and AgeGender models.
(#16) * feat: add support for age_gender model * feat: add support for HRNet model * fix: formatting and structure * fix: AgeGenderParser formatting and convert age to years * fix: HRNetParser formatting, remove comments, add normalization * fix: add timestamps to outgoing messages * Pre-commit fix. * Add Classifications msg to AgeGender. * Docstrings fix. --------- Co-authored-by: kkeroo <61207502+kkeroo@users.noreply.github.com> --- depthai_nodes/ml/messages/__init__.py | 2 + .../ml/messages/creators/__init__.py | 2 + depthai_nodes/ml/messages/creators/misc.py | 39 +++++++++ depthai_nodes/ml/messages/misc.py | 34 ++++++++ depthai_nodes/ml/parsers/__init__.py | 4 + depthai_nodes/ml/parsers/age_gender.py | 43 ++++++++++ depthai_nodes/ml/parsers/hrnet.py | 80 +++++++++++++++++++ 7 files changed, 204 insertions(+) create mode 100644 depthai_nodes/ml/messages/creators/misc.py create mode 100644 depthai_nodes/ml/messages/misc.py create mode 100644 depthai_nodes/ml/parsers/age_gender.py create mode 100644 depthai_nodes/ml/parsers/hrnet.py diff --git a/depthai_nodes/ml/messages/__init__.py b/depthai_nodes/ml/messages/__init__.py index 81d5876..9cc2183 100644 --- a/depthai_nodes/ml/messages/__init__.py +++ b/depthai_nodes/ml/messages/__init__.py @@ -2,6 +2,7 @@ from .img_detections import ImgDetectionsWithKeypoints, ImgDetectionWithKeypoints from .keypoints import HandKeypoints, Keypoints from .lines import Line, Lines +from .misc import AgeGender __all__ = [ "ImgDetectionWithKeypoints", @@ -11,4 +12,5 @@ "Line", "Lines", "Classifications", + "AgeGender", ] diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py index 3f15b5e..6750e48 100644 --- a/depthai_nodes/ml/messages/creators/__init__.py +++ b/depthai_nodes/ml/messages/creators/__init__.py @@ -3,6 +3,7 @@ from .detection import create_detection_message, create_line_detection_message from .image import create_image_message from .keypoints import 
create_hand_keypoints_message, create_keypoints_message +from .misc import create_age_gender_message from .segmentation import create_segmentation_message from .thermal import create_thermal_message from .tracked_features import create_tracked_features_message @@ -18,4 +19,5 @@ "create_keypoints_message", "create_thermal_message", "create_classification_message", + "create_age_gender_message", ] diff --git a/depthai_nodes/ml/messages/creators/misc.py b/depthai_nodes/ml/messages/creators/misc.py new file mode 100644 index 0000000..464b309 --- /dev/null +++ b/depthai_nodes/ml/messages/creators/misc.py @@ -0,0 +1,39 @@ +from typing import List + +from ...messages import AgeGender, Classifications + + +def create_age_gender_message(age: float, gender_prob: List[float]) -> AgeGender: + """Create a DepthAI message for the age and gender probability. + + @param age: Detected person age (must be multiplied by 100 to get years). + @type age: float + @param gender_prob: Detected person gender probability [female, male]. + @type gender_prob: List[float] + @return: AgeGender message containing the predicted person's age and Classifications + message containing the classes and probabilities of the predicted gender. + @rtype: AgeGender + @raise ValueError: If age is not a float. + @raise ValueError: If gender_prob is not a list. + @raise ValueError: If each item in gender_prob is not a float. + """ + + if not isinstance(age, float): + raise ValueError(f"age should be float, got {type(age)}.") + + if not isinstance(gender_prob, List): + raise ValueError(f"gender_prob should be list, got {type(gender_prob)}.") + for item in gender_prob: + if not isinstance(item, float): + raise ValueError( + f"gender_prob list values must be of type float, instead got {type(item)}." 
+ ) + + age_gender_message = AgeGender() + age_gender_message.age = age + gender = Classifications() + gender.classes = ["female", "male"] + gender.scores = gender_prob + age_gender_message.gender = gender + + return age_gender_message diff --git a/depthai_nodes/ml/messages/misc.py b/depthai_nodes/ml/messages/misc.py new file mode 100644 index 0000000..c3aaf83 --- /dev/null +++ b/depthai_nodes/ml/messages/misc.py @@ -0,0 +1,34 @@ +import depthai as dai + +from ..messages import Classifications + + +class AgeGender(dai.Buffer): + def __init__(self): + super().__init__() + self._age: float = None + self._gender = Classifications() + + @property + def age(self) -> float: + return self._age + + @age.setter + def age(self, value: float): + if not isinstance(value, float): + raise TypeError( + f"start_point must be of type float, instead got {type(value)}." + ) + self._age = value + + @property + def gender(self) -> Classifications: + return self._gender + + @gender.setter + def gender(self, value: Classifications): + if not isinstance(value, Classifications): + raise TypeError( + f"gender must be of type Classifications, instead got {type(value)}." 
+ ) + self._gender = value diff --git a/depthai_nodes/ml/parsers/__init__.py b/depthai_nodes/ml/parsers/__init__.py index 58dfb43..e167653 100644 --- a/depthai_nodes/ml/parsers/__init__.py +++ b/depthai_nodes/ml/parsers/__init__.py @@ -1,4 +1,6 @@ +from .age_gender import AgeGenderParser from .classification import ClassificationParser +from .hrnet import HRNetParser from .image_output import ImageOutputParser from .keypoints import KeypointParser from .mediapipe_hand_landmarker import MPHandLandmarkParser @@ -26,4 +28,6 @@ "XFeatParser", "ThermalImageParser", "ClassificationParser", + "AgeGenderParser", + "HRNetParser", ] diff --git a/depthai_nodes/ml/parsers/age_gender.py b/depthai_nodes/ml/parsers/age_gender.py new file mode 100644 index 0000000..212e7e4 --- /dev/null +++ b/depthai_nodes/ml/parsers/age_gender.py @@ -0,0 +1,43 @@ +import depthai as dai + +from ..messages.creators import create_age_gender_message + + +class AgeGenderParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the Age-Gender regression model. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + + Output Message/s + ---------------- + **Type**: AgeGender + + **Description**: Message containing the detected person age and Classifications object for storing information about the detected person's gender.
+ """ + + def __init__(self): + """Initializes the AgeGenderParser node.""" + dai.node.ThreadedHostNode.__init__(self) + self.input = dai.Node.Input(self) + self.out = dai.Node.Output(self) + + def run(self): + while self.isRunning(): + try: + output: dai.NNData = self.input.get() + except dai.MessageQueue.QueueException: + break # Pipeline was stopped + + age = output.getTensor("age_conv3", dequantize=True).item() + age *= 100 # convert to years + prob = output.getTensor("prob", dequantize=True).flatten().tolist() + + age_gender_message = create_age_gender_message(age=age, gender_prob=prob) + age_gender_message.setTimestamp(output.getTimestamp()) + + self.out.send(age_gender_message) diff --git a/depthai_nodes/ml/parsers/hrnet.py b/depthai_nodes/ml/parsers/hrnet.py new file mode 100644 index 0000000..be91d1c --- /dev/null +++ b/depthai_nodes/ml/parsers/hrnet.py @@ -0,0 +1,80 @@ +import depthai as dai +import numpy as np + +from ..messages.creators import create_keypoints_message + + +class HRNetParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the HRNet pose estimation model. The code is inspired by https://github.com/ibaiGorordo/ONNX-HRNET-Human-Pose-Estimation. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected keypoints. + + Output Message/s + ---------------- + **Type**: Keypoints + + **Description**: Keypoints message containing detected body keypoints. + """ + + def __init__(self, score_threshold=0.5): + """Initializes the HRNetParser node. + + @param score_threshold: Confidence score threshold for detected keypoints. 
+ @type score_threshold: float + """ + dai.node.ThreadedHostNode.__init__(self) + self.input = dai.Node.Input(self) + self.out = dai.Node.Output(self) + + self.score_threshold = score_threshold + + def setScoreThreshold(self, threshold): + """Sets the confidence score threshold for the detected body keypoints. + + @param threshold: Confidence score threshold for detected keypoints. + @type threshold: float + """ + self.score_threshold = threshold + + def run(self): + while self.isRunning(): + try: + output: dai.NNData = self.input.get() + except dai.MessageQueue.QueueException: + break # Pipeline was stopped + + heatmaps = output.getTensor("heatmaps", dequantize=True) + + if len(heatmaps.shape) == 4: + heatmaps = heatmaps[0] + if heatmaps.shape[2] == 16: # HW_ instead of _HW + heatmaps = heatmaps.transpose(2, 0, 1) + _, map_h, map_w = heatmaps.shape + + scores = np.array([np.max(heatmap) for heatmap in heatmaps]) + keypoints = np.array( + [ + np.unravel_index(heatmap.argmax(), heatmap.shape) + for heatmap in heatmaps + ] + ) + keypoints = keypoints.astype(np.float32) + keypoints = keypoints[:, ::-1] / np.array( + [map_w, map_h] + ) # normalize keypoints to [0, 1] + + keypoints_message = create_keypoints_message( + keypoints=keypoints, + scores=scores, + confidence_threshold=self.score_threshold, + ) + keypoints_message.setTimestamp(output.getTimestamp()) + + self.out.send(keypoints_message) From 9b4aff5b0ba9a1548dd3ca59c988397a0b716549 Mon Sep 17 00:00:00 2001 From: jkbmrz <74824974+jkbmrz@users.noreply.github.com> Date: Thu, 29 Aug 2024 08:53:09 +0200 Subject: [PATCH 3/3] RVC4 support for parsers v2 (#14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: YuNetParser dequantization * fix: YuNetParser input size estimation * fix: ImageOutputParser dequantization * style: pre-commit formatting * fix: MonocularDepthParser dequantization * fix: ensure depth map is a 2D array * fix: MonocularDepthParser output reshaping due 
to dequantization * Precommit fix. * Support for 4D tensors - RVC2 --------- Co-authored-by: Jaša Kerec <61207502+kkeroo@users.noreply.github.com> --- depthai_nodes/ml/messages/creators/depth.py | 18 +++----------- depthai_nodes/ml/parsers/image_output.py | 1 + depthai_nodes/ml/parsers/monocular_depth.py | 17 +++++++++++-- depthai_nodes/ml/parsers/yunet.py | 27 +++++++++++++++++---- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/depthai_nodes/ml/messages/creators/depth.py b/depthai_nodes/ml/messages/creators/depth.py index dabb44f..69ab60e 100644 --- a/depthai_nodes/ml/messages/creators/depth.py +++ b/depthai_nodes/ml/messages/creators/depth.py @@ -11,7 +11,7 @@ def create_depth_message( ) -> dai.ImgFrame: """Create a DepthAI message for a depth map. - @param depth_map: A NumPy array representing the depth map with shape (CHW or HWC). + @param depth_map: A NumPy array representing the depth map with shape (HW). @type depth_map: np.array @param depth_type: A string indicating the type of depth map. It can either be 'relative' or 'metric'. @@ -19,24 +19,14 @@ def create_depth_message( @return: An ImgFrame object containing the depth information. @rtype: dai.ImgFrame @raise ValueError: If the depth map is not a NumPy array. - @raise ValueError: If the depth map is not 3D. - @raise ValueError: If the depth map shape is not CHW or HWC. + @raise ValueError: If the depth map is not 2D. @raise ValueError: If the depth type is not 'relative' or 'metric'. """ if not isinstance(depth_map, np.ndarray): raise ValueError(f"Expected numpy array, got {type(depth_map)}.") - if len(depth_map.shape) != 3: - raise ValueError(f"Expected 3D input, got {len(depth_map.shape)}D input.") - - if depth_map.shape[0] == 1: - depth_map = depth_map[0, :, :] # CHW to HW - elif depth_map.shape[2] == 1: - depth_map = depth_map[:, :, 0] # HWC to HW - else: - raise ValueError( - "Unexpected image shape. 
Expected CHW or HWC, got", depth_map.shape - ) + if len(depth_map.shape) != 2: + raise ValueError(f"Expected 2D input, got {len(depth_map.shape)}D input.") if depth_type == "relative": data_type = dai.ImgFrame.Type.RAW16 diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index b159303..4ae4e50 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -58,6 +58,7 @@ def run(self): raise ValueError( f"Expected 1 output layer, got {len(output_layer_names)}." ) + output_image = output.getTensor(output_layer_names[0], dequantize=True) if len(output_image.shape) == 4: diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py index c76a8b6..5e47b68 100644 --- a/depthai_nodes/ml/parsers/monocular_depth.py +++ b/depthai_nodes/ml/parsers/monocular_depth.py @@ -58,9 +58,22 @@ def run(self): raise ValueError( f"Expected 1 output layer, got {len(output_layer_names)}." 
) - depth_map = output.getTensor(output_layer_names[0]) - depth_map = depth_map[0] + output_map = output.getTensor(output_layer_names[0], dequantize=True) + + if len(output_map.shape) == 3: + if output_map.shape[0] == 1: + depth_map = output_map[0] + elif output_map.shape[2] == 1: + depth_map = output_map[:, :, 0] + elif len(output_map.shape) == 2: + depth_map = output_map + elif len(output_map.shape) == 4: + depth_map = output_map[0][0] + else: + raise ValueError( + f"Expected 3- or 2-dimensional output, got {len(output_map.shape)}-dimensional", + ) depth_message = create_depth_message( depth_map=depth_map, diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py index 4db4ca9..c8ee782 100644 --- a/depthai_nodes/ml/parsers/yunet.py +++ b/depthai_nodes/ml/parsers/yunet.py @@ -98,7 +98,13 @@ def run(self): # get input_size stride0 = strides[0] - _, spatial_positions0, _ = output.getTensor(f"cls_{stride0}").shape + cls_stride0_shape = output.getTensor( + f"cls_{stride0}", dequantize=True + ).shape + if len(cls_stride0_shape) == 3: + _, spatial_positions0, _ = cls_stride0_shape + elif len(cls_stride0_shape) == 2: + spatial_positions0, _ = cls_stride0_shape input_width = input_height = int( math.sqrt(spatial_positions0) * stride0 ) # TODO: We assume a square input size. How to get input size when height and width are not equal? 
@@ -106,10 +112,21 @@ def run(self): detections = [] for stride in strides: - cls = output.getTensor(f"cls_{stride}").squeeze(0) - obj = output.getTensor(f"obj_{stride}").flatten() - bbox = output.getTensor(f"bbox_{stride}").squeeze(0) - kps = output.getTensor(f"kps_{stride}").squeeze(0) + cls = output.getTensor(f"cls_{stride}", dequantize=True) + cls = cls.astype(np.float32) + cls = cls.squeeze(0) if cls.shape[0] == 1 else cls + + obj = output.getTensor(f"obj_{stride}", dequantize=True).flatten() + obj = obj.astype(np.float32) + + bbox = output.getTensor(f"bbox_{stride}", dequantize=True) + bbox = bbox.astype(np.float32) + bbox = bbox.squeeze(0) if bbox.shape[0] == 1 else bbox + + kps = output.getTensor(f"kps_{stride}", dequantize=True) + kps = kps.astype(np.float32) + kps = kps.squeeze(0) if kps.shape[0] == 1 else kps + detections += decode_detections( input_size, stride,