From d6dc43d4f7ffc8a3497d9b9fd5b9016ccad92833 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ja=C5=A1a=20Kerec?=
 <61207502+kkeroo@users.noreply.github.com>
Date: Tue, 27 Aug 2024 17:07:00 +0200
Subject: [PATCH 1/3] XFeat parser improvement. (#30)

* Supporting no matches.

* Add max_keypoints parameter.
---
 depthai_nodes/ml/parsers/utils/xfeat.py |  3 +++
 depthai_nodes/ml/parsers/xfeat.py       | 35 +++++++++++++++++++++----
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/depthai_nodes/ml/parsers/utils/xfeat.py b/depthai_nodes/ml/parsers/utils/xfeat.py
index 5d2d62e..364d9b6 100644
--- a/depthai_nodes/ml/parsers/utils/xfeat.py
+++ b/depthai_nodes/ml/parsers/utils/xfeat.py
@@ -225,6 +225,9 @@ def detect_and_compute(
     grid = 2.0 * (mkpts / div_array) - 1.0
     grid = np.expand_dims(grid, axis=2)
 
+    if grid.size == 0:
+        return None
+
     # Numpy implementation of F.grid_sample
     map_x = grid[..., 0].reshape(-1).astype(np.float32)
     map_y = grid[..., 1].reshape(-1).astype(np.float32)
diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py
index 7a061fc..72db15e 100644
--- a/depthai_nodes/ml/parsers/xfeat.py
+++ b/depthai_nodes/ml/parsers/xfeat.py
@@ -20,6 +20,8 @@ class XFeatParser(dai.node.ThreadedHostNode):
         Original image size.
     input_size : Tuple[float, float]
         Input image size.
+    max_keypoints : int
+        Maximum number of keypoints to keep.
     previous_results : np.ndarray
         Previous results from the model. Previous results are used to match keypoints between two frames.
 
@@ -38,6 +40,7 @@ def __init__(
         self,
         original_size: Tuple[float, float] = None,
         input_size: Tuple[float, float] = (640, 352),
+        max_keypoints: int = 4096,
     ):
         """Initializes the XFeatParser node.
 
@@ -51,6 +54,7 @@ def __init__(
         self.out = self.createOutput()
         self.original_size = original_size
         self.input_size = input_size
+        self.max_keypoints = max_keypoints
         self.previous_results = None
 
     def setOriginalSize(self, original_size):
@@ -69,6 +73,14 @@ def setInputSize(self, input_size):
         """
         self.input_size = input_size
 
+    def setMaxKeypoints(self, max_keypoints):
+        """Sets the maximum number of keypoints to keep.
+
+        @param max_keypoints: Maximum number of keypoints.
+        @type max_keypoints: int
+        """
+        self.max_keypoints = max_keypoints
+
     def run(self):
         if self.original_size is None:
             raise ValueError("Original image size must be specified!")
@@ -95,14 +107,27 @@ def run(self):
                 )
 
             result = detect_and_compute(
-                feats, keypoints, resize_rate_w, resize_rate_h, self.input_size
-            )[0]
+                feats,
+                keypoints,
+                resize_rate_w,
+                resize_rate_h,
+                self.input_size,
+                self.max_keypoints,
+            )
+
+            if result is not None:
+                result = result[0]
+            else:
+                matched_points = dai.TrackedFeatures()
+                matched_points.setTimestamp(output.getTimestamp())
+                self.out.send(matched_points)
+                continue
 
             if self.previous_results is not None:
                 mkpts0, mkpts1 = match(self.previous_results, result)
                 matched_points = create_tracked_features_message(mkpts0, mkpts1)
                 matched_points.setTimestamp(output.getTimestamp())
                 self.out.send(matched_points)
-            else:
-                # save the result from first frame
-                self.previous_results = result
+
+            # save the current result so the next frame can be matched against it
+            self.previous_results = result

From 1043366a97bf81aa3ddcbafdaedcdfd163ada435 Mon Sep 17 00:00:00 2001
From: jkbmrz <74824974+jkbmrz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 08:11:03 +0200
Subject: [PATCH 2/3] Add parsers for HRNet and AgeGender models. (#16)

* feat: add support for age_gender model

* feat: add support for HRNet model

* fix: formatting and structure

* fix: AgeGenderParser formatting and convert age to years

* fix: HRNetParser formatting, remove comments, add normalization

* fix: add timestamps to outgoing messages

* Pre-commit fix.

* Add Classifications msg to AgeGender.

* Docstrings fix.

---------

Co-authored-by: kkeroo <61207502+kkeroo@users.noreply.github.com>
---
 depthai_nodes/ml/messages/__init__.py         |  2 +
 .../ml/messages/creators/__init__.py          |  2 +
 depthai_nodes/ml/messages/creators/misc.py    | 39 +++++++++
 depthai_nodes/ml/messages/misc.py             | 34 ++++++++
 depthai_nodes/ml/parsers/__init__.py          |  4 +
 depthai_nodes/ml/parsers/age_gender.py        | 43 ++++++++++
 depthai_nodes/ml/parsers/hrnet.py             | 80 +++++++++++++++++++
 7 files changed, 204 insertions(+)
 create mode 100644 depthai_nodes/ml/messages/creators/misc.py
 create mode 100644 depthai_nodes/ml/messages/misc.py
 create mode 100644 depthai_nodes/ml/parsers/age_gender.py
 create mode 100644 depthai_nodes/ml/parsers/hrnet.py

diff --git a/depthai_nodes/ml/messages/__init__.py b/depthai_nodes/ml/messages/__init__.py
index 81d5876..9cc2183 100644
--- a/depthai_nodes/ml/messages/__init__.py
+++ b/depthai_nodes/ml/messages/__init__.py
@@ -2,6 +2,7 @@
 from .img_detections import ImgDetectionsWithKeypoints, ImgDetectionWithKeypoints
 from .keypoints import HandKeypoints, Keypoints
 from .lines import Line, Lines
+from .misc import AgeGender
 
 __all__ = [
     "ImgDetectionWithKeypoints",
@@ -11,4 +12,5 @@
     "Line",
     "Lines",
     "Classifications",
+    "AgeGender",
 ]
diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py
index 3f15b5e..6750e48 100644
--- a/depthai_nodes/ml/messages/creators/__init__.py
+++ b/depthai_nodes/ml/messages/creators/__init__.py
@@ -3,6 +3,7 @@
 from .detection import create_detection_message, create_line_detection_message
 from .image import create_image_message
 from .keypoints import create_hand_keypoints_message, create_keypoints_message
+from .misc import create_age_gender_message
 from .segmentation import create_segmentation_message
 from .thermal import create_thermal_message
 from .tracked_features import create_tracked_features_message
@@ -18,4 +19,5 @@
     "create_keypoints_message",
     "create_thermal_message",
     "create_classification_message",
+    "create_age_gender_message",
 ]
diff --git a/depthai_nodes/ml/messages/creators/misc.py b/depthai_nodes/ml/messages/creators/misc.py
new file mode 100644
index 0000000..464b309
--- /dev/null
+++ b/depthai_nodes/ml/messages/creators/misc.py
@@ -0,0 +1,39 @@
+from typing import List
+
+from ...messages import AgeGender, Classifications
+
+
+def create_age_gender_message(age: float, gender_prob: List[float]) -> AgeGender:
+    """Create a DepthAI message for the age and gender probability.
+
+    @param age: Detected person age in years (the parser scales the raw model output by 100).
+    @type age: float
+    @param gender_prob: Detected person gender probability [female, male].
+    @type gender_prob: List[float]
+    @return: AgeGender message containing the predicted person's age and Classifications
+        message containing the classes and probabilities of the predicted gender.
+    @rtype: AgeGender
+    @raise ValueError: If age is not a float.
+    @raise ValueError: If gender_prob is not a list.
+    @raise ValueError: If each item in gender_prob is not a float.
+    """
+
+    if not isinstance(age, float):
+        raise ValueError(f"age should be float, got {type(age)}.")
+
+    if not isinstance(gender_prob, List):
+        raise ValueError(f"gender_prob should be list, got {type(gender_prob)}.")
+    for item in gender_prob:
+        if not isinstance(item, float):
+            raise ValueError(
+                f"gender_prob list values must be of type float, instead got {type(item)}."
+            )
+
+    age_gender_message = AgeGender()
+    age_gender_message.age = age
+    gender = Classifications()
+    gender.classes = ["female", "male"]
+    gender.scores = gender_prob
+    age_gender_message.gender = gender
+
+    return age_gender_message
diff --git a/depthai_nodes/ml/messages/misc.py b/depthai_nodes/ml/messages/misc.py
new file mode 100644
index 0000000..c3aaf83
--- /dev/null
+++ b/depthai_nodes/ml/messages/misc.py
@@ -0,0 +1,34 @@
+import depthai as dai
+
+from ..messages import Classifications
+
+
+class AgeGender(dai.Buffer):
+    """Message holding an age value and a gender Classifications message."""
+
+    def __init__(self):
+        super().__init__()
+        self._age: float = None
+        self._gender = Classifications()
+
+    @property
+    def age(self) -> float:
+        return self._age
+
+    @age.setter
+    def age(self, value: float):
+        if not isinstance(value, float):
+            raise TypeError(f"age must be of type float, instead got {type(value)}.")
+        self._age = value
+
+    @property
+    def gender(self) -> Classifications:
+        return self._gender
+
+    @gender.setter
+    def gender(self, value: Classifications):
+        if not isinstance(value, Classifications):
+            raise TypeError(
+                f"gender must be of type Classifications, instead got {type(value)}."
+            )
+        self._gender = value
diff --git a/depthai_nodes/ml/parsers/__init__.py b/depthai_nodes/ml/parsers/__init__.py
index 58dfb43..e167653 100644
--- a/depthai_nodes/ml/parsers/__init__.py
+++ b/depthai_nodes/ml/parsers/__init__.py
@@ -1,4 +1,6 @@
+from .age_gender import AgeGenderParser
 from .classification import ClassificationParser
+from .hrnet import HRNetParser
 from .image_output import ImageOutputParser
 from .keypoints import KeypointParser
 from .mediapipe_hand_landmarker import MPHandLandmarkParser
@@ -26,4 +28,6 @@
     "XFeatParser",
     "ThermalImageParser",
     "ClassificationParser",
+    "AgeGenderParser",
+    "HRNetParser",
 ]
diff --git a/depthai_nodes/ml/parsers/age_gender.py b/depthai_nodes/ml/parsers/age_gender.py
new file mode 100644
index 0000000..212e7e4
--- /dev/null
+++ b/depthai_nodes/ml/parsers/age_gender.py
@@ -0,0 +1,43 @@
+import depthai as dai
+
+from ..messages.creators import create_age_gender_message
+
+
+class AgeGenderParser(dai.node.ThreadedHostNode):
+    """Parser class for parsing the output of the Age-Gender regression model.
+
+    Attributes
+    ----------
+    input : Node.Input
+        Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+    out : Node.Output
+        Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved.
+
+    Output Message/s
+    ----------------
+    **Type**: AgeGender
+
+    **Description**: Message containing the detected person's age and a Classifications object storing information about the detected person's gender.
+    """
+
+    def __init__(self):
+        """Initializes the AgeGenderParser node."""
+        dai.node.ThreadedHostNode.__init__(self)
+        self.input = dai.Node.Input(self)
+        self.out = dai.Node.Output(self)
+
+    def run(self):
+        while self.isRunning():
+            try:
+                output: dai.NNData = self.input.get()
+            except dai.MessageQueue.QueueException:
+                break  # Pipeline was stopped
+
+            age = output.getTensor("age_conv3", dequantize=True).item()
+            age *= 100  # convert to years
+            prob = output.getTensor("prob", dequantize=True).flatten().tolist()
+
+            age_gender_message = create_age_gender_message(age=age, gender_prob=prob)
+            age_gender_message.setTimestamp(output.getTimestamp())
+
+            self.out.send(age_gender_message)
diff --git a/depthai_nodes/ml/parsers/hrnet.py b/depthai_nodes/ml/parsers/hrnet.py
new file mode 100644
index 0000000..be91d1c
--- /dev/null
+++ b/depthai_nodes/ml/parsers/hrnet.py
@@ -0,0 +1,80 @@
+import depthai as dai
+import numpy as np
+
+from ..messages.creators import create_keypoints_message
+
+
+class HRNetParser(dai.node.ThreadedHostNode):
+    """Parser class for parsing the output of the HRNet pose estimation model. The code is inspired by https://github.com/ibaiGorordo/ONNX-HRNET-Human-Pose-Estimation.
+
+    Attributes
+    ----------
+    input : Node.Input
+        Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+    out : Node.Output
+        Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved.
+    score_threshold : float
+        Confidence score threshold for detected keypoints.
+
+    Output Message/s
+    ----------------
+    **Type**: Keypoints
+
+    **Description**: Keypoints message containing detected body keypoints.
+    """
+
+    def __init__(self, score_threshold=0.5):
+        """Initializes the HRNetParser node.
+
+        @param score_threshold: Confidence score threshold for detected keypoints.
+        @type score_threshold: float
+        """
+        dai.node.ThreadedHostNode.__init__(self)
+        self.input = dai.Node.Input(self)
+        self.out = dai.Node.Output(self)
+
+        self.score_threshold = score_threshold
+
+    def setScoreThreshold(self, threshold):
+        """Sets the confidence score threshold for the detected body keypoints.
+
+        @param threshold: Confidence score threshold for detected keypoints.
+        @type threshold: float
+        """
+        self.score_threshold = threshold
+
+    def run(self):
+        while self.isRunning():
+            try:
+                output: dai.NNData = self.input.get()
+            except dai.MessageQueue.QueueException:
+                break  # Pipeline was stopped
+
+            heatmaps = output.getTensor("heatmaps", dequantize=True)
+
+            if len(heatmaps.shape) == 4:
+                heatmaps = heatmaps[0]
+            if heatmaps.shape[2] == 16:  # HW_ instead of _HW
+                heatmaps = heatmaps.transpose(2, 0, 1)
+            _, map_h, map_w = heatmaps.shape
+
+            scores = np.array([np.max(heatmap) for heatmap in heatmaps])
+            keypoints = np.array(
+                [
+                    np.unravel_index(heatmap.argmax(), heatmap.shape)
+                    for heatmap in heatmaps
+                ]
+            )
+            keypoints = keypoints.astype(np.float32)
+            keypoints = keypoints[:, ::-1] / np.array(
+                [map_w, map_h]
+            )  # normalize keypoints to [0, 1]
+
+            keypoints_message = create_keypoints_message(
+                keypoints=keypoints,
+                scores=scores,
+                confidence_threshold=self.score_threshold,
+            )
+            keypoints_message.setTimestamp(output.getTimestamp())
+
+            self.out.send(keypoints_message)

From 9b4aff5b0ba9a1548dd3ca59c988397a0b716549 Mon Sep 17 00:00:00 2001
From: jkbmrz <74824974+jkbmrz@users.noreply.github.com>
Date: Thu, 29 Aug 2024 08:53:09 +0200
Subject: [PATCH 3/3] RVC4 support for parsers v2 (#14)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: YuNetParser dequantization

* fix: YuNetParser input size estimation

* fix: ImageOutputParser dequantization

* style: pre-commit formatting

* fix: MonocularDepthParser dequantization

* fix: ensure depth map is a 2D array

* fix: MonocularDepthParser output reshaping due to dequantization

* Precommit fix.

* Support for 4D tensors - RVC2

---------

Co-authored-by: Jaša Kerec <61207502+kkeroo@users.noreply.github.com>
---
 depthai_nodes/ml/messages/creators/depth.py | 18 +++-----------
 depthai_nodes/ml/parsers/image_output.py    |  1 +
 depthai_nodes/ml/parsers/monocular_depth.py | 17 +++++++++++--
 depthai_nodes/ml/parsers/yunet.py           | 27 +++++++++++++++++----
 4 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/depthai_nodes/ml/messages/creators/depth.py b/depthai_nodes/ml/messages/creators/depth.py
index dabb44f..69ab60e 100644
--- a/depthai_nodes/ml/messages/creators/depth.py
+++ b/depthai_nodes/ml/messages/creators/depth.py
@@ -11,7 +11,7 @@ def create_depth_message(
 ) -> dai.ImgFrame:
     """Create a DepthAI message for a depth map.
 
-    @param depth_map: A NumPy array representing the depth map with shape (CHW or HWC).
+    @param depth_map: A NumPy array representing the depth map with shape (HW).
     @type depth_map: np.array
     @param depth_type: A string indicating the type of depth map. It can either be
         'relative' or 'metric'.
@@ -19,24 +19,14 @@ def create_depth_message(
     @return: An ImgFrame object containing the depth information.
     @rtype: dai.ImgFrame
     @raise ValueError: If the depth map is not a NumPy array.
-    @raise ValueError: If the depth map is not 3D.
-    @raise ValueError: If the depth map shape is not CHW or HWC.
+    @raise ValueError: If the depth map is not 2D.
     @raise ValueError: If the depth type is not 'relative' or 'metric'.
     """
 
     if not isinstance(depth_map, np.ndarray):
         raise ValueError(f"Expected numpy array, got {type(depth_map)}.")
-    if len(depth_map.shape) != 3:
-        raise ValueError(f"Expected 3D input, got {len(depth_map.shape)}D input.")
-
-    if depth_map.shape[0] == 1:
-        depth_map = depth_map[0, :, :]  # CHW to HW
-    elif depth_map.shape[2] == 1:
-        depth_map = depth_map[:, :, 0]  # HWC to HW
-    else:
-        raise ValueError(
-            "Unexpected image shape. Expected CHW or HWC, got", depth_map.shape
-        )
+    if len(depth_map.shape) != 2:
+        raise ValueError(f"Expected 2D input, got {len(depth_map.shape)}D input.")
 
     if depth_type == "relative":
         data_type = dai.ImgFrame.Type.RAW16
diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py
index b159303..4ae4e50 100644
--- a/depthai_nodes/ml/parsers/image_output.py
+++ b/depthai_nodes/ml/parsers/image_output.py
@@ -58,6 +58,7 @@ def run(self):
                 raise ValueError(
                     f"Expected 1 output layer, got {len(output_layer_names)}."
                 )
+
             output_image = output.getTensor(output_layer_names[0], dequantize=True)
 
             if len(output_image.shape) == 4:
diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py
index c76a8b6..5e47b68 100644
--- a/depthai_nodes/ml/parsers/monocular_depth.py
+++ b/depthai_nodes/ml/parsers/monocular_depth.py
@@ -58,9 +58,22 @@ def run(self):
                 raise ValueError(
                     f"Expected 1 output layer, got {len(output_layer_names)}."
                 )
-            depth_map = output.getTensor(output_layer_names[0])
 
-            depth_map = depth_map[0]
+            output_map = output.getTensor(output_layer_names[0], dequantize=True)
+
+            n_dims = len(output_map.shape)
+            if n_dims == 2:
+                depth_map = output_map
+            elif n_dims == 3 and output_map.shape[0] == 1:
+                depth_map = output_map[0]  # 1HW -> HW
+            elif n_dims == 3 and output_map.shape[2] == 1:
+                depth_map = output_map[:, :, 0]  # HW1 -> HW
+            elif n_dims == 4:
+                depth_map = output_map[0][0]  # take index 0 on the two leading axes
+            else:  # also catches 3D tensors without a singleton first/last axis
+                raise ValueError(
+                    f"Expected 2D, 3D (1HW/HW1) or 4D output, got shape {output_map.shape}."
+                )
 
             depth_message = create_depth_message(
                 depth_map=depth_map,
diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py
index 4db4ca9..c8ee782 100644
--- a/depthai_nodes/ml/parsers/yunet.py
+++ b/depthai_nodes/ml/parsers/yunet.py
@@ -98,7 +98,13 @@ def run(self):
 
             # get input_size
             stride0 = strides[0]
-            _, spatial_positions0, _ = output.getTensor(f"cls_{stride0}").shape
+            cls_stride0_shape = output.getTensor(
+                f"cls_{stride0}", dequantize=True
+            ).shape
+            if len(cls_stride0_shape) not in (2, 3):
+                raise ValueError(f"Unexpected cls_{stride0} shape {cls_stride0_shape}.")
+            # 3D: middle axis, 2D: first axis -- i.e. shape[-2] in both cases
+            spatial_positions0 = cls_stride0_shape[-2]
             input_width = input_height = int(
                 math.sqrt(spatial_positions0) * stride0
             )  # TODO: We assume a square input size. How to get input size when height and width are not equal?
@@ -106,10 +112,21 @@ def run(self):
 
             detections = []
             for stride in strides:
-                cls = output.getTensor(f"cls_{stride}").squeeze(0)
-                obj = output.getTensor(f"obj_{stride}").flatten()
-                bbox = output.getTensor(f"bbox_{stride}").squeeze(0)
-                kps = output.getTensor(f"kps_{stride}").squeeze(0)
+                cls = output.getTensor(f"cls_{stride}", dequantize=True)
+                cls = cls.astype(np.float32)
+                cls = cls.squeeze(0) if cls.shape[0] == 1 else cls
+
+                obj = output.getTensor(f"obj_{stride}", dequantize=True).flatten()
+                obj = obj.astype(np.float32)
+
+                bbox = output.getTensor(f"bbox_{stride}", dequantize=True)
+                bbox = bbox.astype(np.float32)
+                bbox = bbox.squeeze(0) if bbox.shape[0] == 1 else bbox
+
+                kps = output.getTensor(f"kps_{stride}", dequantize=True)
+                kps = kps.astype(np.float32)
+                kps = kps.squeeze(0) if kps.shape[0] == 1 else kps
+
                 detections += decode_detections(
                     input_size,
                     stride,