Merge branch 'main' into feat/add-yolo-seg-kpts-parser

luxonis · Aug 29, 2024 · 0d4bec0 · 0d4bec0
2 parents cfeb93e + 9b4aff5
commit 0d4bec0
Show file tree

Hide file tree

Showing 13 changed files with 279 additions and 26 deletions.
diff --git a/depthai_nodes/ml/messages/__init__.py b/depthai_nodes/ml/messages/__init__.py
@@ -3,6 +3,7 @@
 from .keypoints import HandKeypoints, Keypoints
 from .lines import Line, Lines
 from .segmentation import SegmentationMasks
+from .misc import AgeGender
 
 __all__ = [
     "ImgDetectionWithAdditionalOutput",
@@ -13,4 +14,5 @@
     "Lines",
     "Classifications",
     "SegmentationMasks",
+    "AgeGender",
 ]
diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py
@@ -4,6 +4,7 @@
 from .image import create_image_message
 from .keypoints import create_hand_keypoints_message, create_keypoints_message
 from .segmentation import create_segmentation_message, create_sam_message
+from .misc import create_age_gender_message
 from .thermal import create_thermal_message
 from .tracked_features import create_tracked_features_message
 
@@ -19,4 +20,5 @@
     "create_thermal_message",
     "create_classification_message",
     "create_sam_message",
+    "create_age_gender_message",
 ]
diff --git a/depthai_nodes/ml/messages/creators/depth.py b/depthai_nodes/ml/messages/creators/depth.py
@@ -11,32 +11,22 @@ def create_depth_message(
 ) -> dai.ImgFrame:
     """Create a DepthAI message for a depth map.
 
-    @param depth_map: A NumPy array representing the depth map with shape (CHW or HWC).
+    @param depth_map: A NumPy array representing the depth map with shape (HW).
     @type depth_map: np.array
     @param depth_type: A string indicating the type of depth map. It can either be
         'relative' or 'metric'.
     @type depth_type: Literal['relative', 'metric']
     @return: An ImgFrame object containing the depth information.
     @rtype: dai.ImgFrame
     @raise ValueError: If the depth map is not a NumPy array.
-    @raise ValueError: If the depth map is not 3D.
-    @raise ValueError: If the depth map shape is not CHW or HWC.
+    @raise ValueError: If the depth map is not 2D.
     @raise ValueError: If the depth type is not 'relative' or 'metric'.
     """
 
     if not isinstance(depth_map, np.ndarray):
         raise ValueError(f"Expected numpy array, got {type(depth_map)}.")
-    if len(depth_map.shape) != 3:
-        raise ValueError(f"Expected 3D input, got {len(depth_map.shape)}D input.")
-
-    if depth_map.shape[0] == 1:
-        depth_map = depth_map[0, :, :]  # CHW to HW
-    elif depth_map.shape[2] == 1:
-        depth_map = depth_map[:, :, 0]  # HWC to HW
-    else:
-        raise ValueError(
-            "Unexpected image shape. Expected CHW or HWC, got", depth_map.shape
-        )
+    if len(depth_map.shape) != 2:
+        raise ValueError(f"Expected 2D input, got {len(depth_map.shape)}D input.")
 
     if depth_type == "relative":
         data_type = dai.ImgFrame.Type.RAW16

diff --git a/depthai_nodes/ml/messages/creators/misc.py b/depthai_nodes/ml/messages/creators/misc.py
@@ -0,0 +1,39 @@
+from typing import List
+
+from ...messages import AgeGender, Classifications
+
+
+def create_age_gender_message(age: float, gender_prob: List[float]) -> AgeGender:
+    """Create a DepthAI message for the age and gender probability.
+
+    @param age: Detected person age. The value is stored on the message unchanged
+        (no scaling is applied here), so pass it in the unit the consumer expects.
+    @type age: float
+    @param gender_prob: Detected person gender probability [female, male].
+    @type gender_prob: List[float]
+    @return: AgeGender message containing the predicted person's age and Classifications
+        message containing the classes and probabilities of the predicted gender.
+    @rtype: AgeGender
+    @raise ValueError: If age is not a float.
+    @raise ValueError: If gender_prob is not a list.
+    @raise ValueError: If gender_prob does not contain exactly two values.
+    @raise ValueError: If each item in gender_prob is not a float.
+    """
+
+    if not isinstance(age, float):
+        raise ValueError(f"age should be float, got {type(age)}.")
+
+    # Check against the builtin ``list``; ``typing.List`` is only meant for annotations.
+    if not isinstance(gender_prob, list):
+        raise ValueError(f"gender_prob should be list, got {type(gender_prob)}.")
+    # The scores are paired with exactly two classes below, so any other length
+    # would produce an inconsistent Classifications message.
+    if len(gender_prob) != 2:
+        raise ValueError(
+            f"gender_prob should contain 2 values [female, male], got {len(gender_prob)}."
+        )
+    for item in gender_prob:
+        if not isinstance(item, float):
+            raise ValueError(
+                f"gender_prob list values must be of type float, instead got {type(item)}."
+            )
+
+    age_gender_message = AgeGender()
+    age_gender_message.age = age
+    gender = Classifications()
+    gender.classes = ["female", "male"]
+    gender.scores = gender_prob
+    age_gender_message.gender = gender
+
+    return age_gender_message
diff --git a/depthai_nodes/ml/messages/misc.py b/depthai_nodes/ml/messages/misc.py
@@ -0,0 +1,34 @@
+import depthai as dai
+
+from ..messages import Classifications
+
+
+class AgeGender(dai.Buffer):
+    """Message holding a predicted age together with gender classification.
+
+    Attributes
+    ----------
+    age : float
+        Predicted age. It is None until a value is assigned.
+    gender : Classifications
+        Classifications message describing the predicted gender.
+    """
+
+    def __init__(self):
+        """Initializes the message with no age and an empty Classifications object."""
+        super().__init__()
+        self._age: float = None
+        self._gender = Classifications()
+
+    @property
+    def age(self) -> float:
+        """Returns the stored age.
+
+        @return: Stored age, or None if it has not been set.
+        @rtype: float
+        """
+        return self._age
+
+    @age.setter
+    def age(self, value: float):
+        """Sets the age.
+
+        @param value: Age to store.
+        @type value: float
+        @raise TypeError: If value is not a float.
+        """
+        if not isinstance(value, float):
+            # The message must name the attribute actually being validated.
+            raise TypeError(f"age must be of type float, instead got {type(value)}.")
+        self._age = value
+
+    @property
+    def gender(self) -> Classifications:
+        """Returns the stored gender classification.
+
+        @return: Stored Classifications message.
+        @rtype: Classifications
+        """
+        return self._gender
+
+    @gender.setter
+    def gender(self, value: Classifications):
+        """Sets the gender classification.
+
+        @param value: Classifications message to store.
+        @type value: Classifications
+        @raise TypeError: If value is not a Classifications object.
+        """
+        if not isinstance(value, Classifications):
+            raise TypeError(
+                f"gender must be of type Classifications, instead got {type(value)}."
+            )
+        self._gender = value
diff --git a/depthai_nodes/ml/parsers/__init__.py b/depthai_nodes/ml/parsers/__init__.py
@@ -1,4 +1,6 @@
+from .age_gender import AgeGenderParser
 from .classification import ClassificationParser
+from .hrnet import HRNetParser
 from .image_output import ImageOutputParser
 from .keypoints import KeypointParser
 from .mediapipe_hand_landmarker import MPHandLandmarkParser
@@ -30,4 +32,6 @@
     "ClassificationParser",
     "YOLOParser",
     "FastSAMParser",
+    "AgeGenderParser",
+    "HRNetParser",
 ]
diff --git a/depthai_nodes/ml/parsers/age_gender.py b/depthai_nodes/ml/parsers/age_gender.py
@@ -0,0 +1,43 @@
+import depthai as dai
+
+from ..messages.creators import create_age_gender_message
+
+
+class AgeGenderParser(dai.node.ThreadedHostNode):
+    """Parser class for parsing the output of the Age-Gender regression model.
+
+    Attributes
+    ----------
+    input : Node.Input
+        Node's input. The Neural Network node's output is linked here.
+    out : Node.Output
+        Node's output. The parsed result is sent here as a DepthAI message.
+
+    Output Message/s
+    ----------------
+    **Type**: AgeGender
+
+    **Description**: Message containing the detected person's age and a Classifications object with the detected person's gender probabilities.
+    """
+
+    def __init__(self):
+        """Initializes the AgeGenderParser node."""
+        dai.node.ThreadedHostNode.__init__(self)
+        self.input = dai.Node.Input(self)
+        self.out = dai.Node.Output(self)
+
+    def run(self):
+        """Reads NN outputs, builds an AgeGender message for each and sends it out."""
+        while self.isRunning():
+            try:
+                nn_data: dai.NNData = self.input.get()
+            except dai.MessageQueue.QueueException:
+                break  # Pipeline was stopped
+
+            # The "age_conv3" tensor is read as a single scalar and scaled by 100
+            # to convert it to years.
+            age_years = 100 * nn_data.getTensor("age_conv3", dequantize=True).item()
+            gender_scores = (
+                nn_data.getTensor("prob", dequantize=True).flatten().tolist()
+            )
+
+            message = create_age_gender_message(
+                age=age_years, gender_prob=gender_scores
+            )
+            message.setTimestamp(nn_data.getTimestamp())
+
+            self.out.send(message)
diff --git a/depthai_nodes/ml/parsers/hrnet.py b/depthai_nodes/ml/parsers/hrnet.py
@@ -0,0 +1,80 @@
+import depthai as dai
+import numpy as np
+
+from ..messages.creators import create_keypoints_message
+
+
+class HRNetParser(dai.node.ThreadedHostNode):
+    """Parser class for parsing the output of the HRNet pose estimation model. The code is inspired by https://github.com/ibaiGorordo/ONNX-HRNET-Human-Pose-Estimation.
+
+    Attributes
+    ----------
+    input : Node.Input
+        Node's input. The Neural Network node's output is linked here.
+    out : Node.Output
+        Node's output. The parsed result is sent here as a DepthAI message.
+    score_threshold : float
+        Confidence score threshold for detected keypoints.
+
+    Output Message/s
+    ----------------
+    **Type**: Keypoints
+
+    **Description**: Keypoints message containing detected body keypoints.
+    """
+
+    def __init__(self, score_threshold=0.5):
+        """Initializes the HRNetParser node.
+
+        @param score_threshold: Confidence score threshold for detected keypoints.
+        @type score_threshold: float
+        """
+        dai.node.ThreadedHostNode.__init__(self)
+        self.input = dai.Node.Input(self)
+        self.out = dai.Node.Output(self)
+
+        self.score_threshold = score_threshold
+
+    def setScoreThreshold(self, threshold):
+        """Sets the confidence score threshold for the detected body keypoints.
+
+        @param threshold: Confidence score threshold for detected keypoints.
+        @type threshold: float
+        """
+        self.score_threshold = threshold
+
+    def run(self):
+        """Turns each "heatmaps" tensor into a Keypoints message and sends it out."""
+        while self.isRunning():
+            try:
+                nn_data: dai.NNData = self.input.get()
+            except dai.MessageQueue.QueueException:
+                break  # Pipeline was stopped
+
+            heatmaps = nn_data.getTensor("heatmaps", dequantize=True)
+
+            # Drop the leading (batch) axis of a 4D tensor.
+            if heatmaps.ndim == 4:
+                heatmaps = heatmaps[0]
+            # A trailing axis of size 16 is treated as the heatmap axis and moved
+            # to the front (HW_ instead of _HW).
+            if heatmaps.shape[2] == 16:
+                heatmaps = heatmaps.transpose(2, 0, 1)
+            map_h, map_w = heatmaps.shape[1:]
+
+            # For every heatmap take its peak value as the score and the peak
+            # location (row, col) as the keypoint.
+            peak_scores = []
+            peak_positions = []
+            for heatmap in heatmaps:
+                peak_scores.append(np.max(heatmap))
+                peak_positions.append(
+                    np.unravel_index(heatmap.argmax(), heatmap.shape)
+                )
+            scores = np.array(peak_scores)
+            keypoints = np.array(peak_positions).astype(np.float32)
+
+            # Reorder (row, col) to (x, y) and normalize keypoints to [0, 1].
+            map_size = np.array([map_w, map_h])
+            keypoints = keypoints[:, ::-1] / map_size
+
+            keypoints_message = create_keypoints_message(
+                keypoints=keypoints,
+                scores=scores,
+                confidence_threshold=self.score_threshold,
+            )
+            keypoints_message.setTimestamp(nn_data.getTimestamp())
+
+            self.out.send(keypoints_message)
diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py
@@ -58,6 +58,7 @@ def run(self):
                 raise ValueError(
                     f"Expected 1 output layer, got {len(output_layer_names)}."
                 )
+
             output_image = output.getTensor(output_layer_names[0], dequantize=True)
 
             if len(output_image.shape) == 4:

diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py
@@ -58,9 +58,22 @@ def run(self):
                 raise ValueError(
                     f"Expected 1 output layer, got {len(output_layer_names)}."
                 )
-            depth_map = output.getTensor(output_layer_names[0])
 
-            depth_map = depth_map[0]
+            output_map = output.getTensor(output_layer_names[0], dequantize=True)
+
+            if len(output_map.shape) == 3:
+                if output_map.shape[0] == 1:
+                    depth_map = output_map[0]
+                elif output_map.shape[2] == 1:
+                    depth_map = output_map[:, :, 0]
+            elif len(output_map.shape) == 2:
+                depth_map = output_map
+            elif len(output_map.shape) == 4:
+                depth_map = output_map[0][0]
+            else:
+                raise ValueError(
+                    f"Expected 3- or 2-dimensional output, got {len(output_map.shape)}-dimensional",
+                )
 
             depth_message = create_depth_message(
                 depth_map=depth_map,

diff --git a/depthai_nodes/ml/parsers/utils/xfeat.py b/depthai_nodes/ml/parsers/utils/xfeat.py
@@ -225,6 +225,9 @@ def detect_and_compute(
     grid = 2.0 * (mkpts / div_array) - 1.0
     grid = np.expand_dims(grid, axis=2)
 
+    if grid.size == 0:
+        return None
+
     # Numpy implementation of F.grid_sample
     map_x = grid[..., 0].reshape(-1).astype(np.float32)
     map_y = grid[..., 1].reshape(-1).astype(np.float32)

diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py
@@ -20,6 +20,8 @@ class XFeatParser(dai.node.ThreadedHostNode):
         Original image size.
     input_size : Tuple[float, float]
         Input image size.
+    max_keypoints : int
+        Maximum number of keypoints to keep.
     previous_results : np.ndarray
         Previous results from the model. Previous results are used to match keypoints between two frames.
 
@@ -38,6 +40,7 @@ def __init__(
         self,
         original_size: Tuple[float, float] = None,
         input_size: Tuple[float, float] = (640, 352),
+        max_keypoints: int = 4096,
     ):
         """Initializes the XFeatParser node.
 
@@ -51,6 +54,7 @@ def __init__(
         self.out = self.createOutput()
         self.original_size = original_size
         self.input_size = input_size
+        self.max_keypoints = max_keypoints
         self.previous_results = None
 
     def setOriginalSize(self, original_size):
@@ -69,6 +73,14 @@ def setInputSize(self, input_size):
         """
         self.input_size = input_size
 
+    def setMaxKeypoints(self, max_keypoints):
+        """Stores the upper limit on the number of keypoints to keep.
+
+        The value is saved on the node as-is; no validation is performed here.
+
+        @param max_keypoints: Maximum number of keypoints to keep.
+        @type max_keypoints: int
+        """
+        self.max_keypoints = max_keypoints
+
     def run(self):
         if self.original_size is None:
             raise ValueError("Original image size must be specified!")
@@ -95,14 +107,27 @@ def run(self):
                 )
 
             result = detect_and_compute(
-                feats, keypoints, resize_rate_w, resize_rate_h, self.input_size
-            )[0]
+                feats,
+                keypoints,
+                resize_rate_w,
+                resize_rate_h,
+                self.input_size,
+                self.max_keypoints,
+            )
+
+            if result is not None:
+                result = result[0]
+            else:
+                matched_points = dai.TrackedFeatures()
+                matched_points.setTimestamp(output.getTimestamp())
+                self.out.send(matched_points)
+                continue
 
             if self.previous_results is not None:
                 mkpts0, mkpts1 = match(self.previous_results, result)
                 matched_points = create_tracked_features_message(mkpts0, mkpts1)
                 matched_points.setTimestamp(output.getTimestamp())
                 self.out.send(matched_points)
-            else:
-                # save the result from first frame
-                self.previous_results = result
+
+            # save the result from first frame
+            self.previous_results = result