diff --git a/depthai_nodes/ml/messages/creators/depth.py b/depthai_nodes/ml/messages/creators/depth.py
index 4d89be1..dabb44f 100644
--- a/depthai_nodes/ml/messages/creators/depth.py
+++ b/depthai_nodes/ml/messages/creators/depth.py
@@ -9,16 +9,19 @@ def create_depth_message(
 depth_map: np.array, depth_type: Literal["relative", "metric"]
 ) -> dai.ImgFrame:
- """Creates a depth message in the form of an ImgFrame using the provided depth map
- and depth type.
+ """Create a DepthAI message for a depth map.

- Args:
- depth_map (np.array): A NumPy array representing the depth map with shape (CHW or HWC).
- depth_type (Literal['relative', 'metric']): A string indicating the type of depth map.
- It can either be 'relative' or 'metric'.
-
- Returns:
- dai.ImgFrame: An ImgFrame object containing the depth information.
+ @param depth_map: A NumPy array representing the depth map with shape (CHW or HWC).
+ @type depth_map: np.array
+ @param depth_type: A string indicating the type of depth map. It can either be
+ 'relative' or 'metric'.
+ @type depth_type: Literal['relative', 'metric']
+ @return: An ImgFrame object containing the depth information.
+ @rtype: dai.ImgFrame
+ @raise ValueError: If the depth map is not a NumPy array.
+ @raise ValueError: If the depth map is not 3D.
+ @raise ValueError: If the depth map shape is not CHW or HWC.
+ @raise ValueError: If the depth type is not 'relative' or 'metric'.
 """

 if not isinstance(depth_map, np.ndarray):

diff --git a/depthai_nodes/ml/messages/creators/detection.py b/depthai_nodes/ml/messages/creators/detection.py
index 1fa31c0..69215d5 100644
--- a/depthai_nodes/ml/messages/creators/detection.py
+++ b/depthai_nodes/ml/messages/creators/detection.py
@@ -17,19 +17,33 @@ def create_detection_message(
 labels: List[int] = None,
 keypoints: List[List[Tuple[float, float]]] = None,
 ) -> dai.ImgDetections:
- """Create a message for the detection. The message contains the bounding boxes,
- labels, and confidence scores of detected objects. If there are no labels or we only
- have one class, we can set labels to None and all detections will have label set to
- 0.
-
- Args:
- bboxes (np.ndarray): Detected bounding boxes of shape (N,4) meaning [...,[x_min, y_min, x_max, y_max],...].
- scores (np.ndarray): Confidence scores of detected objects of shape (N,).
- labels (List[int], optional): Labels of detected objects of shape (N,). Defaults to None.
- keypoints (List[List[Tuple[float, float]]], optional): Keypoints of detected objects of shape (N,2). Defaults to None.
-
- Returns:
- dai.ImgDetections OR ImgDetectionsWithKeypoints: Message containing the bounding boxes, labels, confidence scores, and keypoints of detected objects.
+ """Create a DepthAI message for object detections.
+
+ @param bboxes: Bounding boxes of detected objects of shape (N,4) meaning [...,[x_min, y_min, x_max, y_max],...].
+ @type bboxes: np.ndarray
+ @param scores: Confidence scores of detected objects of shape (N,).
+ @type scores: np.ndarray
+ @param labels: Labels of detected objects of shape (N,). If None, all detections get label 0.
+ @type labels: Optional[List[int]]
+ @param keypoints: Keypoints of detected objects of shape (N,2).
+ @type keypoints: Optional[List[List[Tuple[float, float]]]]
+
+ @return: Message containing the bounding boxes, labels, confidence scores, and keypoints of detected objects.
+ @rtype: dai.ImgDetections OR ImgDetectionsWithKeypoints
+
+ @raise ValueError: If the bboxes are not a numpy array.
+ @raise ValueError: If the bboxes are not of shape (N,4).
+ @raise ValueError: If the bboxes 2nd dimension is not of size 4.
+ @raise ValueError: If the bboxes are not in format [x_min, y_min, x_max, y_max] where x_min < x_max and y_min < y_max.
+ @raise ValueError: If the scores are not a numpy array.
+ @raise ValueError: If the scores are not of shape (N,).
+ @raise ValueError: If the scores do not have the same length as bboxes.
+ @raise ValueError: If the labels are not a list.
+ @raise ValueError: If each label is not an integer.
+ @raise ValueError: If the labels do not have the same length as bboxes.
+ @raise ValueError: If the keypoints are not a list.
+ @raise ValueError: If each keypoint pair is not a tuple of two floats.
+ @raise ValueError: If the keypoints do not have the same length as bboxes.
 """

 # checks for bboxes
@@ -122,15 +136,22 @@ def create_detection_message(


 def create_line_detection_message(lines: np.ndarray, scores: np.ndarray):
- """Create a message for the line detection. The message contains the lines and
- confidence scores of detected lines.
-
- Args:
- lines (np.ndarray): Detected lines of shape (N,4) meaning [...,[x_start, y_start, x_end, y_end],...].
- scores (np.ndarray): Confidence scores of detected lines of shape (N,).
-
- Returns:
- dai.Lines: Message containing the lines and confidence scores of detected lines.
+ """Create a DepthAI message for a line detection.
+
+ @param lines: Detected lines of shape (N,4) meaning [...,[x_start, y_start, x_end, y_end],...].
+ @type lines: np.ndarray
+ @param scores: Confidence scores of detected lines of shape (N,).
+ @type scores: np.ndarray
+
+ @return: Message containing the lines and confidence scores of detected lines.
+ @rtype: Lines
+
+ @raise ValueError: If the lines are not a numpy array.
+ @raise ValueError: If the lines are not of shape (N,4).
+ @raise ValueError: If the lines 2nd dimension is not of size 4.
+ @raise ValueError: If the scores are not a numpy array.
+ @raise ValueError: If the scores are not of shape (N,).
+ @raise ValueError: If the scores do not have the same length as lines.
 """

 # checks for lines
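Reviewer note: a minimal usage sketch of the reworked detection creator (array values are illustrative; the import path follows the module path shown in this diff):

```python
import numpy as np

from depthai_nodes.ml.messages.creators.detection import create_detection_message

# Two hypothetical detections in [x_min, y_min, x_max, y_max] format.
bboxes = np.array([[10.0, 20.0, 50.0, 80.0], [30.0, 40.0, 90.0, 120.0]])
scores = np.array([0.91, 0.78])

# labels may stay None for single-class models; every detection then gets label 0.
message = create_detection_message(bboxes, scores, labels=None, keypoints=None)
```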
diff --git a/depthai_nodes/ml/messages/creators/image.py b/depthai_nodes/ml/messages/creators/image.py
index 5492468..da5f069 100644
--- a/depthai_nodes/ml/messages/creators/image.py
+++ b/depthai_nodes/ml/messages/creators/image.py
@@ -7,13 +7,16 @@ def create_image_message(
 image: np.array,
 is_bgr: bool = True,
 ) -> dai.ImgFrame:
- """Create a depthai message for an image array.
+ """Create a DepthAI message for an image array.

+ @param image: Image array in HWC or CHW format.
 @type image: np.array
- @ivar image: Image array in HWC or CHW format.
+ @param is_bgr: If True, the image is in BGR format. If False, the image is in RGB
+ format. Defaults to True.
 @type is_bgr: bool
- @ivar is_bgr: If True, the image is in BGR format. If False, the image is in RGB
- format.
+ @return: dai.ImgFrame object containing the image information.
+ @rtype: dai.ImgFrame
+ @raise ValueError: If the image shape is not CHW or HWC.
 """

 if image.shape[0] in [1, 3]:
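Similarly, a short sketch for the image creator (the dummy frame stands in for a real network output):

```python
import numpy as np

from depthai_nodes.ml.messages.creators.image import create_image_message

# Dummy 480x640 BGR frame in HWC layout.
frame = np.zeros((480, 640, 3), dtype=np.uint8)
img_msg = create_image_message(frame, is_bgr=True)  # returns dai.ImgFrame
```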
diff --git a/depthai_nodes/ml/messages/creators/keypoints.py b/depthai_nodes/ml/messages/creators/keypoints.py
index f86ab55..c62ac1c 100644
--- a/depthai_nodes/ml/messages/creators/keypoints.py
+++ b/depthai_nodes/ml/messages/creators/keypoints.py
@@ -12,17 +12,25 @@ def create_hand_keypoints_message(
 confidence: float,
 confidence_threshold: float,
 ) -> HandKeypoints:
- """Create a message for the hand keypoint detection. The message contains the 3D
- coordinates of the detected hand keypoints, handedness, and confidence score.
-
- Args:
- hand_keypoints (np.ndarray): Detected hand keypoints of shape (N,3) meaning [...,[x, y, z],...].
- handedness (float): Handedness score of the detected hand (left or right).
- confidence (float): Confidence score of the detected hand.
- confidence_threshold (float): Confidence threshold for the overall hand.
-
- Returns:
- HandKeypoints: Message containing the 3D coordinates of the detected hand keypoints, handedness, and confidence score.
+ """Create a DepthAI message for hand keypoint detection.
+
+ @param hand_keypoints: Detected 3D hand keypoints of shape (N,3) meaning [...,[x, y, z],...].
+ @type hand_keypoints: np.ndarray
+ @param handedness: Handedness score of the detected hand (left: < 0.5, right: > 0.5).
+ @type handedness: float
+ @param confidence: Confidence score of the detected hand.
+ @type confidence: float
+ @param confidence_threshold: Confidence threshold for the detected hand.
+ @type confidence_threshold: float
+
+ @return: HandKeypoints message containing the detected hand keypoints, handedness, and confidence score.
+ @rtype: HandKeypoints
+
+ @raise ValueError: If the hand_keypoints are not a numpy array.
+ @raise ValueError: If the hand_keypoints are not of shape (N,3).
+ @raise ValueError: If the hand_keypoints 2nd dimension is not of size 3.
+ @raise ValueError: If the handedness is not a float.
+ @raise ValueError: If the confidence is not a float.
 """

 if not isinstance(hand_keypoints, np.ndarray):
@@ -63,16 +71,26 @@ def create_keypoints_message(
 scores: Union[np.ndarray, List[float]] = None,
 confidence_threshold: float = None,
 ) -> Keypoints:
- """Create a message for the keypoints. The message contains 2D or 3D coordinates of
- the detected keypoints.
-
- Args:
- keypoints (np.ndarray OR List[List[float]]): Detected keypoints of shape (N,2 or 3) meaning [...,[x, y],...] or [...,[x, y, z],...].
- scores (np.ndarray or List[float]): Confidence scores of the detected keypoints.
- confidence_threshold (float): Confidence threshold for the keypoints.
-
- Returns:
- Keypoints: Message containing 2D or 3D coordinates of the detected keypoints.
+ """Create a DepthAI message for keypoints.
+
+ @param keypoints: Detected 2D or 3D keypoints of shape (N,2 or 3) meaning [...,[x, y],...] or [...,[x, y, z],...].
+ @type keypoints: np.ndarray or List[List[float]]
+ @param scores: Confidence scores of the detected keypoints.
+ @type scores: np.ndarray or List[float]
+ @param confidence_threshold: Confidence threshold of keypoint detections.
+ @type confidence_threshold: float
+
+ @return: Keypoints message containing the detected keypoints.
+ @rtype: Keypoints
+
+ @raise ValueError: If the keypoints are not a numpy array or list.
+ @raise ValueError: If the keypoints are not of shape (N,2 or 3).
+ @raise ValueError: If the keypoints 2nd dimension is not of size 2 or 3.
+ @raise ValueError: If the scores are not a numpy array or list.
+ @raise ValueError: If the scores are not of shape (N,).
+ @raise ValueError: If the keypoints and scores do not have the same length.
+ @raise ValueError: If the confidence threshold is not a float.
+ @raise ValueError: If the confidence threshold is not provided when scores are provided.
 """

 if not isinstance(keypoints, np.ndarray):
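A sketch of the keypoints creator; note that, per the raises documented above, a confidence threshold must accompany scores:

```python
import numpy as np

from depthai_nodes.ml.messages.creators.keypoints import create_keypoints_message

# Three hypothetical 2D keypoints with per-point scores.
keypoints = np.array([[0.25, 0.40], [0.30, 0.45], [0.35, 0.50]])
scores = np.array([0.90, 0.85, 0.60])

kpts_msg = create_keypoints_message(
    keypoints, scores=scores, confidence_threshold=0.5
)
```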
""" if not isinstance(keypoints, np.ndarray): diff --git a/depthai_nodes/ml/messages/creators/segmentation.py b/depthai_nodes/ml/messages/creators/segmentation.py index 386f38f..07928c6 100644 --- a/depthai_nodes/ml/messages/creators/segmentation.py +++ b/depthai_nodes/ml/messages/creators/segmentation.py @@ -3,14 +3,16 @@ def create_segmentation_message(x: np.array) -> dai.ImgFrame: - """Create a message for the segmentation node output. Input is of the shape (H, W, - 1). In the third dimesion we specify the class of the segmented objects. + """Create a DepthAI message for segmentation mask. - Args: - x (np.array): Input from the segmentation node. - - Returns: - dai.ImgFrame: Output segmentaion message in ImgFrame.Type.RAW8. + @param x: Segmentation map array of the shape (H, W, E{1}) where E{1} stands for the + class of the segmented objects. + @type x: np.array + @return: Output segmentaion message in ImgFrame.Type.RAW8. + @rtype: dai.ImgFrame + @raise ValueError: If the input is not a numpy array. + @raise ValueError: If the input is not 3D. + @raise ValueError: If the input 3rd dimension is not E{1}. """ if not isinstance(x, np.ndarray): diff --git a/depthai_nodes/ml/messages/creators/thermal.py b/depthai_nodes/ml/messages/creators/thermal.py index 2e82d41..8f0f0ae 100644 --- a/depthai_nodes/ml/messages/creators/thermal.py +++ b/depthai_nodes/ml/messages/creators/thermal.py @@ -3,14 +3,16 @@ def create_thermal_message(thermal_image: np.array) -> dai.ImgFrame: - """Creates a thermal image message in the form of an ImgFrame using the provided - thermal image array. - - Args: - thermal_image (np.array): A NumPy array representing the thermal image with shape (CHW or HWC). - - Returns: - dai.ImgFrame: An ImgFrame object containing the thermal information. + """Create a DepthAI message for thermal image. + + @param thermal_image: A NumPy array representing the thermal image with shape (CHW + or HWC). + @type thermal_image: np.array + @return: An ImgFrame object containing the thermal information. + @rtype: dai.ImgFrame + @raise ValueError: If the input is not a NumPy array. + @raise ValueError: If the input is not 3D. + @raise ValueError: If the input shape is not CHW or HWC. """ if not isinstance(thermal_image, np.ndarray): diff --git a/depthai_nodes/ml/messages/creators/tracked_features.py b/depthai_nodes/ml/messages/creators/tracked_features.py index 75fbfdb..7208ac7 100644 --- a/depthai_nodes/ml/messages/creators/tracked_features.py +++ b/depthai_nodes/ml/messages/creators/tracked_features.py @@ -5,14 +5,16 @@ def create_feature_point(x: float, y: float, id: int, age: int) -> dai.TrackedFeature: """Create a tracked feature point. - Args: - x (float): X coordinate of the feature point. - y (float): Y coordinate of the feature point. - id (int): ID of the feature point. - age (int): Age of the feature point. - - Returns: - dai.TrackedFeature: Tracked feature point. + @param x: X coordinate of the feature point. + @type x: float + @param y: Y coordinate of the feature point. + @type y: float + @param id: ID of the feature point. + @type id: int + @param age: Age of the feature point. + @type age: int + @return: Tracked feature point. + @rtype: dai.TrackedFeature """ feature = dai.TrackedFeature() @@ -27,14 +29,22 @@ def create_feature_point(x: float, y: float, id: int, age: int) -> dai.TrackedFe def create_tracked_features_message( reference_points: np.ndarray, target_points: np.ndarray ) -> dai.TrackedFeatures: - """Create a message for the tracked features. 
+ """Create a DepthAI message for tracked features. + + @param reference_points: Reference points of shape (N,2) meaning [...,[x, y],...]. + @type reference_points: np.ndarray + @param target_points: Target points of shape (N,2) meaning [...,[x, y],...]. + @type target_points: np.ndarray - Args: - reference_points (np.ndarray): Reference points of shape (N,2) meaning [...,[x, y],...]. - target_points (np.ndarray): Target points of shape (N,2) meaning [...,[x, y],...]. + @return: Message containing the tracked features. + @rtype: dai.TrackedFeatures - Returns: - dai.TrackedFeatures: Message containing the tracked features. + @raise ValueError: If the reference_points are not a numpy array. + @raise ValueError: If the reference_points are not of shape (N,2). + @raise ValueError: If the reference_points 2nd dimension is not of size E{2}. + @raise ValueError: If the target_points are not a numpy array. + @raise ValueError: If the target_points are not of shape (N,2). + @raise ValueError: If the target_points 2nd dimension is not of size E{2}. """ if not isinstance(reference_points, np.ndarray): diff --git a/depthai_nodes/ml/messages/img_detections.py b/depthai_nodes/ml/messages/img_detections.py index ebd1c39..9c6114e 100644 --- a/depthai_nodes/ml/messages/img_detections.py +++ b/depthai_nodes/ml/messages/img_detections.py @@ -4,16 +4,37 @@ class ImgDetectionWithKeypoints(dai.ImgDetection): + """ImgDetectionWithKeypoints class for storing image detection with keypoints. + + Attributes + ---------- + keypoints: List[Tuple[float, float]] + Keypoints of the image detection. + """ + def __init__(self): + """Initializes the ImgDetectionWithKeypoints object.""" dai.ImgDetection.__init__(self) # TODO: change to super().__init__()? self._keypoints: List[Tuple[float, float]] = [] @property def keypoints(self) -> List[Tuple[float, float]]: + """Returns the keypoints. + + @return: List of keypoints. + @rtype: List[Tuple[float, float]] + """ return self._keypoints @keypoints.setter def keypoints(self, value: List[Tuple[Union[int, float], Union[int, float]]]): + """Sets the keypoints. + + @param value: List of keypoints. + @type value: List[Tuple[Union[int, float], Union[int, float]]] + @raise TypeError: If the keypoints are not a list. + @raise TypeError: If each keypoint is not a tuple of two floats or integers. + """ if not isinstance(value, list): raise TypeError("Keypoints must be a list") for item in value: @@ -29,16 +50,38 @@ def keypoints(self, value: List[Tuple[Union[int, float], Union[int, float]]]): class ImgDetectionsWithKeypoints(dai.Buffer): + """ImgDetectionsWithKeypoints class for storing image detections with keypoints. + + Attributes + ---------- + detections: List[ImgDetectionWithKeypoints] + Image detections with keypoints. + """ + def __init__(self): + """Initializes the ImgDetectionsWithKeypoints object.""" dai.Buffer.__init__(self) # TODO: change to super().__init__()? self._detections: List[ImgDetectionWithKeypoints] = [] @property def detections(self) -> List[ImgDetectionWithKeypoints]: + """Returns the image detections with keypoints. + + @return: List of image detections with keypoints. + @rtype: List[ImgDetectionWithKeypoints] + """ return self._detections @detections.setter def detections(self, value: List[ImgDetectionWithKeypoints]): + """Sets the image detections with keypoints. + + @param value: List of image detections with keypoints. + @type value: List[ImgDetectionWithKeypoints] + @raise TypeError: If the detections are not a list. 
diff --git a/depthai_nodes/ml/messages/keypoints.py b/depthai_nodes/ml/messages/keypoints.py
index 2d2d466..693759e 100644
--- a/depthai_nodes/ml/messages/keypoints.py
+++ b/depthai_nodes/ml/messages/keypoints.py
@@ -4,16 +4,37 @@

 class Keypoints(dai.Buffer):
+ """Keypoints class for storing keypoints.
+
+ Attributes
+ ----------
+ keypoints: List[dai.Point3f]
+ List of dai.Point3f, each representing a keypoint.
+ """
+
 def __init__(self):
+ """Initializes the Keypoints object."""
 super().__init__()
 self._keypoints: List[dai.Point3f] = []

 @property
 def keypoints(self) -> List[dai.Point3f]:
+ """Returns the keypoints.
+
+ @return: List of keypoints.
+ @rtype: List[dai.Point3f]
+ """
 return self._keypoints

 @keypoints.setter
 def keypoints(self, value: List[dai.Point3f]):
+ """Sets the keypoints.
+
+ @param value: List of keypoints.
+ @type value: List[dai.Point3f]
+ @raise TypeError: If the keypoints are not a list.
+ @raise TypeError: If each keypoint is not of type dai.Point3f.
+ """
 if not isinstance(value, list):
 raise TypeError("keypoints must be a list.")
 for item in value:
@@ -23,27 +44,60 @@ def keypoints(self, value: List[dai.Point3f]):

 class HandKeypoints(Keypoints):
+ """HandKeypoints class for storing hand keypoints.
+
+ Attributes
+ ----------
+ confidence: float
+ Confidence of the hand keypoints.
+ handdedness: float
+ Handedness of the hand keypoints. 0.0 for left hand and 1.0 for right hand.
+ """
+
 def __init__(self):
+ """Initializes the HandKeypoints object."""
 Keypoints.__init__(self)
 self._confidence: float = 0.0
 self._handdedness: float = 0.0

 @property
 def confidence(self) -> float:
+ """Returns the confidence of the hand keypoints.
+
+ @return: Confidence of the hand keypoints.
+ @rtype: float
+ """
 return self._confidence

 @confidence.setter
 def confidence(self, value: float):
+ """Sets the confidence of the hand keypoints.
+
+ @param value: Confidence of the hand keypoints.
+ @type value: float
+ @raise TypeError: If the confidence is not a float.
+ """
 if not isinstance(value, float):
 raise TypeError("confidence must be a float.")
 self._confidence = value

 @property
 def handdedness(self) -> float:
+ """Returns the handdedness of the hand keypoints.
+
+ @return: Handdedness of the hand keypoints.
+ @rtype: float
+ """
 return self._handdedness

 @handdedness.setter
 def handdedness(self, value: float):
+ """Sets the handdedness of the hand keypoints.
+
+ @param value: Handdedness of the hand keypoints.
+ @type value: float
+ @raise TypeError: If the handdedness is not a float.
+ """
 if not isinstance(value, float):
 raise TypeError("handdedness must be a float.")
 self._handdedness = value
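Usage sketch for the hand-keypoints message (the attribute is spelled `handdedness` in the code, so the example keeps that spelling):

```python
import depthai as dai

from depthai_nodes.ml.messages.keypoints import HandKeypoints

hand = HandKeypoints()
hand.confidence = 0.95
hand.handdedness = 0.8  # > 0.5 is interpreted as the right hand
hand.keypoints = [dai.Point3f(0.1, 0.2, 0.0), dai.Point3f(0.3, 0.4, 0.0)]
```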
+ """ + def __init__(self): + """Initializes the Line object.""" super().__init__() self._start_point: dai.Point2f = None self._end_point: dai.Point2f = None @@ -12,10 +25,21 @@ def __init__(self): @property def start_point(self) -> dai.Point2f: + """Returns the start point of the line. + + @return: Start point of the line. + @rtype: dai.Point2f + """ return self._start_point @start_point.setter def start_point(self, value: dai.Point2f): + """Sets the start point of the line. + + @param value: Start point of the line. + @type value: dai.Point2f + @raise TypeError: If the start point is not of type dai.Point2f. + """ if not isinstance(value, dai.Point2f): raise TypeError( f"start_point must be of type Point2f, instead got {type(value)}." @@ -24,10 +48,21 @@ def start_point(self, value: dai.Point2f): @property def end_point(self) -> dai.Point2f: + """Returns the end point of the line. + + @return: End point of the line. + @rtype: dai.Point2f + """ return self._end_point @end_point.setter def end_point(self, value: dai.Point2f): + """Sets the end point of the line. + + @param value: End point of the line. + @type value: dai.Point2f + @raise TypeError: If the end point is not of type dai.Point2f. + """ if not isinstance(value, dai.Point2f): raise TypeError( f"end_point must be of type Point2f, instead got {type(value)}." @@ -36,10 +71,21 @@ def end_point(self, value: dai.Point2f): @property def confidence(self) -> float: + """Returns the confidence of the line. + + @return: Confidence of the line. + @rtype: float + """ return self._confidence @confidence.setter def confidence(self, value: float): + """Sets the confidence of the line. + + @param value: Confidence of the line. + @type value: float + @raise TypeError: If the confidence is not of type float. + """ if not isinstance(value, float): raise TypeError( f"confidence must be of type float, instead got {type(value)}." @@ -48,16 +94,37 @@ def confidence(self, value: float): class Lines(dai.Buffer): + """Lines class for storing lines. + + Attributes + ---------- + lines : List[Line] + List of detected lines. + """ + def __init__(self): + """Initializes the Lines object.""" super().__init__() self._lines: List[Line] = [] @property def lines(self) -> List[Line]: + """Returns the lines. + + @return: List of lines. + @rtype: List[Line] + """ return self._lines @lines.setter def lines(self, value: List[Line]): + """Sets the lines. + + @param value: List of lines. + @type value: List[Line] + @raise TypeError: If the lines are not a list. + @raise TypeError: If each line is not of type Line. + """ if not isinstance(value, List): raise TypeError( f"lines must be of type List[Line], instead got {type(value)}." diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index 5b0d88c..534db76 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -5,7 +5,37 @@ class ImageOutputParser(dai.node.ThreadedHostNode): + """Parser class for image-to-image models (e.g. DnCNN3, zero-dce etc.) where the + output is a modifed image (denoised, enhanced etc.). + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. 
diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py
index 5b0d88c..534db76 100644
--- a/depthai_nodes/ml/parsers/image_output.py
+++ b/depthai_nodes/ml/parsers/image_output.py
@@ -5,7 +5,37 @@

 class ImageOutputParser(dai.node.ThreadedHostNode):
+ """Parser class for image-to-image models (e.g. DnCNN3, zero-dce etc.) where the
+ output is a modified image (denoised, enhanced etc.).
+
+ Attributes
+ ----------
+ input : Node.Input
+ Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+ out : Node.Output
+ Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+ output_is_bgr : bool
+ Flag indicating if the output image is in BGR (Blue-Green-Red) format.
+
+ Output Message/s
+ ----------------
+ **Type**: dai.ImgFrame
+
+ **Description**: Image message containing the output image, e.g. denoised or enhanced images.
+
+ Error Handling
+ --------------
+ **ValueError**: If the output is not 3- or 4-dimensional.
+
+ **ValueError**: If the number of output layers is not 1.
+ """
+
 def __init__(self, output_is_bgr=False):
+ """Initializes the ImageOutputParser node.
+
+ @param output_is_bgr: Flag indicating if the output image is in BGR.
+ @type output_is_bgr: bool
+ """
 dai.node.ThreadedHostNode.__init__(self)
 self.input = dai.Node.Input(self)
 self.out = dai.Node.Output(self)
@@ -13,15 +43,10 @@ def __init__(self, output_is_bgr=False):
 self.output_is_bgr = output_is_bgr

 def setBGROutput(self):
+ """Sets the flag indicating that the output image is in BGR."""
 self.output_is_bgr = True

 def run(self):
- """Postprocessing logic for image-to-image models (e.g. DnCNN3, zero-dce etc.).
-
- Returns:
- dai.ImgFrame: uint8, grayscale HW / colorscale HWC BGR image.
- """
-
 while self.isRunning():
 try:
 output: dai.NNData = self.input.get()

diff --git a/depthai_nodes/ml/parsers/keypoints.py b/depthai_nodes/ml/parsers/keypoints.py
index 00cb130..4f9f8ce 100644
--- a/depthai_nodes/ml/parsers/keypoints.py
+++ b/depthai_nodes/ml/parsers/keypoints.py
@@ -5,11 +5,48 @@

 class KeypointParser(dai.node.ThreadedHostNode):
+ """Parser class for 2D or 3D keypoint models. It expects one output layer containing
+ keypoints. The number of keypoints must be specified. Moreover, the keypoints are
+ normalized by a scale factor if provided.
+
+ Attributes
+ ----------
+ input : Node.Input
+ Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+ out : Node.Output
+ Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+ scale_factor : float
+ Scale factor to divide the keypoints by.
+ num_keypoints : int
+ Number of keypoints the model detects.
+
+ Output Message/s
+ ----------------
+ **Type**: Keypoints
+
+ **Description**: Keypoints message containing 2D or 3D keypoints.
+
+ Error Handling
+ --------------
+ **ValueError**: If the number of keypoints is not specified.
+
+ **ValueError**: If the number of coordinates per keypoint is not 2 or 3.
+
+ **ValueError**: If the number of output layers is not 1.
+ """
+
 def __init__(
 self,
 scale_factor=1,
 num_keypoints=None,
 ):
+ """Initializes the KeypointParser node.
+
+ @param scale_factor: Scale factor to divide the keypoints by.
+ @type scale_factor: float
+ @param num_keypoints: Number of keypoints.
+ @type num_keypoints: int
+ """
 dai.node.ThreadedHostNode.__init__(self)
 self.input = dai.Node.Input(self)
 self.out = dai.Node.Output(self)
@@ -18,18 +55,22 @@ def __init__(
 self.num_keypoints = num_keypoints

 def setScaleFactor(self, scale_factor):
+ """Sets the scale factor to divide the keypoints by.
+
+ @param scale_factor: Scale factor to divide the keypoints by.
+ @type scale_factor: float
+ """
 self.scale_factor = scale_factor

 def setNumKeypoints(self, num_keypoints):
- self.num_keypoints = num_keypoints
+ """Sets the number of keypoints.

- def run(self):
- """Postprocessing logic for Keypoint model.
-
- Returns:
- dai.Keypoints: num_keypoints keypoints (2D or 3D).
+ @param num_keypoints: Number of keypoints.
+ @type num_keypoints: int """ + self.num_keypoints = num_keypoints + def run(self): if self.num_keypoints is None: raise ValueError("Number of keypoints must be specified!") diff --git a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py index a55bef9..f358c60 100644 --- a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py +++ b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py @@ -5,7 +5,42 @@ class MPHandLandmarkParser(dai.node.ThreadedHostNode): + """Parser class for MediaPipe Hand landmark model. It parses the output of the + MediaPipe Hand landmark model containing 21 3D hand landmarks. The landmarks are + normalized and sent as a message to the output. Besides landmarks, the message + contains confidence score and handedness score (right or left hand). + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for hand landmarks. + scale_factor : float + Scale factor to divide the landmarks by. + + Output Message/s + ---------------- + **Type**: HandLandmarks + + **Description**: HandLandmarks message containing normalized 21 3D landmarks, confidence score, and handedness score (right or left hand). + + See also + -------- + Official MediaPipe Hands solution: + https://ai.google.dev/edge/mediapipe/solutions/vision/hand_landmarker + """ + def __init__(self, score_threshold=0.5, scale_factor=224): + """Initialize MPHandLandmarkParser node. + + @param score_threshold: Confidence score threshold for hand landmarks. + @type score_threshold: float + @param scale_factor: Scale factor to divide the landmarks by. + @type scale_factor: float + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -14,18 +49,22 @@ def __init__(self, score_threshold=0.5, scale_factor=224): self.scale_factor = scale_factor def setScoreThreshold(self, threshold): + """Set the confidence score threshold for hand landmarks. + + @param threshold: Confidence score threshold for hand landmarks. + @type threshold: float + """ self.score_threshold = threshold def setScaleFactor(self, scale_factor): - self.scale_factor = scale_factor + """Set the scale factor to divide the landmarks by. - def run(self): - """Postprocessing logic for MediaPipe Hand landmark model. - - Returns: - HandLandmarks containing normalized 21 landmarks, confidence score, and handdedness score (right or left hand). + @param scale_factor: Scale factor to divide the landmarks by. + @type scale_factor: float """ + self.scale_factor = scale_factor + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py index 3a22960..0c992ec 100644 --- a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py +++ b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py @@ -7,7 +7,45 @@ class MPPalmDetectionParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the Mediapipe Palm detection model. 
+ As a result, the node sends out the detected hands in the form of a message containing
+ bounding boxes, labels, and confidence scores.
+
+ Attributes
+ ----------
+ input : Node.Input
+ Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+ out : Node.Output
+ Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+ score_threshold : float
+ Confidence score threshold for detected hands.
+ nms_threshold : float
+ Non-maximum suppression threshold.
+ top_k : int
+ Maximum number of detections to keep.
+
+ Output Message/s
+ ----------------
+ **Type**: dai.ImgDetections
+
+ **Description**: ImgDetections message containing bounding boxes, labels, and confidence scores of detected hands.
+
+ See also
+ --------
+ Official MediaPipe Hands solution:
+ https://ai.google.dev/edge/mediapipe/solutions/vision/hand_landmarker
+ """
+
 def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100):
+ """Initializes the MPPalmDetectionParser node.
+
+ @param score_threshold: Confidence score threshold for detected hands.
+ @type score_threshold: float
+ @param nms_threshold: Non-maximum suppression threshold.
+ @type nms_threshold: float
+ @param top_k: Maximum number of detections to keep.
+ @type top_k: int
+ """
 dai.node.ThreadedHostNode.__init__(self)
 self.input = dai.Node.Input(self)
 self.out = dai.Node.Output(self)
@@ -17,21 +55,30 @@ def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100):
 self.top_k = top_k

 def setConfidenceThreshold(self, threshold):
+ """Sets the confidence score threshold for detected hands.
+
+ @param threshold: Confidence score threshold for detected hands.
+ @type threshold: float
+ """
 self.score_threshold = threshold

 def setNMSThreshold(self, threshold):
+ """Sets the non-maximum suppression threshold.
+
+ @param threshold: Non-maximum suppression threshold.
+ @type threshold: float
+ """
 self.nms_threshold = threshold

 def setTopK(self, top_k):
- self.top_k = top_k
-
- def run(self):
- """Postprocessing logic for MediPipe Hand detection model.
+ """Sets the maximum number of detections to keep.

- Returns:
- dai.ImgDetections containing bounding boxes, labels, and confidence scores of detected hands.
+ @param top_k: Maximum number of detections to keep.
+ @type top_k: int
 """
+ self.top_k = top_k
+
 def run(self):
 while self.isRunning():
 try:
 output: dai.NNData = self.input.get()
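For context, a hedged wiring sketch for a parser node; it assumes the DepthAI v3 host-node API and that the parser is re-exported from `depthai_nodes.ml.parsers` (otherwise import it from its module), and it elides the neural network setup:

```python
import depthai as dai

from depthai_nodes.ml.parsers import MPPalmDetectionParser

pipeline = dai.Pipeline()
nn = pipeline.create(dai.node.NeuralNetwork)  # model/input setup elided
parser = pipeline.create(MPPalmDetectionParser)
parser.setConfidenceThreshold(0.6)

nn.out.link(parser.input)
# parser.out is then linked onward or consumed via an output queue.
```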
diff --git a/depthai_nodes/ml/parsers/mlsd.py b/depthai_nodes/ml/parsers/mlsd.py
index 2390a6d..e13dadf 100644
--- a/depthai_nodes/ml/parsers/mlsd.py
+++ b/depthai_nodes/ml/parsers/mlsd.py
@@ -6,12 +6,48 @@

 class MLSDParser(dai.node.ThreadedHostNode):
+ """Parser class for parsing the output of the M-LSD line detection model. As a
+ result, the node sends out the detected lines in the form of a message.
+
+ Attributes
+ ----------
+ input : Node.Input
+ Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+ out : Node.Output
+ Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+ nn_passthrough : Node.Input
+ Node's 2nd input. It accepts the passthrough of the Neural Network node, which is required for parsing the output of the M-LSD model.
+ It is a linking point to which the Neural Network's passthrough (actually the network's input) is linked.
+ topk_n : int
+ Number of top candidates to keep.
+ score_thr : float
+ Confidence score threshold for detected lines.
+ dist_thr : float
+ Distance threshold for merging lines.
+
+ Output Message/s
+ ----------------
+ **Type**: LineDetections
+
+ **Description**: LineDetections message containing detected lines and confidence scores.
+ """
+
 def __init__(
 self,
 topk_n=200,
 score_thr=0.10,
 dist_thr=20.0,
 ):
+ """Initializes the MLSDParser node.
+
+ @param topk_n: Number of top candidates to keep.
+ @type topk_n: int
+ @param score_thr: Confidence score threshold for detected lines.
+ @type score_thr: float
+ @param dist_thr: Distance threshold for merging lines.
+ @type dist_thr: float
+ """
 dai.node.ThreadedHostNode.__init__(self)
 self.input = dai.Node.Input(self)
 self.nn_passthrough = dai.Node.Input(self)
@@ -21,21 +57,30 @@ def __init__(
 self.dist_thr = dist_thr

 def setTopK(self, topk_n):
+ """Sets the number of top candidates to keep.
+
+ @param topk_n: Number of top candidates to keep.
+ @type topk_n: int
+ """
 self.topk_n = topk_n

 def setScoreThreshold(self, score_thr):
+ """Sets the confidence score threshold for detected lines.
+
+ @param score_thr: Confidence score threshold for detected lines.
+ @type score_thr: float
+ """
 self.score_thr = score_thr

 def setDistanceThreshold(self, dist_thr):
- self.dist_thr = dist_thr
+ """Sets the distance threshold for merging lines.

- def run(self):
- """Postprocessing logic for M-LSD line detection model.
-
- Returns:
- Normalized detected lines and confidence scores.
+ @param dist_thr: Distance threshold for merging lines.
+ @type dist_thr: float
 """
+ self.dist_thr = dist_thr
+
 def run(self):
 while self.isRunning():
 try:
 output: dai.NNData = self.input.get()

diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py
index afa9996..21e48d4 100644
--- a/depthai_nodes/ml/parsers/monocular_depth.py
+++ b/depthai_nodes/ml/parsers/monocular_depth.py
@@ -4,7 +4,34 @@

 class MonocularDepthParser(dai.node.ThreadedHostNode):
+ """Parser class for monocular depth models (e.g. Depth Anything model).
+
+ Attributes
+ ----------
+ input : Node.Input
+ Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+ out : Node.Output
+ Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+ depth_type : str
+ Type of depth output (relative or metric).
+
+ Output Message/s
+ ----------------
+ **Type**: dai.ImgFrame
+
+ **Description**: Depth message containing the depth map. The depth map is represented with dai.ImgFrame.
+
+ Error Handling
+ --------------
+ **ValueError**: If the number of output layers is not 1.
+ """
+
 def __init__(self, depth_type="relative"):
+ """Initializes the MonocularDepthParser node.
+
+ @param depth_type: Type of depth output (relative or metric).
+ @type depth_type: str + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -12,19 +39,14 @@ def __init__(self, depth_type="relative"): self.depth_type = depth_type def setRelativeDepthType(self): + """Sets the depth type to relative.""" self.depth_type = "relative" def setMetricDepthType(self): + """Sets the depth type to metric.""" self.depth_type = "metric" def run(self): - """Postprocessing logic for a model with monocular depth output (e.g.Depth - Anything model). - - Returns: - dai.ImgFrame: uint16, HW depth map. - """ - while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py index c107dff..b2f2411 100644 --- a/depthai_nodes/ml/parsers/scrfd.py +++ b/depthai_nodes/ml/parsers/scrfd.py @@ -6,7 +6,38 @@ class SCRFDParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the SCRFD face detection model. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected faces. + nms_threshold : float + Non-maximum suppression threshold. + top_k : int + Maximum number of detections to keep. + + Output Message/s + ---------------- + **Type**: dai.ImgDetections + + **Description**: ImgDetections message containing bounding boxes, labels, and confidence scores of detected faces. + """ + def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): + """Initializes the SCRFDParser node. + + @param score_threshold: Confidence score threshold for detected faces. + @type score_threshold: float + @param nms_threshold: Non-maximum suppression threshold. + @type nms_threshold: float + @param top_k: Maximum number of detections to keep. + @type top_k: int + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -16,30 +47,36 @@ def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): self.top_k = top_k def setConfidenceThreshold(self, threshold): + """Sets the confidence score threshold for detected faces. + + @param threshold: Confidence score threshold for detected faces. + @type threshold: float + """ self.score_threshold = threshold def setNMSThreshold(self, threshold): + """Sets the non-maximum suppression threshold. + + @param threshold: Non-maximum suppression threshold. + @type threshold: float + """ self.nms_threshold = threshold def setTopK(self, top_k): - self.top_k = top_k - - def run(self): - """Postprocessing logic for SCRFD model. + """Sets the maximum number of detections to keep. - Returns: - ... + @param top_k: Maximum number of detections to keep. 
+ @type top_k: int """ + self.top_k = top_k + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() except dai.MessageQueue.QueueException: break # Pipeline was stopped - print("SCRFD node") - print(f"Layer names = {output.getAllLayerNames()}") - score_8 = output.getTensor("score_8").flatten().astype(np.float32) score_16 = output.getTensor("score_16").flatten().astype(np.float32) score_32 = output.getTensor("score_32").flatten().astype(np.float32) diff --git a/depthai_nodes/ml/parsers/segmentation.py b/depthai_nodes/ml/parsers/segmentation.py index 2febe41..2f2fc2d 100644 --- a/depthai_nodes/ml/parsers/segmentation.py +++ b/depthai_nodes/ml/parsers/segmentation.py @@ -5,22 +5,50 @@ class SegmentationParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the segmentation models. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + background_class : bool + Whether to add additional layer for background. + + Output Message/s + ---------------- + **Type**: dai.ImgFrame + + **Description**: Segmentation message containing the segmentation mask. Every pixel belongs to exactly one class. + + Error Handling + -------------- + **ValueError**: If the number of output layers is not E{1}. + + **ValueError**: If the number of dimensions of the output tensor is not E{3}. + """ + def __init__(self, background_class=False): + """Initializes the SegmentationParser node. + + @param background_class: Whether to add additional layer for background. + @type background_class: bool + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) self.background_class = background_class def setBackgroundClass(self, background_class): - self.background_class = background_class - - def run(self): - """Postprocessing logic for Segmentation model. + """Sets the background class. - Returns: - Segmenation mask with classes given by the model and background class 0. + @param background_class: Whether to add additional layer for background. + @type background_class: bool """ + self.background_class = background_class + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/superanimal_landmarker.py b/depthai_nodes/ml/parsers/superanimal_landmarker.py index f417aff..48fd400 100644 --- a/depthai_nodes/ml/parsers/superanimal_landmarker.py +++ b/depthai_nodes/ml/parsers/superanimal_landmarker.py @@ -6,11 +6,38 @@ class SuperAnimalParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the SuperAnimal landmark model. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected keypoints. + scale_factor : float + Scale factor to divide the keypoints by. 
+
+ Output Message/s
+ ----------------
+ **Type**: Keypoints
+
+ **Description**: Keypoints message containing detected keypoints that exceed the confidence threshold.
+ """
+
 def __init__(
 self,
 score_threshold=0.5,
 scale_factor=256,
 ):
+ """Initializes the SuperAnimalParser node.
+
+ @param score_threshold: Confidence score threshold for detected keypoints.
+ @type score_threshold: float
+ @param scale_factor: Scale factor to divide the keypoints by.
+ @type scale_factor: float
+ """
 dai.node.ThreadedHostNode.__init__(self)
 self.input = dai.Node.Input(self)
 self.out = dai.Node.Output(self)
@@ -19,18 +46,22 @@ def __init__(
 self.scale_factor = scale_factor

 def setScoreThreshold(self, threshold):
+ """Sets the confidence score threshold for detected keypoints.
+
+ @param threshold: Confidence score threshold for detected keypoints.
+ @type threshold: float
+ """
 self.score_threshold = threshold

 def setScaleFactor(self, scale_factor):
- self.scale_factor = scale_factor
-
- def run(self):
- """Postprocessing logic for SuperAnimal landmark model.
+ """Sets the scale factor to divide the keypoints by.

- Returns:
- dai.Keypoints: Max 39 keypoints detected on the quadrupedal animal.
+ @param scale_factor: Scale factor to divide the keypoints by.
+ @type scale_factor: float
 """
+ self.scale_factor = scale_factor
+
 def run(self):
 while self.isRunning():
 try:
 output: dai.NNData = self.input.get()

diff --git a/depthai_nodes/ml/parsers/thermal_image.py b/depthai_nodes/ml/parsers/thermal_image.py
index 62f716f..44ed955 100644
--- a/depthai_nodes/ml/parsers/thermal_image.py
+++ b/depthai_nodes/ml/parsers/thermal_image.py
@@ -4,18 +4,30 @@

 class ThermalImageParser(dai.node.ThreadedHostNode):
+ """Parser class for parsing the output of models with thermal image output (e.g.
+ UGSR-FA).
+
+ Attributes
+ ----------
+ input : Node.Input
+ Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
+ out : Node.Output
+ Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+
+ Output Message/s
+ ----------------
+ **Type**: dai.ImgFrame
+
+ **Description**: Thermal message containing the thermal image.
+ """
+
 def __init__(self):
+ """Initializes the ThermalImageParser node."""
 dai.node.ThreadedHostNode.__init__(self)
 self.input = dai.Node.Input(self)
 self.out = dai.Node.Output(self)

 def run(self):
- """Postprocessing logic for a model with thermal image output (e.g. UGSR-FA).
-
- Returns:
- dai.ImgFrame: uint16, HW thermal image.
- """
-
 while self.isRunning():
 try:
 output: dai.NNData = self.input.get()

diff --git a/depthai_nodes/ml/parsers/utils/decode_detections.py b/depthai_nodes/ml/parsers/utils/decode_detections.py
index 2deb838..d10b85d 100644
--- a/depthai_nodes/ml/parsers/utils/decode_detections.py
+++ b/depthai_nodes/ml/parsers/utils/decode_detections.py
@@ -14,23 +14,31 @@
 ) -> List[Dict[str, Any]]:
 """Decode the detections from neural network output tensors.

- Args:
- input_size (float): The input size of the model that produced the detections, (width, height).
- stride (int): The stride used in the detection grid.
- rows (int): Number of rows in the detection grid.
- cols (int): Number of columns in the detection grid.
- score_threshold (float): Minimum score threshold for a detection to be considered valid.
- cls (np.ndarray): 2D array of class scores for each grid cell, shape (grid_size, num_classes). - obj (np.ndarray): 1D array of objectness scores for each grid cell, shape (grid_size,). - bbox (np.ndarray): 2D array of bounding box coordinates, shape (grid_size, 4). - kps (np.ndarray): 2D array of keypoint coordinates, shape (grid_size, num_keypoints * 2). + @param input_size: The input size of the model that produced the detections, (width, height). + @type input_size: float + @param stride: The stride used in the detection grid. + @type stride: int + @param rows: Number of rows in the detection grid. + @type rows: int + @param cols: Number of columns in the detection grid. + @type cols: int + @param score_threshold: Minimum score threshold for a detection to be considered valid. + @type score_threshold: float + @param cls: 2D array of class scores for each grid cell, shape (grid_size, num_classes). + @type cls: np.ndarray + @param obj: 1D array of objectness scores for each grid cell, shape (grid_size,). + @type obj: np.ndarray + @param bbox: 2D array of bounding box coordinates, shape (grid_size, 4). + @type bbox: np.ndarray + @param kps: 2D array of keypoint coordinates, shape (grid_size, num_keypoints * 2). + @type kps: np.ndarray - Returns: - List[Dict[str, Any]]: A list of detections, where each detection is a dictionary containing: - - "bbox": [x1, y1, width, height] (relative bounding box coordinates) - - "label": int (class label) - - "keypoints": List[float] (relative keypoint coordinates) - - "score": float (detection score) + @return: A list of detections, where each detection is a dictionary containing: + - "bbox": [x1, y1, width, height] (relative bounding box coordinates) + - "label": int (class label) + - "keypoints": List[float] (relative keypoint coordinates) + - "score": float (detection score) + @rtype: List[Dict[str, Any]] """ input_width, input_height = input_size diff --git a/depthai_nodes/ml/parsers/utils/denormalize.py b/depthai_nodes/ml/parsers/utils/denormalize.py index 7455403..148cfa7 100644 --- a/depthai_nodes/ml/parsers/utils/denormalize.py +++ b/depthai_nodes/ml/parsers/utils/denormalize.py @@ -4,12 +4,12 @@ def unnormalize_image(image, normalize=True): """Un-normalize an image tensor by scaling it to the [0, 255] range. - Args: - image (np.ndarray): The normalized image tensor of shape (H, W, C) or (C, H, W). - normalize (bool, optional): Whether to normalize the image tensor. Defaults to True. - - Returns: - np.ndarray: The un-normalized image. + @param image: The normalized image tensor of shape (H, W, C) or (C, H, W). + @type image: np.ndarray + @param normalize: Whether to normalize the image tensor. Defaults to True. + @type normalize: bool + @return: The un-normalized image. 
+ @rtype: np.ndarray """ # Normalize the image tensor to the range [0, 1] if normalize: diff --git a/depthai_nodes/ml/parsers/utils/medipipe.py b/depthai_nodes/ml/parsers/utils/medipipe.py index e298659..8d0234b 100644 --- a/depthai_nodes/ml/parsers/utils/medipipe.py +++ b/depthai_nodes/ml/parsers/utils/medipipe.py @@ -9,9 +9,6 @@ License: MIT License -MIT License ------------ - Copyright (c) [2021] [geax] """ @@ -166,69 +163,69 @@ def generate_handtracker_anchors(input_size_width, input_size_height): def decode_bboxes(score_thresh, scores, bboxes, anchors, scale=128, best_only=False): - """Wi, hi : NN input shape - mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc # Decodes - the detection tensors generated by the model, based on # the SSD anchors and the - specification in the options, into a vector of # detections. Each detection - describes a detected object. - - https://github.com/google/mediapipe/blob/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt : - node { - calculator: "TensorsToDetectionsCalculator" - input_stream: "TENSORS:detection_tensors" - input_side_packet: "ANCHORS:anchors" - output_stream: "DETECTIONS:unfiltered_detections" - options: { - [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { - num_classes: 1 - num_boxes: 896 - num_coords: 18 - box_coord_offset: 0 - keypoint_coord_offset: 4 - num_keypoints: 7 - num_values_per_keypoint: 2 - sigmoid_score: true - score_clipping_thresh: 100.0 - reverse_output_order: true - - x_scale: 128.0 - y_scale: 128.0 - h_scale: 128.0 - w_scale: 128.0 - min_score_thresh: 0.5 - } - } - } - node { - calculator: "TensorsToDetectionsCalculator" - input_stream: "TENSORS:detection_tensors" - input_side_packet: "ANCHORS:anchors" - output_stream: "DETECTIONS:unfiltered_detections" - options: { - [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { - num_classes: 1 - num_boxes: 2016 - num_coords: 18 - box_coord_offset: 0 - keypoint_coord_offset: 4 - num_keypoints: 7 - num_values_per_keypoint: 2 - sigmoid_score: true - score_clipping_thresh: 100.0 - reverse_output_order: true - - x_scale: 192.0 - y_scale: 192.0 - w_scale: 192.0 - h_scale: 192.0 - min_score_thresh: 0.5 - } - } - } - - scores: shape = [number of anchors 896 or 2016] - bboxes: shape = [ number of anchors x 18], 18 = 4 (bounding box : (cx,cy,w,h) + 14 (7 palm keypoints) - """ + # Wi, hi : NN input shape + # mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc # Decodes + # the detection tensors generated by the model, based on # the SSD anchors and the + # specification in the options, into a vector of # detections. Each detection + # describes a detected object. 
+ + # https://github.com/google/mediapipe/blob/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt : + # node { + # calculator: "TensorsToDetectionsCalculator" + # input_stream: "TENSORS:detection_tensors" + # input_side_packet: "ANCHORS:anchors" + # output_stream: "DETECTIONS:unfiltered_detections" + # options: { + # [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + # num_classes: 1 + # num_boxes: 896 + # num_coords: 18 + # box_coord_offset: 0 + # keypoint_coord_offset: 4 + # num_keypoints: 7 + # num_values_per_keypoint: 2 + # sigmoid_score: true + # score_clipping_thresh: 100.0 + # reverse_output_order: true + + # x_scale: 128.0 + # y_scale: 128.0 + # h_scale: 128.0 + # w_scale: 128.0 + # min_score_thresh: 0.5 + # } + # } + # } + # node { + # calculator: "TensorsToDetectionsCalculator" + # input_stream: "TENSORS:detection_tensors" + # input_side_packet: "ANCHORS:anchors" + # output_stream: "DETECTIONS:unfiltered_detections" + # options: { + # [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + # num_classes: 1 + # num_boxes: 2016 + # num_coords: 18 + # box_coord_offset: 0 + # keypoint_coord_offset: 4 + # num_keypoints: 7 + # num_values_per_keypoint: 2 + # sigmoid_score: true + # score_clipping_thresh: 100.0 + # reverse_output_order: true + + # x_scale: 192.0 + # y_scale: 192.0 + # w_scale: 192.0 + # h_scale: 192.0 + # min_score_thresh: 0.5 + # } + # } + # } + + # scores: shape = [number of anchors 896 or 2016] + # bboxes: shape = [ number of anchors x 18], 18 = 4 (bounding box : (cx,cy,w,h) + 14 (7 palm keypoints) + regions = [] scores = 1 / (1 + np.exp(-scores)) if best_only: diff --git a/depthai_nodes/ml/parsers/utils/mlsd.py b/depthai_nodes/ml/parsers/utils/mlsd.py index 3b97805..2306ca2 100644 --- a/depthai_nodes/ml/parsers/utils/mlsd.py +++ b/depthai_nodes/ml/parsers/utils/mlsd.py @@ -6,6 +6,18 @@ def decode_scores_and_points( tpMap: np.ndarray, heat: np.ndarray, topk_n: int ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Decode the scores and points from the neural network output tensors. Used for + MLSD model. + + @param tpMap: Tensor containing the vector map. + @type tpMap: np.ndarray + @param heat: Tensor containing the heat map. + @type heat: np.ndarray + @param topk_n: Number of top candidates to keep. + @type topk_n: int + @return: Detected points, confidence scores for the detected points, and vector map. + @rtype: Tuple[np.ndarray, np.ndarray, np.ndarray] + """ b, c, h, w = tpMap.shape displacement = tpMap[:, 1:5, :, :][0] @@ -27,6 +39,24 @@ def get_lines( dist_thr: float, input_size: int = 512, ) -> Tuple[np.ndarray, List[float]]: + """Get lines from the detected points and scores. The lines are filtered by the + score threshold and distance threshold. Used for MLSD model. + + @param pts: Detected points. + @type pts: np.ndarray + @param pts_score: Confidence scores for the detected points. + @type pts_score: np.ndarray + @param vmap: Vector map. + @type vmap: np.ndarray + @param score_thr: Confidence score threshold for detected lines. + @type score_thr: float + @param dist_thr: Distance threshold for merging lines. + @type dist_thr: float + @param input_size: Input size of the model. + @type input_size: int + @return: Detected lines and their confidence scores. 
+ @rtype: Tuple[np.ndarray, List[float]] + """ start = vmap[:, :, :2] end = vmap[:, :, 2:] dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1)) diff --git a/depthai_nodes/ml/parsers/utils/superanimal.py b/depthai_nodes/ml/parsers/utils/superanimal.py index dd1dc01..8c416b6 100644 --- a/depthai_nodes/ml/parsers/utils/superanimal.py +++ b/depthai_nodes/ml/parsers/utils/superanimal.py @@ -2,6 +2,13 @@ def get_top_values(heatmap): + """Get the top values from the heatmap tensor. + + @param heatmap: Heatmap tensor. + @type heatmap: np.ndarray + @return: Y and X coordinates of the top values. + @rtype: Tuple[np.ndarray, np.ndarray] + """ batchsize, ny, nx, num_joints = heatmap.shape heatmap_flat = heatmap.reshape(batchsize, nx * ny, num_joints) @@ -12,6 +19,18 @@ def get_top_values(heatmap): def get_pose_prediction(heatmap, locref, scale_factors): + """Get the pose prediction from the heatmap and locref tensors. Used for SuperAnimal + model. + + @param heatmap: Heatmap tensor. + @type heatmap: np.ndarray + @param locref: Locref tensor. + @type locref: np.ndarray + @param scale_factors: Scale factors for the x and y axes. + @type scale_factors: Tuple[float, float] + @return: Pose prediction. + @rtype: np.ndarray + """ Y, X = get_top_values(heatmap) batch_size, num_joints = X.shape diff --git a/depthai_nodes/ml/parsers/utils/xfeat.py b/depthai_nodes/ml/parsers/utils/xfeat.py index 6836402..add6ee1 100644 --- a/depthai_nodes/ml/parsers/utils/xfeat.py +++ b/depthai_nodes/ml/parsers/utils/xfeat.py @@ -5,6 +5,15 @@ def local_maximum_filter(x: np.ndarray, kernel_size: int) -> np.ndarray: + """Apply a local maximum filter to the input array. + + @param x: Input array. + @type x: np.ndarray + @param kernel_size: Size of the local maximum filter. + @type kernel_size: int + @return: Output array after applying the local maximum filter. + @rtype: np.ndarray + """ # Ensure input is a 4D array (e.g., batch, channels, height, width) if len(x.shape) != 4: raise ValueError("Input array must be 4-dimensional.") @@ -39,6 +48,17 @@ def local_maximum_filter(x: np.ndarray, kernel_size: int) -> np.ndarray: def bilinear_grid_sample( im: np.ndarray, grid: np.ndarray, align_corners: bool = False ) -> np.ndarray: + """Bilinear grid sample. + + @param im: Input image tensor. + @type im: np.ndarray + @param grid: Grid tensor. + @type grid: np.ndarray + @param align_corners: Whether to align corners. + @type align_corners: bool + @return: Output image tensor after applying bilinear grid sample. + @rtype: np.ndarray + """ n, c, h, w = im.shape gn, gh, gw, _ = grid.shape assert n == gn @@ -105,6 +125,15 @@ def _get_kpts_heatmap( kpts: np.ndarray, softmax_temp: float = 1.0, ) -> np.ndarray: + """Get the keypoints heatmap. + + @param kpts: Keypoints. + @type kpts: np.ndarray + @param softmax_temp: Softmax temperature. + @type softmax_temp: float + @return: Keypoints heatmap. + @rtype: np.ndarray + """ kpts = np.exp(kpts * softmax_temp) scores = kpts / np.sum(kpts, axis=1, keepdims=True) scores = scores[:, :64] @@ -119,6 +148,17 @@ def _nms( threshold: float = 0.05, kernel_size: int = 5, ) -> np.ndarray: + """Non-Maximum Suppression. + + @param x: Input array. + @type x: np.ndarray + @param threshold: Non-maximum suppression threshold. + @type threshold: float + @param kernel_size: Size of the local maximum filter. + @type kernel_size: int + @return: Output array after applying non-maximum suppression. 
@@ -143,6 +183,23 @@ def detect_and_compute(
     input_size: Tuple[int, int],
     top_k: int = 4096,
 ) -> List[Dict[str, Any]]:
+    """Detect and compute keypoints.
+
+    @param feats: Dense feature map output by the model.
+    @type feats: np.ndarray
+    @param kpts: Raw keypoint logits output by the model.
+    @type kpts: np.ndarray
+    @param resize_rate_w: Resize rate for width.
+    @type resize_rate_w: float
+    @param resize_rate_h: Resize rate for height.
+    @type resize_rate_h: float
+    @param input_size: Input size.
+    @type input_size: Tuple[int, int]
+    @param top_k: Maximum number of keypoints to keep.
+    @type top_k: int
+    @return: List of dictionaries containing keypoints, scores, and descriptors.
+    @rtype: List[Dict[str, Any]]
+    """
     norm = np.linalg.norm(feats, axis=1, keepdims=True)
     feats = feats / norm
@@ -223,6 +280,17 @@ def detect_and_compute(

 def _match_mkpts(
     feats1: np.ndarray, feats2: np.ndarray, min_cossim: float = 0.62
 ) -> Tuple[np.ndarray, np.ndarray]:
+    """Match feature descriptors using mutual nearest neighbours over cosine similarity.
+
+    @param feats1: Feature descriptors from the first image.
+    @type feats1: np.ndarray
+    @param feats2: Feature descriptors from the second image.
+    @type feats2: np.ndarray
+    @param min_cossim: Minimum cosine similarity for a valid match.
+    @type min_cossim: float
+    @return: Indices of the matched descriptors in each set.
+    @rtype: Tuple[np.ndarray, np.ndarray]
+    """
     cossim = feats1 @ feats2.T
     cossim_t = feats2 @ feats1.T
     match12 = np.argmax(cossim, axis=1)
@@ -246,6 +314,17 @@ def _match_mkpts(

 def match(
     result1: Dict[str, Any], result2: Dict[str, Any], min_cossim: float = -1
 ) -> Tuple[np.ndarray, np.ndarray]:
+    """Match keypoints between two detection results.
+
+    @param result1: First result dictionary, as returned by detect_and_compute.
+    @type result1: Dict[str, Any]
+    @param result2: Second result dictionary, as returned by detect_and_compute.
+    @type result2: Dict[str, Any]
+    @param min_cossim: Minimum cosine similarity for a valid match.
+    @type min_cossim: float
+    @return: Matched keypoints from the first and second result.
+    @rtype: Tuple[np.ndarray, np.ndarray]
+    """
     indexes1, indexes2 = _match_mkpts(
         result1["descriptors"],
         result2["descriptors"],
diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py
index 4ec06e8..2065cdd 100644
--- a/depthai_nodes/ml/parsers/xfeat.py
+++ b/depthai_nodes/ml/parsers/xfeat.py
@@ -8,11 +8,44 @@

 class XFeatParser(dai.node.ThreadedHostNode):
+    """Parser class for parsing the output of the XFeat model.
+
+    Attributes
+    ----------
+    input : Node.Input
+        Node's input. It is a linking point to which the Neural Network's output is linked.
+    out : Node.Output
+        Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+    original_size : Tuple[float, float]
+        Original image size.
+    input_size : Tuple[float, float]
+        Input image size.
+    previous_results : np.ndarray
+        Previous results from the model. Previous results are used to match keypoints between two frames.
+
+    Output Message/s
+    ----------------
+    **Type**: dai.TrackedFeatures
+
+    **Description**: TrackedFeatures message containing matched keypoints, which share the same ID across frames.
+
+    Error Handling
+    --------------
+    **ValueError**: If the original image size is not specified.
+    """
+
     def __init__(
         self,
         original_size: Tuple[float, float] = None,
         input_size: Tuple[float, float] = (640, 352),
     ):
+        """Initializes the XFeatParser node.
+
+        @param original_size: Original image size.
+        @type original_size: Tuple[float, float]
+        @param input_size: Input image size.
+        @type input_size: Tuple[float, float]
+        """
         dai.node.ThreadedHostNode.__init__(self)
         self.input = dai.Node.Input(self)
         self.out = dai.Node.Output(self)
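A minimal configuration sketch for XFeatParser follows; the pipeline creation and NeuralNetwork wiring are elided and depend on the host setup, and the sizes shown are illustrative assumptions:

```python
from depthai_nodes.ml.parsers.xfeat import XFeatParser

# run() raises ValueError unless the original image size has been set.
parser = XFeatParser(input_size=(640, 352))
parser.setOriginalSize((1920, 1080))  # source frame size (illustrative)
# nn.out.link(parser.input)           # hypothetical NeuralNetwork node
# parser.out emits dai.TrackedFeatures with matching IDs across frames
```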
@@ -21,17 +54,22 @@ def __init__(
         self.previous_results = None

     def setOriginalSize(self, original_size):
+        """Sets the original image size.
+
+        @param original_size: Original image size.
+        @type original_size: Tuple[float, float]
+        """
         self.original_size = original_size

     def setInputSize(self, input_size):
+        """Sets the input image size.
+
+        @param input_size: Input image size.
+        @type input_size: Tuple[float, float]
+        """
         self.input_size = input_size

     def run(self):
-        """Postprocessing logic for XFeat model.
-
-        Returns:
-            dai.MatchedPoints containing matched keypoints.
-        """
         if self.original_size is None:
             raise ValueError("Original image size must be specified!")
diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py
index 0bfe555..a2d1a4a 100644
--- a/depthai_nodes/ml/parsers/yunet.py
+++ b/depthai_nodes/ml/parsers/yunet.py
@@ -9,12 +9,43 @@

 class YuNetParser(dai.node.ThreadedHostNode):
+    """Parser class for parsing the output of the YuNet face detection model.
+
+    Attributes
+    ----------
+    input : Node.Input
+        Node's input. It is a linking point to which the Neural Network's output is linked.
+    out : Node.Output
+        Parser sends the processed network results to this output in the form of a DepthAI message. It is a linking point from which the processed network results are retrieved.
+    score_threshold : float
+        Confidence score threshold for detected faces.
+    nms_threshold : float
+        Non-maximum suppression threshold.
+    top_k : int
+        Maximum number of detections to keep.
+
+    Output Message/s
+    ----------------
+    **Type**: ImgDetectionsWithKeypoints
+
+    **Description**: Message containing bounding boxes, labels, confidence scores, and keypoints of detected faces.
+    """
+
     def __init__(
         self,
         score_threshold=0.6,
         nms_threshold=0.3,
         top_k=5000,
     ):
+        """Initializes the YuNetParser node.
+
+        @param score_threshold: Confidence score threshold for detected faces.
+        @type score_threshold: float
+        @param nms_threshold: Non-maximum suppression threshold.
+        @type nms_threshold: float
+        @param top_k: Maximum number of detections to keep.
+        @type top_k: int
+        """
         dai.node.ThreadedHostNode.__init__(self)
         self.input = dai.Node.Input(self)
         self.out = dai.Node.Output(self)
@@ -24,21 +55,30 @@ def __init__(
         self.top_k = top_k

     def setConfidenceThreshold(self, threshold):
+        """Sets the confidence score threshold for detected faces.
+
+        @param threshold: Confidence score threshold for detected faces.
+        @type threshold: float
+        """
         self.score_threshold = threshold

     def setNMSThreshold(self, threshold):
+        """Sets the non-maximum suppression threshold.
+
+        @param threshold: Non-maximum suppression threshold.
+        @type threshold: float
+        """
         self.nms_threshold = threshold

     def setTopK(self, top_k):
-        self.top_k = top_k
-
-    def run(self):
-        """Postprocessing logic for YuNet model.
+        """Sets the maximum number of detections to keep.

-        Returns:
-            dai.ImgDetectionsWithKeypoints: Detections with keypoints.
+        @param top_k: Maximum number of detections to keep.
+        @type top_k: int
         """
+        self.top_k = top_k

+    def run(self):
         while self.isRunning():
             try:
                 output: dai.NNData = self.input.get()
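And a matching configuration sketch for YuNetParser, with the same caveats: the pipeline and NeuralNetwork node are elided, and the threshold values are illustrative, not recommendations:

```python
from depthai_nodes.ml.parsers.yunet import YuNetParser

parser = YuNetParser()              # defaults: score_threshold=0.6, nms_threshold=0.3, top_k=5000
parser.setConfidenceThreshold(0.7)  # keep only confident face detections
parser.setNMSThreshold(0.3)
parser.setTopK(100)
# nn.out.link(parser.input)         # hypothetical NeuralNetwork node
# parser.out emits ImgDetectionsWithKeypoints messages
```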