From 5c3193b176596a1489c8a69a6102042bb85dc92b Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Wed, 24 Jul 2024 16:16:21 +0200 Subject: [PATCH 01/25] classification parser added --- .gitignore | 4 +- .../ml/messages/creators/classification.py | 77 +++++++++++++++++++ .../ml/parsers/classification_parser.py | 0 3 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 depthai_nodes/ml/messages/creators/classification.py create mode 100644 depthai_nodes/ml/parsers/classification_parser.py diff --git a/.gitignore b/.gitignore index 0111672..118ba12 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ __pycache__/ *.py[cod] *$py.class +tst.py + # C extensions *.so @@ -162,4 +164,4 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -.DS_Store \ No newline at end of file +.DS_Store diff --git a/depthai_nodes/ml/messages/creators/classification.py b/depthai_nodes/ml/messages/creators/classification.py new file mode 100644 index 0000000..bb422cf --- /dev/null +++ b/depthai_nodes/ml/messages/creators/classification.py @@ -0,0 +1,77 @@ +import math + +import cv2 +import depthai as dai +import numpy as np + +class ClassificationParser(dai.node.ThreadedHostNode): + def __init__(self, labels: list = [], threshold: float = 0.5, top_k: int = 1): + dai.node.ThreadedHostNode.__init__(self) + self.input = dai.Node.Input(self) + self.out = dai.Node.Output(self) + self.threshold = threshold + self.labels = np.array(labels) + self.top_k = top_k + self.nr_classes = len(labels) + + def setLabels(self, labels): + self.labels = labels + self.nr_classes = len(labels) + + def setThreshold(self, threshold): + self.threshold = threshold + + def setTopK(self, top_k): + self.top_k = top_k + + + def run(self): + """ Postprocessing logic for Classification model. + + Parameters + ---------- + labels : list + List of class labels. + threshold : float + Minimum confidence threshold for a class to be considered valid. 
+ top_k : int + Number of top classes to return. + + Returns + ------- + result: ndarray + 2D array containing top k classes and (optionally) their scores. + + """ + + while self.isRunning(): + try: + output: dai.NNData = self.input.get() + except dai.MessageQueue.QueueException: + break # Pipeline was stopped + + output_layer_names = output.getAllLayerNames() + if len(output_layer_names) != 1: + raise ValueError(f"Expected 1 output layer, got {len(output_layer_names)}.") + + scores = output.getTensor(output_layer_names[0]) + scores = np.array(scores).flatten() + + if len(scores) != self.nr_classes: + raise ValueError(f"Expected {self.nr_classes} scores, got {len(scores)}.") + + scores = scores[scores >= self.threshold] + + top_k_args = np.argsort(scores)[::-1][:self.top_k] + top_k_scores = scores[top_k_args] + + classes = np.expand_dims(top_k_scores, axis=1) + if self.labels: + top_k_labels = self.labels[top_k_args] + classes = np.vstack((top_k_labels, top_k_scores)).T + + # make message + + + + diff --git a/depthai_nodes/ml/parsers/classification_parser.py b/depthai_nodes/ml/parsers/classification_parser.py new file mode 100644 index 0000000..e69de29 From d252de84dc36ae7c964719dbfb4967fcfe3f8ac6 Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Fri, 26 Jul 2024 16:05:29 +0200 Subject: [PATCH 02/25] added classification parser --- .gitignore | 2 +- .../ml/messages/creators/__init__.py | 3 +- .../ml/messages/creators/classification.py | 77 --------- .../creators/classification_message.py | 13 ++ depthai_nodes/ml/parsers/__init__.py | 26 +-- .../ml/parsers/classification_parser.py | 148 ++++++++++++++++++ 6 files changed, 178 insertions(+), 91 deletions(-) delete mode 100644 depthai_nodes/ml/messages/creators/classification.py create mode 100644 depthai_nodes/ml/messages/creators/classification_message.py diff --git a/.gitignore b/.gitignore index 118ba12..2b27e9d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ __pycache__/ *$py.class tst.py - 
+*unit_test*.py # C extensions *.so diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py index 6fb47e2..3d1c916 100644 --- a/depthai_nodes/ml/messages/creators/__init__.py +++ b/depthai_nodes/ml/messages/creators/__init__.py @@ -5,7 +5,7 @@ from .segmentation import create_segmentation_message from .thermal import create_thermal_message from .tracked_features import create_tracked_features_message - +from .classification_message import create_classification_message __all__ = [ "create_image_message", "create_segmentation_message", @@ -16,4 +16,5 @@ "create_tracked_features_message", "create_keypoints_message", "create_thermal_message", + "create_classification_message", ] diff --git a/depthai_nodes/ml/messages/creators/classification.py b/depthai_nodes/ml/messages/creators/classification.py deleted file mode 100644 index bb422cf..0000000 --- a/depthai_nodes/ml/messages/creators/classification.py +++ /dev/null @@ -1,77 +0,0 @@ -import math - -import cv2 -import depthai as dai -import numpy as np - -class ClassificationParser(dai.node.ThreadedHostNode): - def __init__(self, labels: list = [], threshold: float = 0.5, top_k: int = 1): - dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) - self.threshold = threshold - self.labels = np.array(labels) - self.top_k = top_k - self.nr_classes = len(labels) - - def setLabels(self, labels): - self.labels = labels - self.nr_classes = len(labels) - - def setThreshold(self, threshold): - self.threshold = threshold - - def setTopK(self, top_k): - self.top_k = top_k - - - def run(self): - """ Postprocessing logic for Classification model. - - Parameters - ---------- - labels : list - List of class labels. - threshold : float - Minimum confidence threshold for a class to be considered valid. - top_k : int - Number of top classes to return. 
- - Returns - ------- - result: ndarray - 2D array containing top k classes and (optionally) their scores. - - """ - - while self.isRunning(): - try: - output: dai.NNData = self.input.get() - except dai.MessageQueue.QueueException: - break # Pipeline was stopped - - output_layer_names = output.getAllLayerNames() - if len(output_layer_names) != 1: - raise ValueError(f"Expected 1 output layer, got {len(output_layer_names)}.") - - scores = output.getTensor(output_layer_names[0]) - scores = np.array(scores).flatten() - - if len(scores) != self.nr_classes: - raise ValueError(f"Expected {self.nr_classes} scores, got {len(scores)}.") - - scores = scores[scores >= self.threshold] - - top_k_args = np.argsort(scores)[::-1][:self.top_k] - top_k_scores = scores[top_k_args] - - classes = np.expand_dims(top_k_scores, axis=1) - if self.labels: - top_k_labels = self.labels[top_k_args] - classes = np.vstack((top_k_labels, top_k_scores)).T - - # make message - - - - diff --git a/depthai_nodes/ml/messages/creators/classification_message.py b/depthai_nodes/ml/messages/creators/classification_message.py new file mode 100644 index 0000000..acf2a4f --- /dev/null +++ b/depthai_nodes/ml/messages/creators/classification_message.py @@ -0,0 +1,13 @@ +import depthai as dai +import numpy as np + +def create_classification_message(scores: np.array, labels: np.array = []) -> dai.ADatatype: + msg = dai.ADatatype() + + msg.labels = labels + msg.scores = scores + msg.combined_results = [] + if len(labels) == len(scores): + msg.combined_results = [[labels[i], scores[i]] for i in range(len(labels))] + + return msg diff --git a/depthai_nodes/ml/parsers/__init__.py b/depthai_nodes/ml/parsers/__init__.py index cd546e1..6018512 100644 --- a/depthai_nodes/ml/parsers/__init__.py +++ b/depthai_nodes/ml/parsers/__init__.py @@ -1,15 +1,16 @@ -from .image_output import ImageOutputParser -from .keypoints import KeypointParser -from .mediapipe_hand_landmarker import MPHandLandmarkParser -from 
.mediapipe_palm_detection import MPPalmDetectionParser -from .mlsd import MLSDParser -from .monocular_depth import MonocularDepthParser -from .scrfd import SCRFDParser -from .segmentation import SegmentationParser -from .superanimal_landmarker import SuperAnimalParser -from .thermal_image import ThermalImageParser -from .xfeat import XFeatParser -from .yunet import YuNetParser +# from .image_output import ImageOutputParser +# from .keypoints import KeypointParser +# from .mediapipe_hand_landmarker import MPHandLandmarkParser +# from .mediapipe_palm_detection import MPPalmDetectionParser +# from .mlsd import MLSDParser +# from .monocular_depth import MonocularDepthParser +# from .scrfd import SCRFDParser +# from .segmentation import SegmentationParser +# from .superanimal_landmarker import SuperAnimalParser +# from .thermal_image import ThermalImageParser +# from .xfeat import XFeatParser +# from .yunet import YuNetParser +from .classification_parser import ClassificationParser __all__ = [ "ImageOutputParser", @@ -24,4 +25,5 @@ "MLSDParser", "XFeatParser", "ThermalImageParser", + "ClassificationParser", ] diff --git a/depthai_nodes/ml/parsers/classification_parser.py b/depthai_nodes/ml/parsers/classification_parser.py index e69de29..bbfffe5 100644 --- a/depthai_nodes/ml/parsers/classification_parser.py +++ b/depthai_nodes/ml/parsers/classification_parser.py @@ -0,0 +1,148 @@ +import math + +import cv2 +import depthai as dai +import numpy as np + +# from ..messages.creators import create_classification_message + +class ClassificationMessage(dai.ADatatype): + def __init__(self): + dai.ADatatype.__init__(self) + self.labels = [] + self.scores = [] + self.combined_results = [] + + def setLabels(self, labels): + self.labels = np.array(labels, dtype= np.str_) + + def setScores(self, scores): + self.scores = scores + + def setCombinedResults(self, combined_results): + self.combined_results = combined_results + + def getLabels(self): + return self.labels + + def 
getScores(self): + return self.scores + + def getCombinedResults(self): + return self.combined_results + + +def create_classification_message(scores: np.array, labels: np.array = []) -> dai.ADatatype: + # msg = dai.Buffer() + + # combined_results = list(scores) + # if len(labels) == len(scores): + # combined_results = [[labels[i], scores[i]] for i in range(len(labels))] + # print(combined_results) + # msg.setData(combined_results) + msg = ClassificationMessage() + msg.setLabels(labels) + msg.setScores(scores) + if len(labels) == len(scores): + combined_results = [[labels[i], scores[i]] for i in range(len(labels))] + msg.setCombinedResults(combined_results) + + return msg + +class ClassificationParser(dai.node.ThreadedHostNode): + def __init__(self, class_labels: list = [], top_k: int = 1, threshold: float = 0): + dai.node.ThreadedHostNode.__init__(self) + self.out = self.createOutput() + self.input = self.createInput() + # self.input = dai.Node.Input(self) + # self.out = dai.Node.Output(self) + self.threshold = threshold + self.class_labels = np.array(class_labels) + self.top_k = top_k + self.nr_classes = len(class_labels) + + self.checkTypes() + + def checkTypes(self): + if self.top_k > self.nr_classes and self.nr_classes != 0: + raise ValueError(f"Top k ({self.top_k}) is greater than number of classes ({self.nr_classes}).") + + if self.threshold < 0 or self.threshold >= 1: + raise ValueError(f"Threshold should be between 0 and 1, got {self.threshold}.") + + if self.top_k <= 0: + raise ValueError(f"Top k should be a positive integer, got {self.top_k}.") + + def setLabels(self, class_labels): + self.class_labels = class_labels + self.nr_classes = len(class_labels) + self.checkTypes() + + def setThreshold(self, threshold): + self.threshold = threshold + self.checkTypes() + + def setTopK(self, top_k): + self.top_k = top_k + self.checkTypes() + + + def run(self): + """ Postprocessing logic for Classification model. 
+ + Parameters + ---------- + top_k : int + Number of classes to return. + class_labels : list + List of class labels. + threshold : float + Minimum confidence threshold for a class to be considered valid. + Not used by default. + + Returns + ------- + result: ndarray + 2D array containing top k classes and (optionally) their scores. + + """ + + while self.isRunning(): + try: + output: dai.NNData = self.input.get() + except dai.MessageQueue.QueueException: + break # Pipeline was stopped + + output_layer_names = output.getAllLayerNames() + if len(output_layer_names) != 1: + raise ValueError(f"Expected 1 output layer, got {len(output_layer_names)}.") + + scores = output.getTensor(output_layer_names[0]) + scores = np.array(scores).flatten() + + if len(scores) != self.nr_classes and self.nr_classes != 0: + raise ValueError(f"Number of labels and scores mismatch. Provided {self.nr_classes} labels and {len(scores)} scores.") + + + top_k_args = np.argsort(scores)[::-1][:self.top_k] + top_k_scores = scores[top_k_args] + + top_k_scores = top_k_scores[top_k_scores >= self.threshold] + top_k_args = top_k_args[top_k_scores >= self.threshold] + + + # if len(top_k_scores) < self.top_k: + # raise ValueError(f"No scores meet criteria, list is empty.") + + top_k_labels = [] + if len(self.class_labels) > 0: + top_k_labels = self.class_labels[top_k_args] + + + msg = create_classification_message(top_k_scores, top_k_labels) + + self.out.send(msg) + + + + From bf144f2d910fee8b28df518fa9d10b5795754fa6 Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Mon, 29 Jul 2024 10:44:26 +0200 Subject: [PATCH 03/25] classification parser added --- .gitignore | 2 +- depthai_nodes/ml/messages/__init__.py | 2 + depthai_nodes/ml/messages/classification.py | 24 +++ .../creators/classification_message.py | 23 ++- depthai_nodes/ml/parsers/__init__.py | 24 +-- .../ml/parsers/classification_parser.py | 163 ++++-------------- 6 files changed, 91 insertions(+), 147 deletions(-) create mode 100644 
depthai_nodes/ml/messages/classification.py diff --git a/.gitignore b/.gitignore index 2b27e9d..b30f2b8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ __pycache__/ *.py[cod] *$py.class - +*_old.py tst.py *unit_test*.py # C extensions diff --git a/depthai_nodes/ml/messages/__init__.py b/depthai_nodes/ml/messages/__init__.py index 977b57b..d46c90b 100644 --- a/depthai_nodes/ml/messages/__init__.py +++ b/depthai_nodes/ml/messages/__init__.py @@ -1,3 +1,4 @@ +from .classification import ClassificationMessage from .img_detections import ImgDetectionsWithKeypoints, ImgDetectionWithKeypoints from .keypoints import HandKeypoints, Keypoints from .lines import Line, Lines @@ -9,4 +10,5 @@ "Keypoints", "Line", "Lines", + "ClassificationMessage", ] diff --git a/depthai_nodes/ml/messages/classification.py b/depthai_nodes/ml/messages/classification.py new file mode 100644 index 0000000..f45035f --- /dev/null +++ b/depthai_nodes/ml/messages/classification.py @@ -0,0 +1,24 @@ +from typing import List + +import depthai as dai + + +class ClassificationMessage(dai.Buffer): + def __init__(self): + dai.Buffer.__init__(self) + self._sortedClasses = [] + + @property + def sortedClasses(self) -> List: + return self._sortedClasses + + @sortedClasses.setter + def sortedClasses(self, value: List): + if not isinstance(value, list): + raise TypeError("Sorted classes must be a list.") + for item in value: + if not isinstance(item, list) or len(item) != 2: + raise TypeError("Each sorted class must be a list of 2 elements.") + if not isinstance(item[0], str): + raise TypeError("Class name must be a string.") + self._sortedClasses = value diff --git a/depthai_nodes/ml/messages/creators/classification_message.py b/depthai_nodes/ml/messages/creators/classification_message.py index acf2a4f..02ee9fd 100644 --- a/depthai_nodes/ml/messages/creators/classification_message.py +++ b/depthai_nodes/ml/messages/creators/classification_message.py @@ -1,13 +1,18 @@ import depthai as dai import numpy 
as np -def create_classification_message(scores: np.array, labels: np.array = []) -> dai.ADatatype: - msg = dai.ADatatype() - - msg.labels = labels - msg.scores = scores - msg.combined_results = [] - if len(labels) == len(scores): - msg.combined_results = [[labels[i], scores[i]] for i in range(len(labels))] - +from ...messages import ClassificationMessage + + +def create_classification_message(scores, classes) -> dai.Buffer: + msg = ClassificationMessage() + + sorted_args = np.argsort(scores)[::-1] + scores = scores[sorted_args] + classes = classes[sorted_args] + + msg.sortedClasses = [ + [str(classes[i]), float(scores[i])] for i in range(len(classes)) + ] + return msg diff --git a/depthai_nodes/ml/parsers/__init__.py b/depthai_nodes/ml/parsers/__init__.py index 6018512..89386a4 100644 --- a/depthai_nodes/ml/parsers/__init__.py +++ b/depthai_nodes/ml/parsers/__init__.py @@ -1,16 +1,16 @@ -# from .image_output import ImageOutputParser -# from .keypoints import KeypointParser -# from .mediapipe_hand_landmarker import MPHandLandmarkParser -# from .mediapipe_palm_detection import MPPalmDetectionParser -# from .mlsd import MLSDParser -# from .monocular_depth import MonocularDepthParser -# from .scrfd import SCRFDParser -# from .segmentation import SegmentationParser -# from .superanimal_landmarker import SuperAnimalParser -# from .thermal_image import ThermalImageParser -# from .xfeat import XFeatParser -# from .yunet import YuNetParser from .classification_parser import ClassificationParser +from .image_output import ImageOutputParser +from .keypoints import KeypointParser +from .mediapipe_hand_landmarker import MPHandLandmarkParser +from .mediapipe_palm_detection import MPPalmDetectionParser +from .mlsd import MLSDParser +from .monocular_depth import MonocularDepthParser +from .scrfd import SCRFDParser +from .segmentation import SegmentationParser +from .superanimal_landmarker import SuperAnimalParser +from .thermal_image import ThermalImageParser +from .xfeat import 
XFeatParser +from .yunet import YuNetParser __all__ = [ "ImageOutputParser", diff --git a/depthai_nodes/ml/parsers/classification_parser.py b/depthai_nodes/ml/parsers/classification_parser.py index bbfffe5..46d711c 100644 --- a/depthai_nodes/ml/parsers/classification_parser.py +++ b/depthai_nodes/ml/parsers/classification_parser.py @@ -1,148 +1,61 @@ -import math - -import cv2 import depthai as dai import numpy as np -# from ..messages.creators import create_classification_message - -class ClassificationMessage(dai.ADatatype): - def __init__(self): - dai.ADatatype.__init__(self) - self.labels = [] - self.scores = [] - self.combined_results = [] - - def setLabels(self, labels): - self.labels = np.array(labels, dtype= np.str_) - - def setScores(self, scores): - self.scores = scores - - def setCombinedResults(self, combined_results): - self.combined_results = combined_results - - def getLabels(self): - return self.labels - - def getScores(self): - return self.scores - - def getCombinedResults(self): - return self.combined_results - +from ..messages.creators import create_classification_message -def create_classification_message(scores: np.array, labels: np.array = []) -> dai.ADatatype: - # msg = dai.Buffer() - - # combined_results = list(scores) - # if len(labels) == len(scores): - # combined_results = [[labels[i], scores[i]] for i in range(len(labels))] - # print(combined_results) - # msg.setData(combined_results) - msg = ClassificationMessage() - msg.setLabels(labels) - msg.setScores(scores) - if len(labels) == len(scores): - combined_results = [[labels[i], scores[i]] for i in range(len(labels))] - msg.setCombinedResults(combined_results) - - return msg class ClassificationParser(dai.node.ThreadedHostNode): - def __init__(self, class_labels: list = [], top_k: int = 1, threshold: float = 0): - dai.node.ThreadedHostNode.__init__(self) - self.out = self.createOutput() - self.input = self.createInput() - # self.input = dai.Node.Input(self) - # self.out = 
dai.Node.Output(self) - self.threshold = threshold - self.class_labels = np.array(class_labels) - self.top_k = top_k - self.nr_classes = len(class_labels) - - self.checkTypes() - - def checkTypes(self): - if self.top_k > self.nr_classes and self.nr_classes != 0: - raise ValueError(f"Top k ({self.top_k}) is greater than number of classes ({self.nr_classes}).") - - if self.threshold < 0 or self.threshold >= 1: - raise ValueError(f"Threshold should be between 0 and 1, got {self.threshold}.") - - if self.top_k <= 0: - raise ValueError(f"Top k should be a positive integer, got {self.top_k}.") - - def setLabels(self, class_labels): - self.class_labels = class_labels - self.nr_classes = len(class_labels) - self.checkTypes() - - def setThreshold(self, threshold): - self.threshold = threshold - self.checkTypes() - - def setTopK(self, top_k): - self.top_k = top_k - self.checkTypes() + """Postprocessing logic for Classification model. + Parameters + ---------- + classes : list + List of class labels. + is_softmax : bool = True + True, if output is already softmaxed. - def run(self): - """ Postprocessing logic for Classification model. + Returns + ------- + ClassificationMessage: A dai.Buffer object with atribute `sortedClasses` of classes and scores. + """ - Parameters - ---------- - top_k : int - Number of classes to return. - class_labels : list - List of class labels. - threshold : float - Minimum confidence threshold for a class to be considered valid. - Not used by default. - - Returns - ------- - result: ndarray - 2D array containing top k classes and (optionally) their scores. 
+ def __init__(self, classes: list, is_softmax: bool = True): + dai.node.ThreadedHostNode.__init__(self) + self.out = self.createOutput() + self.input = self.createInput() + self.classes = np.array(classes) + self.n_classes = len(classes) + self.is_softmax = is_softmax - """ + def setClasses(self, classes): + self.classes = classes + self.n_classes = len(classes) + def run(self) -> dai.Buffer: while self.isRunning(): try: output: dai.NNData = self.input.get() except dai.MessageQueue.QueueException: - break # Pipeline was stopped - + break # Pipeline was stopped + output_layer_names = output.getAllLayerNames() if len(output_layer_names) != 1: - raise ValueError(f"Expected 1 output layer, got {len(output_layer_names)}.") - + raise ValueError( + f"Expected 1 output layer, got {len(output_layer_names)}." + ) + scores = output.getTensor(output_layer_names[0]) scores = np.array(scores).flatten() - if len(scores) != self.nr_classes and self.nr_classes != 0: - raise ValueError(f"Number of labels and scores mismatch. Provided {self.nr_classes} labels and {len(scores)} scores.") - - - top_k_args = np.argsort(scores)[::-1][:self.top_k] - top_k_scores = scores[top_k_args] - - top_k_scores = top_k_scores[top_k_scores >= self.threshold] - top_k_args = top_k_args[top_k_scores >= self.threshold] - - - # if len(top_k_scores) < self.top_k: - # raise ValueError(f"No scores meet criteria, list is empty.") - - top_k_labels = [] - if len(self.class_labels) > 0: - top_k_labels = self.class_labels[top_k_args] - + if not self.is_softmax: + ex = np.exp(scores) + scores = ex / np.sum(ex) - msg = create_classification_message(top_k_scores, top_k_labels) - - self.out.send(msg) - + if len(scores) != self.n_classes and self.n_classes != 0: + raise ValueError( + f"Number of labels and scores mismatch. Provided {self.n_classes} labels and {len(scores)} scores." 
+ ) - + msg = create_classification_message(scores, self.classes) + self.out.send(msg) From 0cac7e6efc34ec9f15062dcb5ae42ec1388a1ad9 Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Mon, 29 Jul 2024 11:07:18 +0200 Subject: [PATCH 04/25] Added import to __init__ --- depthai_nodes/ml/messages/creators/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py index 3d1c916..f184ffd 100644 --- a/depthai_nodes/ml/messages/creators/__init__.py +++ b/depthai_nodes/ml/messages/creators/__init__.py @@ -1,3 +1,4 @@ +from .classification_message import create_classification_message from .depth import create_depth_message from .detection import create_detection_message, create_line_detection_message from .image import create_image_message @@ -5,7 +6,7 @@ from .segmentation import create_segmentation_message from .thermal import create_thermal_message from .tracked_features import create_tracked_features_message -from .classification_message import create_classification_message + __all__ = [ "create_image_message", "create_segmentation_message", From 0ecc4c5681c129c7495898f93d88d09f6a19a17d Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Mon, 29 Jul 2024 11:17:10 +0200 Subject: [PATCH 05/25] Removed custom files from git .gitignore --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index b30f2b8..c772aba 100644 --- a/.gitignore +++ b/.gitignore @@ -3,9 +3,6 @@ __pycache__/ *.py[cod] *$py.class -*_old.py -tst.py -*unit_test*.py # C extensions *.so From 0601059f2e3ba8ddebe89363b4d59574d1b1937e Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Mon, 29 Jul 2024 15:34:35 +0200 Subject: [PATCH 06/25] Updated docstrings and added error raises --- depthai_nodes/ml/messages/__init__.py | 4 +- depthai_nodes/ml/messages/classification.py | 22 +++---- .../creators/classification_message.py | 61 +++++++++++++++++-- 
.../ml/parsers/classification_parser.py | 25 +++++--- 4 files changed, 83 insertions(+), 29 deletions(-) diff --git a/depthai_nodes/ml/messages/__init__.py b/depthai_nodes/ml/messages/__init__.py index d46c90b..81d5876 100644 --- a/depthai_nodes/ml/messages/__init__.py +++ b/depthai_nodes/ml/messages/__init__.py @@ -1,4 +1,4 @@ -from .classification import ClassificationMessage +from .classification import Classifications from .img_detections import ImgDetectionsWithKeypoints, ImgDetectionWithKeypoints from .keypoints import HandKeypoints, Keypoints from .lines import Line, Lines @@ -10,5 +10,5 @@ "Keypoints", "Line", "Lines", - "ClassificationMessage", + "Classifications", ] diff --git a/depthai_nodes/ml/messages/classification.py b/depthai_nodes/ml/messages/classification.py index f45035f..97dc37c 100644 --- a/depthai_nodes/ml/messages/classification.py +++ b/depthai_nodes/ml/messages/classification.py @@ -3,22 +3,22 @@ import depthai as dai -class ClassificationMessage(dai.Buffer): +class Classifications(dai.Buffer): def __init__(self): dai.Buffer.__init__(self) - self._sortedClasses = [] + self._classes = [] @property - def sortedClasses(self) -> List: - return self._sortedClasses + def classes(self) -> List: + return self._classes - @sortedClasses.setter - def sortedClasses(self, value: List): + @classes.setter + def classes(self, value: List): if not isinstance(value, list): - raise TypeError("Sorted classes must be a list.") + raise TypeError("Must be a list.") for item in value: if not isinstance(item, list) or len(item) != 2: - raise TypeError("Each sorted class must be a list of 2 elements.") - if not isinstance(item[0], str): - raise TypeError("Class name must be a string.") - self._sortedClasses = value + raise TypeError( + "Each item must be a list of [class_name, probability_score], got {item}." 
+ ) + self._classes = value diff --git a/depthai_nodes/ml/messages/creators/classification_message.py b/depthai_nodes/ml/messages/creators/classification_message.py index 02ee9fd..c1eb458 100644 --- a/depthai_nodes/ml/messages/creators/classification_message.py +++ b/depthai_nodes/ml/messages/creators/classification_message.py @@ -1,18 +1,67 @@ import depthai as dai import numpy as np -from ...messages import ClassificationMessage +from ...messages import Classifications -def create_classification_message(scores, classes) -> dai.Buffer: - msg = ClassificationMessage() +def create_classification_message( + scores: np.ndarray, classes: np.ndarray = None +) -> dai.Buffer: + """Create a message for classification. The message contains the class names and + their respective scores, sorted in descending order of scores. + + Parameters + ---------- + scores : np.ndarray + A numpy array of shape (n_classes,) containing the probability score of each class. + + classes : np.ndarray = [] + A numpy array of class names. If not provided, class names are set to None. + + + Returns + -------- + Classifications : dai.Buffer + A message with parameter `classes` which is a list of shape (n_classes, 2) + where each item is [class_name, probability_score]. + If no class names are provided, class_name is set to None. + """ + if classes is None: + classes = np.array([]) + + if len(scores) == 0: + raise ValueError("Scores should not be empty.") + if len(scores) != len(scores.flatten()): + raise ValueError(f"Scores should be a 1D array, got {scores.shape}.") + + scores = scores.flatten() + + if not np.issubdtype(scores.dtype, np.floating): + raise ValueError(f"Scores should be of type float, got {scores.dtype}.") + + print("scores", np.sum(scores)) + if not np.isclose(np.sum(scores), 1.0, atol=1e-1): + raise ValueError(f"Scores should sum to 1, got {np.sum(scores)}.") + + if len(scores) != len(classes) and len(classes) != 0: + raise ValueError( + f"Number of labels and scores mismatch. 
Provided {len(scores)} scores and {len(classes)} class names." + ) + + classification_msg = Classifications() sorted_args = np.argsort(scores)[::-1] scores = scores[sorted_args] - classes = classes[sorted_args] - msg.sortedClasses = [ + if len(classes) == 0: + classification_msg.classes = [ + [None, float(scores[i])] for i in range(len(scores)) + ] + return classification_msg + + classes = classes[sorted_args] + classification_msg.classes = [ [str(classes[i]), float(scores[i])] for i in range(len(classes)) ] - return msg + return classification_msg diff --git a/depthai_nodes/ml/parsers/classification_parser.py b/depthai_nodes/ml/parsers/classification_parser.py index 46d711c..62ef02c 100644 --- a/depthai_nodes/ml/parsers/classification_parser.py +++ b/depthai_nodes/ml/parsers/classification_parser.py @@ -9,21 +9,26 @@ class ClassificationParser(dai.node.ThreadedHostNode): Parameters ---------- - classes : list + classes : list[str] List of class labels. is_softmax : bool = True True, if output is already softmaxed. Returns ------- - ClassificationMessage: A dai.Buffer object with atribute `sortedClasses` of classes and scores. + Classifications: dai.Buffer + An object with parameter `classes`, which is a list of items like [class_name, probability_score]. + If no class names are provided, class_name is set to None. 
""" - def __init__(self, classes: list, is_softmax: bool = True): + def __init__(self, classes: list[str] = None, is_softmax: bool = True): dai.node.ThreadedHostNode.__init__(self) self.out = self.createOutput() self.input = self.createInput() - self.classes = np.array(classes) + if classes is None: + self.classes = [] + else: + self.classes = np.array(classes) self.n_classes = len(classes) self.is_softmax = is_softmax @@ -31,7 +36,7 @@ def setClasses(self, classes): self.classes = classes self.n_classes = len(classes) - def run(self) -> dai.Buffer: + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() @@ -47,15 +52,15 @@ def run(self) -> dai.Buffer: scores = output.getTensor(output_layer_names[0]) scores = np.array(scores).flatten() - if not self.is_softmax: - ex = np.exp(scores) - scores = ex / np.sum(ex) - if len(scores) != self.n_classes and self.n_classes != 0: raise ValueError( - f"Number of labels and scores mismatch. Provided {self.n_classes} labels and {len(scores)} scores." + f"Number of labels and scores mismatch. Provided {self.n_classes} class names and {len(scores)} scores." 
) + if not self.is_softmax: + ex = np.exp(scores) + scores = ex / np.sum(ex) + msg = create_classification_message(scores, self.classes) self.out.send(msg) From 6a5ecfc7568fed8d336009a341f9795d3a58068b Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Mon, 29 Jul 2024 15:36:51 +0200 Subject: [PATCH 07/25] pre-commit fixes --- depthai_nodes/ml/messages/creators/classification_message.py | 1 + 1 file changed, 1 insertion(+) diff --git a/depthai_nodes/ml/messages/creators/classification_message.py b/depthai_nodes/ml/messages/creators/classification_message.py index c1eb458..672ba68 100644 --- a/depthai_nodes/ml/messages/creators/classification_message.py +++ b/depthai_nodes/ml/messages/creators/classification_message.py @@ -31,6 +31,7 @@ def create_classification_message( if len(scores) == 0: raise ValueError("Scores should not be empty.") + if len(scores) != len(scores.flatten()): raise ValueError(f"Scores should be a 1D array, got {scores.shape}.") From 61b6613a6f3f4f8ea1f48f617e6500d246d64bfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:40:22 +0200 Subject: [PATCH 08/25] Epytext documentation. 
(#13) --- depthai_nodes/ml/messages/creators/depth.py | 21 +-- .../ml/messages/creators/detection.py | 65 ++++++--- depthai_nodes/ml/messages/creators/image.py | 11 +- .../ml/messages/creators/keypoints.py | 60 +++++--- .../ml/messages/creators/segmentation.py | 16 ++- depthai_nodes/ml/messages/creators/thermal.py | 18 +-- .../ml/messages/creators/tracked_features.py | 38 ++++-- depthai_nodes/ml/messages/img_detections.py | 43 ++++++ depthai_nodes/ml/messages/keypoints.py | 54 ++++++++ depthai_nodes/ml/messages/lines.py | 67 +++++++++ depthai_nodes/ml/parsers/image_output.py | 37 ++++- depthai_nodes/ml/parsers/keypoints.py | 53 ++++++- .../ml/parsers/mediapipe_hand_landmarker.py | 51 ++++++- .../ml/parsers/mediapipe_palm_detection.py | 59 +++++++- depthai_nodes/ml/parsers/mlsd.py | 57 +++++++- depthai_nodes/ml/parsers/monocular_depth.py | 36 ++++- depthai_nodes/ml/parsers/scrfd.py | 55 ++++++-- depthai_nodes/ml/parsers/segmentation.py | 40 +++++- .../ml/parsers/superanimal_landmarker.py | 43 +++++- depthai_nodes/ml/parsers/thermal_image.py | 24 +++- .../ml/parsers/utils/decode_detections.py | 40 +++--- depthai_nodes/ml/parsers/utils/denormalize.py | 12 +- depthai_nodes/ml/parsers/utils/medipipe.py | 129 +++++++++--------- depthai_nodes/ml/parsers/utils/mlsd.py | 30 ++++ depthai_nodes/ml/parsers/utils/superanimal.py | 19 +++ depthai_nodes/ml/parsers/utils/xfeat.py | 79 +++++++++++ depthai_nodes/ml/parsers/xfeat.py | 48 ++++++- depthai_nodes/ml/parsers/yunet.py | 52 ++++++- 28 files changed, 1009 insertions(+), 248 deletions(-) diff --git a/depthai_nodes/ml/messages/creators/depth.py b/depthai_nodes/ml/messages/creators/depth.py index 4d89be1..dabb44f 100644 --- a/depthai_nodes/ml/messages/creators/depth.py +++ b/depthai_nodes/ml/messages/creators/depth.py @@ -9,16 +9,19 @@ def create_depth_message( depth_map: np.array, depth_type: Literal["relative", "metric"] ) -> dai.ImgFrame: - """Creates a depth message in the form of an ImgFrame using the provided depth map - 
and depth type. + """Create a DepthAI message for a depth map. - Args: - depth_map (np.array): A NumPy array representing the depth map with shape (CHW or HWC). - depth_type (Literal['relative', 'metric']): A string indicating the type of depth map. - It can either be 'relative' or 'metric'. - - Returns: - dai.ImgFrame: An ImgFrame object containing the depth information. + @param depth_map: A NumPy array representing the depth map with shape (CHW or HWC). + @type depth_map: np.array + @param depth_type: A string indicating the type of depth map. It can either be + 'relative' or 'metric'. + @type depth_type: Literal['relative', 'metric'] + @return: An ImgFrame object containing the depth information. + @rtype: dai.ImgFrame + @raise ValueError: If the depth map is not a NumPy array. + @raise ValueError: If the depth map is not 3D. + @raise ValueError: If the depth map shape is not CHW or HWC. + @raise ValueError: If the depth type is not 'relative' or 'metric'. """ if not isinstance(depth_map, np.ndarray): diff --git a/depthai_nodes/ml/messages/creators/detection.py b/depthai_nodes/ml/messages/creators/detection.py index 1fa31c0..69215d5 100644 --- a/depthai_nodes/ml/messages/creators/detection.py +++ b/depthai_nodes/ml/messages/creators/detection.py @@ -17,19 +17,33 @@ def create_detection_message( labels: List[int] = None, keypoints: List[List[Tuple[float, float]]] = None, ) -> dai.ImgDetections: - """Create a message for the detection. The message contains the bounding boxes, - labels, and confidence scores of detected objects. If there are no labels or we only - have one class, we can set labels to None and all detections will have label set to - 0. - - Args: - bboxes (np.ndarray): Detected bounding boxes of shape (N,4) meaning [...,[x_min, y_min, x_max, y_max],...]. - scores (np.ndarray): Confidence scores of detected objects of shape (N,). - labels (List[int], optional): Labels of detected objects of shape (N,). Defaults to None. 
- keypoints (List[List[Tuple[float, float]]], optional): Keypoints of detected objects of shape (N,2). Defaults to None. - - Returns: - dai.ImgDetections OR ImgDetectionsWithKeypoints: Message containing the bounding boxes, labels, confidence scores, and keypoints of detected objects. + """Create a DepthAI message for an object detection. + + @param bbox: Bounding boxes of detected objects of shape (N,4) meaning [...,[x_min, y_min, x_max, y_max],...]. + @type bbox: np.ndarray + @param scores: Confidence scores of detected objects of shape (N,). + @type scores: np.ndarray + @param labels: Labels of detected objects of shape (N,). + @type labels: List[int] + @param keypoints: Keypoints of detected objects of shape (N,2). + @type keypoints: Optional[List[List[Tuple[float, float]]]] + + @return: Message containing the bounding boxes, labels, confidence scores, and keypoints of detected objects. + @rtype: dai.ImgDetections OR ImgDetectionsWithKeypoints + + @raise ValueError: If the bboxes are not a numpy array. + @raise ValueError: If the bboxes are not of shape (N,4). + @raise ValueError: If the bboxes 2nd dimension is not of size 4. + @raise ValueError: If the bboxes are not in format [x_min, y_min, x_max, y_max] where xmin < xmax and ymin < ymax. + @raise ValueError: If the scores are not a numpy array. + @raise ValueError: If the scores are not of shape (N,). + @raise ValueError: If the scores do not have the same length as bboxes. + @raise ValueError: If the labels are not a list. + @raise ValueError: If each label is not an integer. + @raise ValueError: If the labels do not have the same length as bboxes. + @raise ValueError: If the keypoints are not a list. + @raise ValueError: If each keypoint pair is not a tuple of two floats. + @raise ValueError: If the keypoints do not have the same length as bboxes. 
""" # checks for bboxes @@ -122,15 +136,22 @@ def create_detection_message( def create_line_detection_message(lines: np.ndarray, scores: np.ndarray): - """Create a message for the line detection. The message contains the lines and - confidence scores of detected lines. - - Args: - lines (np.ndarray): Detected lines of shape (N,4) meaning [...,[x_start, y_start, x_end, y_end],...]. - scores (np.ndarray): Confidence scores of detected lines of shape (N,). - - Returns: - dai.Lines: Message containing the lines and confidence scores of detected lines. + """Create a DepthAI message for a line detection. + + @param lines: Detected lines of shape (N,4) meaning [...,[x_start, y_start, x_end, y_end],...]. + @type lines: np.ndarray + @param scores: Confidence scores of detected lines of shape (N,). + @type scores: np.ndarray + + @return: Message containing the lines and confidence scores of detected lines. + @rtype: Lines + + @raise ValueError: If the lines are not a numpy array. + @raise ValueError: If the lines are not of shape (N,4). + @raise ValueError: If the lines 2nd dimension is not of size E{4}. + @raise ValueError: If the scores are not a numpy array. + @raise ValueError: If the scores are not of shape (N,). + @raise ValueError: If the scores do not have the same length as lines. """ # checks for lines diff --git a/depthai_nodes/ml/messages/creators/image.py b/depthai_nodes/ml/messages/creators/image.py index 5492468..da5f069 100644 --- a/depthai_nodes/ml/messages/creators/image.py +++ b/depthai_nodes/ml/messages/creators/image.py @@ -7,13 +7,16 @@ def create_image_message( image: np.array, is_bgr: bool = True, ) -> dai.ImgFrame: - """Create a depthai message for an image array. + """Create a DepthAI message for an image array. + @param image: Image array in HWC or CHW format. @type image: np.array - @ivar image: Image array in HWC or CHW format. + @param is_bgr: If True, the image is in BGR format. If False, the image is in RGB + format. Defaults to True. 
@type is_bgr: bool - @ivar is_bgr: If True, the image is in BGR format. If False, the image is in RGB - format. + @return: dai.ImgFrame object containing the image information. + @rtype: dai.ImgFrame + @raise ValueError: If the image shape is not CHW or HWC. """ if image.shape[0] in [1, 3]: diff --git a/depthai_nodes/ml/messages/creators/keypoints.py b/depthai_nodes/ml/messages/creators/keypoints.py index f86ab55..c62ac1c 100644 --- a/depthai_nodes/ml/messages/creators/keypoints.py +++ b/depthai_nodes/ml/messages/creators/keypoints.py @@ -12,17 +12,25 @@ def create_hand_keypoints_message( confidence: float, confidence_threshold: float, ) -> HandKeypoints: - """Create a message for the hand keypoint detection. The message contains the 3D - coordinates of the detected hand keypoints, handedness, and confidence score. - - Args: - hand_keypoints (np.ndarray): Detected hand keypoints of shape (N,3) meaning [...,[x, y, z],...]. - handedness (float): Handedness score of the detected hand (left or right). - confidence (float): Confidence score of the detected hand. - confidence_threshold (float): Confidence threshold for the overall hand. - - Returns: - HandKeypoints: Message containing the 3D coordinates of the detected hand keypoints, handedness, and confidence score. + """Create a DepthAI message for hand keypoints detection. + + @param hand_keypoints: Detected 3D hand keypoints of shape (N,3) meaning [...,[x, y, z],...]. + @type hand_keypoints: np.ndarray + @param handedness: Handedness score of the detected hand (left: < 0.5, right > 0.5). + @type handedness: float + @param confidence: Confidence score of the detected hand. + @type confidence: float + @param confidence_threshold: Confidence threshold for the present hand. + @type confidence_threshold: float + + @return: HandKeypoints message containing the detected hand keypoints, handedness, and confidence score. + @rtype: HandKeypoints + + @raise ValueError: If the hand_keypoints are not a numpy array. 
+ @raise ValueError: If the hand_keypoints are not of shape (N,3). + @raise ValueError: If the hand_keypoints 2nd dimension is not of size E{3}. + @raise ValueError: If the handedness is not a float. + @raise ValueError: If the confidence is not a float. """ if not isinstance(hand_keypoints, np.ndarray): @@ -63,16 +71,26 @@ def create_keypoints_message( scores: Union[np.ndarray, List[float]] = None, confidence_threshold: float = None, ) -> Keypoints: - """Create a message for the keypoints. The message contains 2D or 3D coordinates of - the detected keypoints. - - Args: - keypoints (np.ndarray OR List[List[float]]): Detected keypoints of shape (N,2 or 3) meaning [...,[x, y],...] or [...,[x, y, z],...]. - scores (np.ndarray or List[float]): Confidence scores of the detected keypoints. - confidence_threshold (float): Confidence threshold for the keypoints. - - Returns: - Keypoints: Message containing 2D or 3D coordinates of the detected keypoints. + """Create a DepthAI message for the keypoints. + + @param keypoints: Detected 2D or 3D keypoints of shape (N,2 or 3) meaning [...,[x, y],...] or [...,[x, y, z],...]. + @type keypoints: np.ndarray or List[List[float]] + @param scores: Confidence scores of the detected keypoints. + @type scores: np.ndarray or List[float] + @param confidence_threshold: Confidence threshold of keypoint detections. + @type confidence_threshold: float + + @return: Keypoints message containing the detected keypoints. + @rtype: Keypoints + + @raise ValueError: If the keypoints are not a numpy array or list. + @raise ValueError: If the keypoints are not of shape (N,2 or 3). + @raise ValueError: If the keypoints 2nd dimension is not of size E{2} or E{3}. + @raise ValueError: If the scores are not a numpy array or list. + @raise ValueError: If the scores are not of shape (N,). + @raise ValueError: If the keypoints and scores do not have the same length. + @raise ValueError: If the confidence threshold is not a float. 
+ @raise ValueError: If the confidence threshold is not provided when scores are provided. """ if not isinstance(keypoints, np.ndarray): diff --git a/depthai_nodes/ml/messages/creators/segmentation.py b/depthai_nodes/ml/messages/creators/segmentation.py index 386f38f..07928c6 100644 --- a/depthai_nodes/ml/messages/creators/segmentation.py +++ b/depthai_nodes/ml/messages/creators/segmentation.py @@ -3,14 +3,16 @@ def create_segmentation_message(x: np.array) -> dai.ImgFrame: - """Create a message for the segmentation node output. Input is of the shape (H, W, - 1). In the third dimesion we specify the class of the segmented objects. + """Create a DepthAI message for segmentation mask. - Args: - x (np.array): Input from the segmentation node. - - Returns: - dai.ImgFrame: Output segmentaion message in ImgFrame.Type.RAW8. + @param x: Segmentation map array of the shape (H, W, E{1}) where E{1} stands for the + class of the segmented objects. + @type x: np.array + @return: Output segmentaion message in ImgFrame.Type.RAW8. + @rtype: dai.ImgFrame + @raise ValueError: If the input is not a numpy array. + @raise ValueError: If the input is not 3D. + @raise ValueError: If the input 3rd dimension is not E{1}. """ if not isinstance(x, np.ndarray): diff --git a/depthai_nodes/ml/messages/creators/thermal.py b/depthai_nodes/ml/messages/creators/thermal.py index 2e82d41..8f0f0ae 100644 --- a/depthai_nodes/ml/messages/creators/thermal.py +++ b/depthai_nodes/ml/messages/creators/thermal.py @@ -3,14 +3,16 @@ def create_thermal_message(thermal_image: np.array) -> dai.ImgFrame: - """Creates a thermal image message in the form of an ImgFrame using the provided - thermal image array. - - Args: - thermal_image (np.array): A NumPy array representing the thermal image with shape (CHW or HWC). - - Returns: - dai.ImgFrame: An ImgFrame object containing the thermal information. + """Create a DepthAI message for thermal image. 
+ + @param thermal_image: A NumPy array representing the thermal image with shape (CHW + or HWC). + @type thermal_image: np.array + @return: An ImgFrame object containing the thermal information. + @rtype: dai.ImgFrame + @raise ValueError: If the input is not a NumPy array. + @raise ValueError: If the input is not 3D. + @raise ValueError: If the input shape is not CHW or HWC. """ if not isinstance(thermal_image, np.ndarray): diff --git a/depthai_nodes/ml/messages/creators/tracked_features.py b/depthai_nodes/ml/messages/creators/tracked_features.py index 75fbfdb..7208ac7 100644 --- a/depthai_nodes/ml/messages/creators/tracked_features.py +++ b/depthai_nodes/ml/messages/creators/tracked_features.py @@ -5,14 +5,16 @@ def create_feature_point(x: float, y: float, id: int, age: int) -> dai.TrackedFeature: """Create a tracked feature point. - Args: - x (float): X coordinate of the feature point. - y (float): Y coordinate of the feature point. - id (int): ID of the feature point. - age (int): Age of the feature point. - - Returns: - dai.TrackedFeature: Tracked feature point. + @param x: X coordinate of the feature point. + @type x: float + @param y: Y coordinate of the feature point. + @type y: float + @param id: ID of the feature point. + @type id: int + @param age: Age of the feature point. + @type age: int + @return: Tracked feature point. + @rtype: dai.TrackedFeature """ feature = dai.TrackedFeature() @@ -27,14 +29,22 @@ def create_feature_point(x: float, y: float, id: int, age: int) -> dai.TrackedFe def create_tracked_features_message( reference_points: np.ndarray, target_points: np.ndarray ) -> dai.TrackedFeatures: - """Create a message for the tracked features. + """Create a DepthAI message for tracked features. + + @param reference_points: Reference points of shape (N,2) meaning [...,[x, y],...]. + @type reference_points: np.ndarray + @param target_points: Target points of shape (N,2) meaning [...,[x, y],...]. 
+ @type target_points: np.ndarray - Args: - reference_points (np.ndarray): Reference points of shape (N,2) meaning [...,[x, y],...]. - target_points (np.ndarray): Target points of shape (N,2) meaning [...,[x, y],...]. + @return: Message containing the tracked features. + @rtype: dai.TrackedFeatures - Returns: - dai.TrackedFeatures: Message containing the tracked features. + @raise ValueError: If the reference_points are not a numpy array. + @raise ValueError: If the reference_points are not of shape (N,2). + @raise ValueError: If the reference_points 2nd dimension is not of size E{2}. + @raise ValueError: If the target_points are not a numpy array. + @raise ValueError: If the target_points are not of shape (N,2). + @raise ValueError: If the target_points 2nd dimension is not of size E{2}. """ if not isinstance(reference_points, np.ndarray): diff --git a/depthai_nodes/ml/messages/img_detections.py b/depthai_nodes/ml/messages/img_detections.py index ebd1c39..9c6114e 100644 --- a/depthai_nodes/ml/messages/img_detections.py +++ b/depthai_nodes/ml/messages/img_detections.py @@ -4,16 +4,37 @@ class ImgDetectionWithKeypoints(dai.ImgDetection): + """ImgDetectionWithKeypoints class for storing image detection with keypoints. + + Attributes + ---------- + keypoints: List[Tuple[float, float]] + Keypoints of the image detection. + """ + def __init__(self): + """Initializes the ImgDetectionWithKeypoints object.""" dai.ImgDetection.__init__(self) # TODO: change to super().__init__()? self._keypoints: List[Tuple[float, float]] = [] @property def keypoints(self) -> List[Tuple[float, float]]: + """Returns the keypoints. + + @return: List of keypoints. + @rtype: List[Tuple[float, float]] + """ return self._keypoints @keypoints.setter def keypoints(self, value: List[Tuple[Union[int, float], Union[int, float]]]): + """Sets the keypoints. + + @param value: List of keypoints. 
+ @type value: List[Tuple[Union[int, float], Union[int, float]]] + @raise TypeError: If the keypoints are not a list. + @raise TypeError: If each keypoint is not a tuple of two floats or integers. + """ if not isinstance(value, list): raise TypeError("Keypoints must be a list") for item in value: @@ -29,16 +50,38 @@ def keypoints(self, value: List[Tuple[Union[int, float], Union[int, float]]]): class ImgDetectionsWithKeypoints(dai.Buffer): + """ImgDetectionsWithKeypoints class for storing image detections with keypoints. + + Attributes + ---------- + detections: List[ImgDetectionWithKeypoints] + Image detections with keypoints. + """ + def __init__(self): + """Initializes the ImgDetectionsWithKeypoints object.""" dai.Buffer.__init__(self) # TODO: change to super().__init__()? self._detections: List[ImgDetectionWithKeypoints] = [] @property def detections(self) -> List[ImgDetectionWithKeypoints]: + """Returns the image detections with keypoints. + + @return: List of image detections with keypoints. + @rtype: List[ImgDetectionWithKeypoints] + """ return self._detections @detections.setter def detections(self, value: List[ImgDetectionWithKeypoints]): + """Sets the image detections with keypoints. + + @param value: List of image detections with keypoints. + @type value: List[ImgDetectionWithKeypoints] + @raise TypeError: If the detections are not a list. + @raise TypeError: If each detection is not an instance of + ImgDetectionWithKeypoints. + """ if not isinstance(value, list): raise TypeError("Detections must be a list") for item in value: diff --git a/depthai_nodes/ml/messages/keypoints.py b/depthai_nodes/ml/messages/keypoints.py index 2d2d466..693759e 100644 --- a/depthai_nodes/ml/messages/keypoints.py +++ b/depthai_nodes/ml/messages/keypoints.py @@ -4,16 +4,37 @@ class Keypoints(dai.Buffer): + """Keypoints class for storing keypoints. + + Attributes + ---------- + keypoints: List[dai.Point3f] + List of dai.Point3f, each representing a keypoint. 
+ """ + def __init__(self): + """Initializes the Keypoints object.""" super().__init__() self._keypoints: List[dai.Point3f] = [] @property def keypoints(self) -> List[dai.Point3f]: + """Returns the keypoints. + + @return: List of keypoints. + @rtype: List[dai.Point3f] + """ return self._keypoints @keypoints.setter def keypoints(self, value: List[dai.Point3f]): + """Sets the keypoints. + + @param value: List of keypoints. + @type value: List[dai.Point3f] + @raise TypeError: If the keypoints are not a list. + @raise TypeError: If each keypoint is not of type dai.Point3f. + """ if not isinstance(value, list): raise TypeError("keypoints must be a list.") for item in value: @@ -23,27 +44,60 @@ def keypoints(self, value: List[dai.Point3f]): class HandKeypoints(Keypoints): + """HandKeypoints class for storing hand keypoints. + + Attributes + ---------- + confidence: float + Confidence of the hand keypoints. + handdedness: float + Handedness of the hand keypoints. 0.0 for left hand and 1.0 for right hand. + """ + def __init__(self): + """Initializes the HandKeypoints object.""" Keypoints.__init__(self) self._confidence: float = 0.0 self._handdedness: float = 0.0 @property def confidence(self) -> float: + """Returns the confidence of the hand keypoints. + + @return: Confidence of the hand keypoints. + @rtype: float + """ return self._confidence @confidence.setter def confidence(self, value: float): + """Sets the confidence of the hand keypoints. + + @param value: Confidence of the hand keypoints. + @type value: float + @raise TypeError: If the confidence is not a float. + """ if not isinstance(value, float): raise TypeError("confidence must be a float.") self._confidence = value @property def handdedness(self) -> float: + """Returns the handdedness of the hand keypoints. + + @return: Handdedness of the hand keypoints. + @rtype: float + """ return self._handdedness @handdedness.setter def handdedness(self, value: float): + """Sets the handdedness of the hand keypoints. 
+ + @param value: Handdedness of the hand keypoints. + @type value: float + @raise TypeError: If the handdedness is not a float. + """ if not isinstance(value, float): raise TypeError("handdedness must be a float.") self._handdedness = value diff --git a/depthai_nodes/ml/messages/lines.py b/depthai_nodes/ml/messages/lines.py index 832499a..3c63191 100644 --- a/depthai_nodes/ml/messages/lines.py +++ b/depthai_nodes/ml/messages/lines.py @@ -4,7 +4,20 @@ class Line(dai.Buffer): + """Line class for storing a line. + + Attributes + ---------- + start_point : dai.Point2f + Start point of the line with x and y coordinate. + end_point : dai.Point2f + End point of the line with x and y coordinate. + confidence : float + Confidence of the line. + """ + def __init__(self): + """Initializes the Line object.""" super().__init__() self._start_point: dai.Point2f = None self._end_point: dai.Point2f = None @@ -12,10 +25,21 @@ def __init__(self): @property def start_point(self) -> dai.Point2f: + """Returns the start point of the line. + + @return: Start point of the line. + @rtype: dai.Point2f + """ return self._start_point @start_point.setter def start_point(self, value: dai.Point2f): + """Sets the start point of the line. + + @param value: Start point of the line. + @type value: dai.Point2f + @raise TypeError: If the start point is not of type dai.Point2f. + """ if not isinstance(value, dai.Point2f): raise TypeError( f"start_point must be of type Point2f, instead got {type(value)}." @@ -24,10 +48,21 @@ def start_point(self, value: dai.Point2f): @property def end_point(self) -> dai.Point2f: + """Returns the end point of the line. + + @return: End point of the line. + @rtype: dai.Point2f + """ return self._end_point @end_point.setter def end_point(self, value: dai.Point2f): + """Sets the end point of the line. + + @param value: End point of the line. + @type value: dai.Point2f + @raise TypeError: If the end point is not of type dai.Point2f. 
+ """ if not isinstance(value, dai.Point2f): raise TypeError( f"end_point must be of type Point2f, instead got {type(value)}." @@ -36,10 +71,21 @@ def end_point(self, value: dai.Point2f): @property def confidence(self) -> float: + """Returns the confidence of the line. + + @return: Confidence of the line. + @rtype: float + """ return self._confidence @confidence.setter def confidence(self, value: float): + """Sets the confidence of the line. + + @param value: Confidence of the line. + @type value: float + @raise TypeError: If the confidence is not of type float. + """ if not isinstance(value, float): raise TypeError( f"confidence must be of type float, instead got {type(value)}." @@ -48,16 +94,37 @@ def confidence(self, value: float): class Lines(dai.Buffer): + """Lines class for storing lines. + + Attributes + ---------- + lines : List[Line] + List of detected lines. + """ + def __init__(self): + """Initializes the Lines object.""" super().__init__() self._lines: List[Line] = [] @property def lines(self) -> List[Line]: + """Returns the lines. + + @return: List of lines. + @rtype: List[Line] + """ return self._lines @lines.setter def lines(self, value: List[Line]): + """Sets the lines. + + @param value: List of lines. + @type value: List[Line] + @raise TypeError: If the lines are not a list. + @raise TypeError: If each line is not of type Line. + """ if not isinstance(value, List): raise TypeError( f"lines must be of type List[Line], instead got {type(value)}." diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index 5b0d88c..534db76 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -5,7 +5,37 @@ class ImageOutputParser(dai.node.ThreadedHostNode): + """Parser class for image-to-image models (e.g. DnCNN3, zero-dce etc.) where the + output is a modifed image (denoised, enhanced etc.). + + Attributes + ---------- + input : Node.Input + Node's input. 
It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + output_is_bgr : bool + Flag indicating if the output image is in BGR (Blue-Green-Red) format. + + Output Message/s + ------- + **Type**: dai.ImgFrame + + **Description**: Image message containing the output image e.g. denoised or enhanced images. + + Error Handling + -------------- + **ValueError**: If the output is not 3- or 4-dimensional. + + **ValueError**: If the number of output layers is not 1. + """ + def __init__(self, output_is_bgr=False): + """Initializes ImageOutputParser node. + + @param output_is_bgr: Flag indicating if the output image is in BGR. + @type output_is_bgr: bool + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -13,15 +43,10 @@ def __init__(self, output_is_bgr=False): self.output_is_bgr = output_is_bgr def setBGROutput(self): + """Sets the flag indicating that output image is in BGR.""" self.output_is_bgr = True def run(self): - """Postprocessing logic for image-to-image models (e.g. DnCNN3, zero-dce etc.). - - Returns: - dai.ImgFrame: uint8, grayscale HW / colorscale HWC BGR image. - """ - while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/keypoints.py b/depthai_nodes/ml/parsers/keypoints.py index 00cb130..4f9f8ce 100644 --- a/depthai_nodes/ml/parsers/keypoints.py +++ b/depthai_nodes/ml/parsers/keypoints.py @@ -5,11 +5,48 @@ class KeypointParser(dai.node.ThreadedHostNode): + """Parser class for 2D or 3D keypoints models. It expects one ouput layer containing + keypoints. The number of keypoints must be specified. Moreover, the keypoints are + normalized by a scale factor if provided. 
+ + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + scale_factor : float + Scale factor to divide the keypoints by. + num_keypoints : int + Number of keypoints the model detects. + + Output Message/s + ---------------- + **Type**: Keypoints + + **Description**: Keypoints message containing 2D or 3D keypoints. + + Error Handling + -------------- + **ValueError**: If the number of keypoints is not specified. + + **ValueError**: If the number of coordinates per keypoint is not 2 or 3. + + **ValueError**: If the number of output layers is not 1. + """ + def __init__( self, scale_factor=1, num_keypoints=None, ): + """Initializes KeypointParser node. + + @param scale_factor: Scale factor to divide the keypoints by. + @type scale_factor: float + @param num_keypoints: Number of keypoints. + @type num_keypoints: int + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -18,18 +55,22 @@ def __init__( self.num_keypoints = num_keypoints def setScaleFactor(self, scale_factor): + """Sets the scale factor to divide the keypoints by. + + @param scale_factor: Scale factor to divide the keypoints by. + @type scale_factor: float + """ self.scale_factor = scale_factor def setNumKeypoints(self, num_keypoints): - self.num_keypoints = num_keypoints + """Sets the number of keypoints. - def run(self): - """Postprocessing logic for Keypoint model. - - Returns: - dai.Keypoints: num_keypoints keypoints (2D or 3D). + @param num_keypoints: Number of keypoints. 
+ @type num_keypoints: int """ + self.num_keypoints = num_keypoints + def run(self): if self.num_keypoints is None: raise ValueError("Number of keypoints must be specified!") diff --git a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py index a55bef9..f358c60 100644 --- a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py +++ b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py @@ -5,7 +5,42 @@ class MPHandLandmarkParser(dai.node.ThreadedHostNode): + """Parser class for MediaPipe Hand landmark model. It parses the output of the + MediaPipe Hand landmark model containing 21 3D hand landmarks. The landmarks are + normalized and sent as a message to the output. Besides landmarks, the message + contains confidence score and handedness score (right or left hand). + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for hand landmarks. + scale_factor : float + Scale factor to divide the landmarks by. + + Output Message/s + ---------------- + **Type**: HandLandmarks + + **Description**: HandLandmarks message containing normalized 21 3D landmarks, confidence score, and handedness score (right or left hand). + + See also + -------- + Official MediaPipe Hands solution: + https://ai.google.dev/edge/mediapipe/solutions/vision/hand_landmarker + """ + def __init__(self, score_threshold=0.5, scale_factor=224): + """Initialize MPHandLandmarkParser node. + + @param score_threshold: Confidence score threshold for hand landmarks. + @type score_threshold: float + @param scale_factor: Scale factor to divide the landmarks by. 
+ @type scale_factor: float + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -14,18 +49,22 @@ def __init__(self, score_threshold=0.5, scale_factor=224): self.scale_factor = scale_factor def setScoreThreshold(self, threshold): + """Set the confidence score threshold for hand landmarks. + + @param threshold: Confidence score threshold for hand landmarks. + @type threshold: float + """ self.score_threshold = threshold def setScaleFactor(self, scale_factor): - self.scale_factor = scale_factor + """Set the scale factor to divide the landmarks by. - def run(self): - """Postprocessing logic for MediaPipe Hand landmark model. - - Returns: - HandLandmarks containing normalized 21 landmarks, confidence score, and handdedness score (right or left hand). + @param scale_factor: Scale factor to divide the landmarks by. + @type scale_factor: float """ + self.scale_factor = scale_factor + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py index 3a22960..0c992ec 100644 --- a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py +++ b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py @@ -7,7 +7,45 @@ class MPPalmDetectionParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the Mediapipe Palm detection model. As the + result, the node sends out the detected hands in the form of a message containing + bounding boxes, labels, and confidence scores. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. 
It is a linking point from which the processed network results are retrieved.Parser sends the processed network results to this output in form of messages. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected hands. + nms_threshold : float + Non-maximum suppression threshold. + top_k : int + Maximum number of detections to keep. + + Output Message/s + ------- + **Type**: dai.ImgDetections + + **Description**: ImgDetections message containing bounding boxes, labels, and confidence scores of detected hands. + + See also + -------- + Official MediaPipe Hands solution: + https://ai.google.dev/edge/mediapipe/solutions/vision/hand_landmarker + """ + def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): + """Initializes the MPPalmDetectionParser node. + + @param score_threshold: Confidence score threshold for detected hands. + @type score_threshold: float + @param nms_threshold: Non-maximum suppression threshold. + @type nms_threshold: float + @param top_k: Maximum number of detections to keep. + @type top_k: int + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -17,21 +55,30 @@ def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): self.top_k = top_k def setConfidenceThreshold(self, threshold): + """Sets the confidence score threshold for detected hands. + + @param threshold: Confidence score threshold for detected hands. + @type threshold: float + """ self.score_threshold = threshold def setNMSThreshold(self, threshold): + """Sets the non-maximum suppression threshold. + + @param threshold: Non-maximum suppression threshold. + @type threshold: float + """ self.nms_threshold = threshold def setTopK(self, top_k): - self.top_k = top_k - - def run(self): - """Postprocessing logic for MediPipe Hand detection model. + """Sets the maximum number of detections to keep. 
- Returns: - dai.ImgDetections containing bounding boxes, labels, and confidence scores of detected hands. + @param top_k: Maximum number of detections to keep. + @type top_k: int """ + self.top_k = top_k + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/mlsd.py b/depthai_nodes/ml/parsers/mlsd.py index 2390a6d..e13dadf 100644 --- a/depthai_nodes/ml/parsers/mlsd.py +++ b/depthai_nodes/ml/parsers/mlsd.py @@ -6,12 +6,48 @@ class MLSDParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the M-LSD line detection model. The parser + is specifically designed to parse the output of the M-LSD model. As the result, the + node sends out the detected lines in the form of a message. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + nn_passthrough : Node.Input + Node's 2nd input. It accepts the passthrough of the Neural Network node. This is required for parsing the output of the M-LSD model. + It is a linking point to which the Neural Network's passthrough (network's input accutualy) is linked. + topk_n : int + Number of top candidates to keep. + score_thr : float + Confidence score threshold for detected lines. + dist_thr : float + Distance threshold for merging lines. + + Output Message/s + ---------------- + **Type**: LineDetections + + **Description**: LineDetections message containing detected lines and confidence scores. + """ + def __init__( self, topk_n=200, score_thr=0.10, dist_thr=20.0, ): + """Initializes the MLSDParser node. + + @param topk_n: Number of top candidates to keep. 
+ @type topk_n: int + @param score_thr: Confidence score threshold for detected lines. + @type score_thr: float + @param dist_thr: Distance threshold for merging lines. + @type dist_thr: float + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.nn_passthrough = dai.Node.Input(self) @@ -21,21 +57,30 @@ def __init__( self.dist_thr = dist_thr def setTopK(self, topk_n): + """Sets the number of top candidates to keep. + + @param topk_n: Number of top candidates to keep. + @type topk_n: int + """ self.topk_n = topk_n def setScoreThreshold(self, score_thr): + """Sets the confidence score threshold for detected lines. + + @param score_thr: Confidence score threshold for detected lines. + @type score_thr: float + """ self.score_thr = score_thr def setDistanceThreshold(self, dist_thr): - self.dist_thr = dist_thr + """Sets the distance threshold for merging lines. - def run(self): - """Postprocessing logic for M-LSD line detection model. - - Returns: - Normalized detected lines and confidence scores. + @param dist_thr: Distance threshold for merging lines. + @type dist_thr: float """ + self.dist_thr = dist_thr + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py index afa9996..21e48d4 100644 --- a/depthai_nodes/ml/parsers/monocular_depth.py +++ b/depthai_nodes/ml/parsers/monocular_depth.py @@ -4,7 +4,34 @@ class MonocularDepthParser(dai.node.ThreadedHostNode): + """Parser class for monocular depth models (e.g. Depth Anything model). + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. 
+ depth_type : str + Type of depth output (relative or metric). + + Output Message/s + ---------------- + **Type**: dai.ImgFrame + + **Description**: Depth message containing the depth map. The depth map is represented with dai.ImgFrame. + + Error Handling + -------------- + **ValueError**: If the number of output layers is not E{1}. + """ + def __init__(self, depth_type="relative"): + """Initializes the MonocularDepthParser node. + + @param depth_type: Type of depth output (relative or metric). + @type depth_type: str + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -12,19 +39,14 @@ def __init__(self, depth_type="relative"): self.depth_type = depth_type def setRelativeDepthType(self): + """Sets the depth type to relative.""" self.depth_type = "relative" def setMetricDepthType(self): + """Sets the depth type to metric.""" self.depth_type = "metric" def run(self): - """Postprocessing logic for a model with monocular depth output (e.g.Depth - Anything model). - - Returns: - dai.ImgFrame: uint16, HW depth map. - """ - while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py index c107dff..b2f2411 100644 --- a/depthai_nodes/ml/parsers/scrfd.py +++ b/depthai_nodes/ml/parsers/scrfd.py @@ -6,7 +6,38 @@ class SCRFDParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the SCRFD face detection model. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected faces. 
+ nms_threshold : float + Non-maximum suppression threshold. + top_k : int + Maximum number of detections to keep. + + Output Message/s + ---------------- + **Type**: dai.ImgDetections + + **Description**: ImgDetections message containing bounding boxes, labels, and confidence scores of detected faces. + """ + def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): + """Initializes the SCRFDParser node. + + @param score_threshold: Confidence score threshold for detected faces. + @type score_threshold: float + @param nms_threshold: Non-maximum suppression threshold. + @type nms_threshold: float + @param top_k: Maximum number of detections to keep. + @type top_k: int + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -16,30 +47,36 @@ def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): self.top_k = top_k def setConfidenceThreshold(self, threshold): + """Sets the confidence score threshold for detected faces. + + @param threshold: Confidence score threshold for detected faces. + @type threshold: float + """ self.score_threshold = threshold def setNMSThreshold(self, threshold): + """Sets the non-maximum suppression threshold. + + @param threshold: Non-maximum suppression threshold. + @type threshold: float + """ self.nms_threshold = threshold def setTopK(self, top_k): - self.top_k = top_k - - def run(self): - """Postprocessing logic for SCRFD model. + """Sets the maximum number of detections to keep. - Returns: - ... + @param top_k: Maximum number of detections to keep. 
+ @type top_k: int """ + self.top_k = top_k + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() except dai.MessageQueue.QueueException: break # Pipeline was stopped - print("SCRFD node") - print(f"Layer names = {output.getAllLayerNames()}") - score_8 = output.getTensor("score_8").flatten().astype(np.float32) score_16 = output.getTensor("score_16").flatten().astype(np.float32) score_32 = output.getTensor("score_32").flatten().astype(np.float32) diff --git a/depthai_nodes/ml/parsers/segmentation.py b/depthai_nodes/ml/parsers/segmentation.py index 2febe41..2f2fc2d 100644 --- a/depthai_nodes/ml/parsers/segmentation.py +++ b/depthai_nodes/ml/parsers/segmentation.py @@ -5,22 +5,50 @@ class SegmentationParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the segmentation models. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + background_class : bool + Whether to add additional layer for background. + + Output Message/s + ---------------- + **Type**: dai.ImgFrame + + **Description**: Segmentation message containing the segmentation mask. Every pixel belongs to exactly one class. + + Error Handling + -------------- + **ValueError**: If the number of output layers is not E{1}. + + **ValueError**: If the number of dimensions of the output tensor is not E{3}. + """ + def __init__(self, background_class=False): + """Initializes the SegmentationParser node. + + @param background_class: Whether to add additional layer for background. 
+ @type background_class: bool + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) self.background_class = background_class def setBackgroundClass(self, background_class): - self.background_class = background_class - - def run(self): - """Postprocessing logic for Segmentation model. + """Sets the background class. - Returns: - Segmenation mask with classes given by the model and background class 0. + @param background_class: Whether to add additional layer for background. + @type background_class: bool """ + self.background_class = background_class + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/superanimal_landmarker.py b/depthai_nodes/ml/parsers/superanimal_landmarker.py index f417aff..48fd400 100644 --- a/depthai_nodes/ml/parsers/superanimal_landmarker.py +++ b/depthai_nodes/ml/parsers/superanimal_landmarker.py @@ -6,11 +6,38 @@ class SuperAnimalParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the SuperAnimal landmark model. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected keypoints. + scale_factor : float + Scale factor to divide the keypoints by. + + Output Message/s + ---------------- + **Type**: Keypoints + + **Description**: Keypoints message containing detected keypoints that exceeds confidence threshold. + """ + def __init__( self, score_threshold=0.5, scale_factor=256, ): + """Initializes the SuperAnimalParser node. + + @param score_threshold: Confidence score threshold for detected keypoints. 
+ @type score_threshold: float + @param scale_factor: Scale factor to divide the keypoints by. + @type scale_factor: float + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -19,18 +46,22 @@ def __init__( self.scale_factor = scale_factor def setScoreThreshold(self, threshold): + """Sets the confidence score threshold for detected keypoints. + + @param threshold: Confidence score threshold for detected keypoints. + @type threshold: float + """ self.score_threshold = threshold def setScaleFactor(self, scale_factor): - self.scale_factor = scale_factor - - def run(self): - """Postprocessing logic for SuperAnimal landmark model. + """Sets the scale factor to divide the keypoints by. - Returns: - dai.Keypoints: Max 39 keypoints detected on the quadrupedal animal. + @param scale_factor: Scale factor to divide the keypoints by. + @type scale_factor: float """ + self.scale_factor = scale_factor + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/thermal_image.py b/depthai_nodes/ml/parsers/thermal_image.py index 62f716f..44ed955 100644 --- a/depthai_nodes/ml/parsers/thermal_image.py +++ b/depthai_nodes/ml/parsers/thermal_image.py @@ -4,18 +4,30 @@ class ThermalImageParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of models with thermal image output (e.g. + UGSR-FA). + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + + Output Message/s + ---------------- + **Type**: dai.ImgFrame + + **Description**: Thermal message containing the thermal image. 
+ """ + def __init__(self): + """Initializes the ThermalImageParser node.""" dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) def run(self): - """Postprocessing logic for a model with thermal image output (e.g. UGSR-FA). - - Returns: - dai.ImgFrame: uint16, HW thermal image. - """ - while self.isRunning(): try: output: dai.NNData = self.input.get() diff --git a/depthai_nodes/ml/parsers/utils/decode_detections.py b/depthai_nodes/ml/parsers/utils/decode_detections.py index 2deb838..d10b85d 100644 --- a/depthai_nodes/ml/parsers/utils/decode_detections.py +++ b/depthai_nodes/ml/parsers/utils/decode_detections.py @@ -14,23 +14,31 @@ def decode_detections( ) -> List[Dict[str, Any]]: """Decode the detections from neural network output tensors. - Args: - input_size (float): The input size of the model that produced the detections, (width, height). - stride (int): The stride used in the detection grid. - rows (int): Number of rows in the detection grid. - cols (int): Number of columns in the detection grid. - score_threshold (float): Minimum score threshold for a detection to be considered valid. - cls (np.ndarray): 2D array of class scores for each grid cell, shape (grid_size, num_classes). - obj (np.ndarray): 1D array of objectness scores for each grid cell, shape (grid_size,). - bbox (np.ndarray): 2D array of bounding box coordinates, shape (grid_size, 4). - kps (np.ndarray): 2D array of keypoint coordinates, shape (grid_size, num_keypoints * 2). + @param input_size: The input size of the model that produced the detections, (width, height). + @type input_size: float + @param stride: The stride used in the detection grid. + @type stride: int + @param rows: Number of rows in the detection grid. + @type rows: int + @param cols: Number of columns in the detection grid. + @type cols: int + @param score_threshold: Minimum score threshold for a detection to be considered valid. 
+ @type score_threshold: float + @param cls: 2D array of class scores for each grid cell, shape (grid_size, num_classes). + @type cls: np.ndarray + @param obj: 1D array of objectness scores for each grid cell, shape (grid_size,). + @type obj: np.ndarray + @param bbox: 2D array of bounding box coordinates, shape (grid_size, 4). + @type bbox: np.ndarray + @param kps: 2D array of keypoint coordinates, shape (grid_size, num_keypoints * 2). + @type kps: np.ndarray - Returns: - List[Dict[str, Any]]: A list of detections, where each detection is a dictionary containing: - - "bbox": [x1, y1, width, height] (relative bounding box coordinates) - - "label": int (class label) - - "keypoints": List[float] (relative keypoint coordinates) - - "score": float (detection score) + @return: A list of detections, where each detection is a dictionary containing: + - "bbox": [x1, y1, width, height] (relative bounding box coordinates) + - "label": int (class label) + - "keypoints": List[float] (relative keypoint coordinates) + - "score": float (detection score) + @rtype: List[Dict[str, Any]] """ input_width, input_height = input_size diff --git a/depthai_nodes/ml/parsers/utils/denormalize.py b/depthai_nodes/ml/parsers/utils/denormalize.py index 7455403..148cfa7 100644 --- a/depthai_nodes/ml/parsers/utils/denormalize.py +++ b/depthai_nodes/ml/parsers/utils/denormalize.py @@ -4,12 +4,12 @@ def unnormalize_image(image, normalize=True): """Un-normalize an image tensor by scaling it to the [0, 255] range. - Args: - image (np.ndarray): The normalized image tensor of shape (H, W, C) or (C, H, W). - normalize (bool, optional): Whether to normalize the image tensor. Defaults to True. - - Returns: - np.ndarray: The un-normalized image. + @param image: The normalized image tensor of shape (H, W, C) or (C, H, W). + @type image: np.ndarray + @param normalize: Whether to normalize the image tensor. Defaults to True. + @type normalize: bool + @return: The un-normalized image. 
+ @rtype: np.ndarray """ # Normalize the image tensor to the range [0, 1] if normalize: diff --git a/depthai_nodes/ml/parsers/utils/medipipe.py b/depthai_nodes/ml/parsers/utils/medipipe.py index e298659..8d0234b 100644 --- a/depthai_nodes/ml/parsers/utils/medipipe.py +++ b/depthai_nodes/ml/parsers/utils/medipipe.py @@ -9,9 +9,6 @@ License: MIT License -MIT License ------------ - Copyright (c) [2021] [geax] """ @@ -166,69 +163,69 @@ def generate_handtracker_anchors(input_size_width, input_size_height): def decode_bboxes(score_thresh, scores, bboxes, anchors, scale=128, best_only=False): - """Wi, hi : NN input shape - mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc # Decodes - the detection tensors generated by the model, based on # the SSD anchors and the - specification in the options, into a vector of # detections. Each detection - describes a detected object. - - https://github.com/google/mediapipe/blob/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt : - node { - calculator: "TensorsToDetectionsCalculator" - input_stream: "TENSORS:detection_tensors" - input_side_packet: "ANCHORS:anchors" - output_stream: "DETECTIONS:unfiltered_detections" - options: { - [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { - num_classes: 1 - num_boxes: 896 - num_coords: 18 - box_coord_offset: 0 - keypoint_coord_offset: 4 - num_keypoints: 7 - num_values_per_keypoint: 2 - sigmoid_score: true - score_clipping_thresh: 100.0 - reverse_output_order: true - - x_scale: 128.0 - y_scale: 128.0 - h_scale: 128.0 - w_scale: 128.0 - min_score_thresh: 0.5 - } - } - } - node { - calculator: "TensorsToDetectionsCalculator" - input_stream: "TENSORS:detection_tensors" - input_side_packet: "ANCHORS:anchors" - output_stream: "DETECTIONS:unfiltered_detections" - options: { - [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { - num_classes: 1 - num_boxes: 2016 - num_coords: 18 - box_coord_offset: 0 - keypoint_coord_offset: 4 - num_keypoints: 7 - 
num_values_per_keypoint: 2 - sigmoid_score: true - score_clipping_thresh: 100.0 - reverse_output_order: true - - x_scale: 192.0 - y_scale: 192.0 - w_scale: 192.0 - h_scale: 192.0 - min_score_thresh: 0.5 - } - } - } - - scores: shape = [number of anchors 896 or 2016] - bboxes: shape = [ number of anchors x 18], 18 = 4 (bounding box : (cx,cy,w,h) + 14 (7 palm keypoints) - """ + # Wi, hi : NN input shape + # mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator.cc # Decodes + # the detection tensors generated by the model, based on # the SSD anchors and the + # specification in the options, into a vector of # detections. Each detection + # describes a detected object. + + # https://github.com/google/mediapipe/blob/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt : + # node { + # calculator: "TensorsToDetectionsCalculator" + # input_stream: "TENSORS:detection_tensors" + # input_side_packet: "ANCHORS:anchors" + # output_stream: "DETECTIONS:unfiltered_detections" + # options: { + # [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + # num_classes: 1 + # num_boxes: 896 + # num_coords: 18 + # box_coord_offset: 0 + # keypoint_coord_offset: 4 + # num_keypoints: 7 + # num_values_per_keypoint: 2 + # sigmoid_score: true + # score_clipping_thresh: 100.0 + # reverse_output_order: true + + # x_scale: 128.0 + # y_scale: 128.0 + # h_scale: 128.0 + # w_scale: 128.0 + # min_score_thresh: 0.5 + # } + # } + # } + # node { + # calculator: "TensorsToDetectionsCalculator" + # input_stream: "TENSORS:detection_tensors" + # input_side_packet: "ANCHORS:anchors" + # output_stream: "DETECTIONS:unfiltered_detections" + # options: { + # [mediapipe.TensorsToDetectionsCalculatorOptions.ext] { + # num_classes: 1 + # num_boxes: 2016 + # num_coords: 18 + # box_coord_offset: 0 + # keypoint_coord_offset: 4 + # num_keypoints: 7 + # num_values_per_keypoint: 2 + # sigmoid_score: true + # score_clipping_thresh: 100.0 + # reverse_output_order: true + + # x_scale: 192.0 + # 
y_scale: 192.0 + # w_scale: 192.0 + # h_scale: 192.0 + # min_score_thresh: 0.5 + # } + # } + # } + + # scores: shape = [number of anchors 896 or 2016] + # bboxes: shape = [ number of anchors x 18], 18 = 4 (bounding box : (cx,cy,w,h) + 14 (7 palm keypoints) + regions = [] scores = 1 / (1 + np.exp(-scores)) if best_only: diff --git a/depthai_nodes/ml/parsers/utils/mlsd.py b/depthai_nodes/ml/parsers/utils/mlsd.py index 3b97805..2306ca2 100644 --- a/depthai_nodes/ml/parsers/utils/mlsd.py +++ b/depthai_nodes/ml/parsers/utils/mlsd.py @@ -6,6 +6,18 @@ def decode_scores_and_points( tpMap: np.ndarray, heat: np.ndarray, topk_n: int ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Decode the scores and points from the neural network output tensors. Used for + MLSD model. + + @param tpMap: Tensor containing the vector map. + @type tpMap: np.ndarray + @param heat: Tensor containing the heat map. + @type heat: np.ndarray + @param topk_n: Number of top candidates to keep. + @type topk_n: int + @return: Detected points, confidence scores for the detected points, and vector map. + @rtype: Tuple[np.ndarray, np.ndarray, np.ndarray] + """ b, c, h, w = tpMap.shape displacement = tpMap[:, 1:5, :, :][0] @@ -27,6 +39,24 @@ def get_lines( dist_thr: float, input_size: int = 512, ) -> Tuple[np.ndarray, List[float]]: + """Get lines from the detected points and scores. The lines are filtered by the + score threshold and distance threshold. Used for MLSD model. + + @param pts: Detected points. + @type pts: np.ndarray + @param pts_score: Confidence scores for the detected points. + @type pts_score: np.ndarray + @param vmap: Vector map. + @type vmap: np.ndarray + @param score_thr: Confidence score threshold for detected lines. + @type score_thr: float + @param dist_thr: Distance threshold for merging lines. + @type dist_thr: float + @param input_size: Input size of the model. + @type input_size: int + @return: Detected lines and their confidence scores. 
+ @rtype: Tuple[np.ndarray, List[float]] + """ start = vmap[:, :, :2] end = vmap[:, :, 2:] dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1)) diff --git a/depthai_nodes/ml/parsers/utils/superanimal.py b/depthai_nodes/ml/parsers/utils/superanimal.py index dd1dc01..8c416b6 100644 --- a/depthai_nodes/ml/parsers/utils/superanimal.py +++ b/depthai_nodes/ml/parsers/utils/superanimal.py @@ -2,6 +2,13 @@ def get_top_values(heatmap): + """Get the top values from the heatmap tensor. + + @param heatmap: Heatmap tensor. + @type heatmap: np.ndarray + @return: Y and X coordinates of the top values. + @rtype: Tuple[np.ndarray, np.ndarray] + """ batchsize, ny, nx, num_joints = heatmap.shape heatmap_flat = heatmap.reshape(batchsize, nx * ny, num_joints) @@ -12,6 +19,18 @@ def get_top_values(heatmap): def get_pose_prediction(heatmap, locref, scale_factors): + """Get the pose prediction from the heatmap and locref tensors. Used for SuperAnimal + model. + + @param heatmap: Heatmap tensor. + @type heatmap: np.ndarray + @param locref: Locref tensor. + @type locref: np.ndarray + @param scale_factors: Scale factors for the x and y axes. + @type scale_factors: Tuple[float, float] + @return: Pose prediction. + @rtype: np.ndarray + """ Y, X = get_top_values(heatmap) batch_size, num_joints = X.shape diff --git a/depthai_nodes/ml/parsers/utils/xfeat.py b/depthai_nodes/ml/parsers/utils/xfeat.py index 6836402..add6ee1 100644 --- a/depthai_nodes/ml/parsers/utils/xfeat.py +++ b/depthai_nodes/ml/parsers/utils/xfeat.py @@ -5,6 +5,15 @@ def local_maximum_filter(x: np.ndarray, kernel_size: int) -> np.ndarray: + """Apply a local maximum filter to the input array. + + @param x: Input array. + @type x: np.ndarray + @param kernel_size: Size of the local maximum filter. + @type kernel_size: int + @return: Output array after applying the local maximum filter. 
+ @rtype: np.ndarray + """ # Ensure input is a 4D array (e.g., batch, channels, height, width) if len(x.shape) != 4: raise ValueError("Input array must be 4-dimensional.") @@ -39,6 +48,17 @@ def local_maximum_filter(x: np.ndarray, kernel_size: int) -> np.ndarray: def bilinear_grid_sample( im: np.ndarray, grid: np.ndarray, align_corners: bool = False ) -> np.ndarray: + """Bilinear grid sample. + + @param im: Input image tensor. + @type im: np.ndarray + @param grid: Grid tensor. + @type grid: np.ndarray + @param align_corners: Whether to align corners. + @type align_corners: bool + @return: Output image tensor after applying bilinear grid sample. + @rtype: np.ndarray + """ n, c, h, w = im.shape gn, gh, gw, _ = grid.shape assert n == gn @@ -105,6 +125,15 @@ def _get_kpts_heatmap( kpts: np.ndarray, softmax_temp: float = 1.0, ) -> np.ndarray: + """Get the keypoints heatmap. + + @param kpts: Keypoints. + @type kpts: np.ndarray + @param softmax_temp: Softmax temperature. + @type softmax_temp: float + @return: Keypoints heatmap. + @rtype: np.ndarray + """ kpts = np.exp(kpts * softmax_temp) scores = kpts / np.sum(kpts, axis=1, keepdims=True) scores = scores[:, :64] @@ -119,6 +148,17 @@ def _nms( threshold: float = 0.05, kernel_size: int = 5, ) -> np.ndarray: + """Non-Maximum Suppression. + + @param x: Input array. + @type x: np.ndarray + @param threshold: Non-maximum suppression threshold. + @type threshold: float + @param kernel_size: Size of the local maximum filter. + @type kernel_size: int + @return: Output array after applying non-maximum suppression. + @rtype: np.ndarray + """ # Non-Maximum Suppression B, _, H, W = x.shape local_max = local_maximum_filter(x, kernel_size) @@ -143,6 +183,23 @@ def detect_and_compute( input_size: Tuple[int, int], top_k: int = 4096, ) -> List[Dict[str, Any]]: + """Detect and compute keypoints. + + @param feats: Features. + @type feats: np.ndarray + @param kpts: Keypoints. 
+ @type kpts: np.ndarray + @param resize_rate_w: Resize rate for width. + @type resize_rate_w: float + @param resize_rate_h: Resize rate for height. + @type resize_rate_h: float + @param input_size: Input size. + @type input_size: Tuple[int, int] + @param top_k: Maximum number of keypoints to keep. + @type top_k: int + @return: List of dictionaries containing keypoints, scores, and descriptors. + @rtype: List[Dict[str, Any]] + """ norm = np.linalg.norm(feats, axis=1, keepdims=True) feats = feats / norm @@ -223,6 +280,17 @@ def detect_and_compute( def _match_mkpts( feats1: np.ndarray, feats2: np.ndarray, min_cossim: float = 0.62 ) -> Tuple[np.ndarray, np.ndarray]: + """Match features. + + @param feats1: Features 1. + @type feats1: np.ndarray + @param feats2: Features 2. + @type feats2: np.ndarray + @param min_cossim: Minimum cosine similarity. + @type min_cossim: float + @return: Matched features. + @rtype: Tuple[np.ndarray, np.ndarray] + """ cossim = feats1 @ feats2.T cossim_t = feats2 @ feats1.T match12 = np.argmax(cossim, axis=1) @@ -246,6 +314,17 @@ def _match_mkpts( def match( result1: Dict[str, Any], result2: Dict[str, Any], min_cossim: float = -1 ) -> Tuple[np.ndarray, np.ndarray]: + """Match keypoints. + + @param result1: Result 1. + @type result1: Dict[str, Any] + @param result2: Result 2. + @type result2: Dict[str, Any] + @param min_cossim: Minimum cosine similarity. + @type min_cossim: float + @return: Matched keypoints. + @rtype: Tuple[np.ndarray, np.ndarray] + """ indexes1, indexes2 = _match_mkpts( result1["descriptors"], result2["descriptors"], diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py index 4ec06e8..2065cdd 100644 --- a/depthai_nodes/ml/parsers/xfeat.py +++ b/depthai_nodes/ml/parsers/xfeat.py @@ -8,11 +8,44 @@ class XFeatParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the XFeat model. + + Attributes + ---------- + input : Node.Input + Node's input. 
It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + original_size : Tuple[float, float] + Original image size. + input_size : Tuple[float, float] + Input image size. + previous_results : np.ndarray + Previous results from the model. Previous results are used to match keypoints between two frames. + + Output Message/s + ---------------- + **Type**: dai.TrackedFeatures + + **Description**: TrackedFeatures message containing matched keypoints with the same ID. + + Error Handling + -------------- + **ValueError**: If the original image size is not specified. + """ + def __init__( self, original_size: Tuple[float, float] = None, input_size: Tuple[float, float] = (640, 352), ): + """Initializes the XFeatParser node. + + @param original_size: Original image size. + @type original_size: Tuple[float, float] + @param input_size: Input image size. + @type input_size: Tuple[float, float] + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -21,17 +54,22 @@ def __init__( self.previous_results = None def setOriginalSize(self, original_size): + """Sets the original image size. + + @param original_size: Original image size. + @type original_size: Tuple[float, float] + """ self.original_size = original_size def setInputSize(self, input_size): + """Sets the input image size. + + @param input_size: Input image size. + @type input_size: Tuple[float, float] + """ self.input_size = input_size def run(self): - """Postprocessing logic for XFeat model. - - Returns: - dai.MatchedPoints containing matched keypoints. 
- """ if self.original_size is None: raise ValueError("Original image size must be specified!") diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py index 0bfe555..a2d1a4a 100644 --- a/depthai_nodes/ml/parsers/yunet.py +++ b/depthai_nodes/ml/parsers/yunet.py @@ -9,12 +9,43 @@ class YuNetParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the YuNet face detection model. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected faces. + nms_threshold : float + Non-maximum suppression threshold. + top_k : int + Maximum number of detections to keep. + + Output Message/s + ---------------- + **Type**: ImgDetectionsWithKeypoints + + **Description**: Message containing bounding boxes, labels, confidence scores, and keypoints of detected faces. + """ + def __init__( self, score_threshold=0.6, nms_threshold=0.3, top_k=5000, ): + """Initializes the YuNetParser node. + + @param score_threshold: Confidence score threshold for detected faces. + @type score_threshold: float + @param nms_threshold: Non-maximum suppression threshold. + @type nms_threshold: float + @param top_k: Maximum number of detections to keep. + @type top_k: int + """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) @@ -24,21 +55,30 @@ def __init__( self.top_k = top_k def setConfidenceThreshold(self, threshold): + """Sets the confidence score threshold for detected faces. + + @param threshold: Confidence score threshold for detected faces. 
+ @type threshold: float + """ self.score_threshold = threshold def setNMSThreshold(self, threshold): + """Sets the non-maximum suppression threshold. + + @param threshold: Non-maximum suppression threshold. + @type threshold: float + """ self.nms_threshold = threshold def setTopK(self, top_k): - self.top_k = top_k - - def run(self): - """Postprocessing logic for YuNet model. + """Sets the maximum number of detections to keep. - Returns: - dai.ImgDetectionsWithKeypoints: Detections with keypoints. + @param top_k: Maximum number of detections to keep. + @type top_k: int """ + self.top_k = top_k + def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() From 766b716afd5762d389e5ac12a5ac763636cc72fe Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Wed, 31 Jul 2024 12:03:53 +0200 Subject: [PATCH 09/25] Added Docstring to all methods and changed classification object to have two attributes: scores and classes, instead of a list of shape (n_classes, 2) --- depthai_nodes/ml/messages/classification.py | 44 ++++++++++---- .../creators/classification_message.py | 24 ++++---- .../ml/parsers/classification_parser.py | 59 +++++++++++++------ 3 files changed, 87 insertions(+), 40 deletions(-) diff --git a/depthai_nodes/ml/messages/classification.py b/depthai_nodes/ml/messages/classification.py index 97dc37c..57bf979 100644 --- a/depthai_nodes/ml/messages/classification.py +++ b/depthai_nodes/ml/messages/classification.py @@ -4,21 +4,45 @@ class Classifications(dai.Buffer): + """Classification class for storing the class names and their respective scores. + + Attributes + ---------- + classes : list[str] + A list of classes. + scores : list[float] + A list of corresponding probability scores. 
+ """ + def __init__(self): + """Initializes the Classifications object and sets the classes and scores to + empty lists.""" dai.Buffer.__init__(self) - self._classes = [] + self._classes: List[str] = [] + self._scores: List[float] = [] @property def classes(self) -> List: + """Returns the list of classes.""" return self._classes + @property + def scores(self) -> List: + """Returns the list of scores.""" + return self._scores + @classes.setter - def classes(self, value: List): - if not isinstance(value, list): - raise TypeError("Must be a list.") - for item in value: - if not isinstance(item, list) or len(item) != 2: - raise TypeError( - "Each item must be a list of [class_name, probability_score], got {item}." - ) - self._classes = value + def classes(self, class_names: List[str]): + """Sets the list of classes. + + @param classes: A list of class names. + """ + self._classes = class_names + + @scores.setter + def scores(self, scores: List[float]): + """Sets the list of scores. + + @param scores: A list of scores. + """ + self._scores = scores diff --git a/depthai_nodes/ml/messages/creators/classification_message.py b/depthai_nodes/ml/messages/creators/classification_message.py index 672ba68..0577954 100644 --- a/depthai_nodes/ml/messages/creators/classification_message.py +++ b/depthai_nodes/ml/messages/creators/classification_message.py @@ -16,7 +16,7 @@ def create_classification_message( A numpy array of shape (n_classes,) containing the probability score of each class. classes : np.ndarray = [] - A numpy array of class names. If not provided, class names are set to None. + A numpy array of shape (n_classes, ), containing class names. If not provided, class names are set to []. Returns @@ -26,8 +26,11 @@ def create_classification_message( where each item is [class_name, probability_score]. If no class names are provided, class_name is set to None. 
""" - if classes is None: + + if type(classes) == type(None): classes = np.array([]) + else: + classes = np.array(classes) if len(scores) == 0: raise ValueError("Scores should not be empty.") @@ -35,12 +38,15 @@ def create_classification_message( if len(scores) != len(scores.flatten()): raise ValueError(f"Scores should be a 1D array, got {scores.shape}.") + if len(classes) != len(classes.flatten()): + raise ValueError(f"Classes should be a 1D array, got {classes.shape}.") + scores = scores.flatten() + classes = classes.flatten() if not np.issubdtype(scores.dtype, np.floating): raise ValueError(f"Scores should be of type float, got {scores.dtype}.") - print("scores", np.sum(scores)) if not np.isclose(np.sum(scores), 1.0, atol=1e-1): raise ValueError(f"Scores should sum to 1, got {np.sum(scores)}.") @@ -54,15 +60,9 @@ def create_classification_message( sorted_args = np.argsort(scores)[::-1] scores = scores[sorted_args] - if len(classes) == 0: - classification_msg.classes = [ - [None, float(scores[i])] for i in range(len(scores)) - ] - return classification_msg + if len(classes) != 0: + classification_msg.classes = classes[sorted_args].tolist() - classes = classes[sorted_args] - classification_msg.classes = [ - [str(classes[i]), float(scores[i])] for i in range(len(classes)) - ] + classification_msg.scores = scores.tolist() return classification_msg diff --git a/depthai_nodes/ml/parsers/classification_parser.py b/depthai_nodes/ml/parsers/classification_parser.py index 62ef02c..c7fc0b7 100644 --- a/depthai_nodes/ml/parsers/classification_parser.py +++ b/depthai_nodes/ml/parsers/classification_parser.py @@ -7,34 +7,57 @@ class ClassificationParser(dai.node.ThreadedHostNode): """Postprocessing logic for Classification model. - Parameters + Attributes ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. 
+ out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. classes : list[str] - List of class labels. + List of class names to be used for linking with their respective scores. Expected to be in the same order as Neural Network's output. If not provided, the message will only return sorted scores. is_softmax : bool = True - True, if output is already softmaxed. + If False, the scores are converted to probabilities using softmax function. + n_classes : int = len(classes) + Number of provided classes. This variable is set automatically based on provided classes. - Returns - ------- - Classifications: dai.Buffer - An object with parameter `classes`, which is a list of items like [class_name, probability_score]. - If no class names are provided, class_name is set to None. + Output Message/s + ---------------- + **Type** : Classifications(dai.Buffer): + An object with attributes `classes` and `scores`. `classes` is a list of classes, sorted in descending order of scores. `scores` is a list of corresponding scores. """ def __init__(self, classes: list[str] = None, is_softmax: bool = True): + """Initializes the ClassificationParser node. + + @param classes: List of class names to be used for linking with their respective + scores. + @param is_softmax: If False, the scores are converted to probabilities using + softmax function. 
+ """ + dai.node.ThreadedHostNode.__init__(self) self.out = self.createOutput() self.input = self.createInput() - if classes is None: - self.classes = [] - else: - self.classes = np.array(classes) - self.n_classes = len(classes) + self.classes = classes if classes is not None else [] + self.n_classes = len(self.classes) self.is_softmax = is_softmax - def setClasses(self, classes): - self.classes = classes - self.n_classes = len(classes) + def setClasses(self, classes: list[str]): + """Sets the class names for the classification model. + + @param classes: List of class names to be used for linking with their respective + scores. + """ + self.classes = classes if classes is not None else [] + self.n_classes = len(self.classes) + + def setSoftmax(self, is_softmax: bool): + """Sets the softmax flag for the classification model. + + @param is_softmax: If False, the parser will convert the scores to probabilities + using softmax function. + """ + self.is_softmax = is_softmax def run(self): while self.isRunning(): @@ -51,7 +74,7 @@ def run(self): scores = output.getTensor(output_layer_names[0]) scores = np.array(scores).flatten() - + classes = np.array(self.classes) if len(scores) != self.n_classes and self.n_classes != 0: raise ValueError( f"Number of labels and scores mismatch. Provided {self.n_classes} class names and {len(scores)} scores." @@ -61,6 +84,6 @@ def run(self): ex = np.exp(scores) scores = ex / np.sum(ex) - msg = create_classification_message(scores, self.classes) + msg = create_classification_message(scores, classes) self.out.send(msg) From ad0b0a1532b3677e7fef997f24455a442f16b747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Thu, 8 Aug 2024 12:29:56 +0200 Subject: [PATCH 10/25] XFeat postprocessing speed-up. 
(#18) --- depthai_nodes/ml/parsers/utils/xfeat.py | 34 +++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/depthai_nodes/ml/parsers/utils/xfeat.py b/depthai_nodes/ml/parsers/utils/xfeat.py index add6ee1..5d2d62e 100644 --- a/depthai_nodes/ml/parsers/utils/xfeat.py +++ b/depthai_nodes/ml/parsers/utils/xfeat.py @@ -31,16 +31,30 @@ def local_maximum_filter(x: np.ndarray, kernel_size: int) -> np.ndarray: mode="constant", ) - # Initialize the output array - local_max = np.zeros_like(x) - - # Apply the maximum filter - for i in range(height): - for j in range(width): - # Extract the local region - local_region = padded_x[:, :, i : i + kernel_size, j : j + kernel_size] - # Compute the local maximum - local_max[:, :, i, j] = np.max(local_region, axis=(2, 3)) + # Use stride tricks to generate a view of the array with sliding windows + shape = ( + padded_x.shape[0], + padded_x.shape[1], + height, + width, + kernel_size, + kernel_size, + ) + strides = ( + padded_x.strides[0], + padded_x.strides[1], + padded_x.strides[2], + padded_x.strides[3], + padded_x.strides[2], + padded_x.strides[3], + ) + + sliding_window_view = np.lib.stride_tricks.as_strided( + padded_x, shape=shape, strides=strides + ) + + # Compute the local maximum over the sliding windows + local_max = np.max(sliding_window_view, axis=(4, 5)) return local_max From d40b13e26eda542285222d8ffd75a4ef9912101c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Mon, 12 Aug 2024 15:06:12 +0200 Subject: [PATCH 11/25] NN Archive parameters documentation. 
(#17) --- CONTRIBUTING.md | 5 +++++ docs/nn_archive_parameters.md | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 docs/nn_archive_parameters.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3ae585b..f15319a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,11 +4,16 @@ It outlines our workflow and standards for contributing to this project. ## Table of Contents +- [Developing parser](#developing-parser) - [Pre-commit Hooks](#pre-commit-hooks) - [Documentation](#documentation) - [Editor Support](#editor-support) - [Making and Reviewing Changes](#making-and-reviewing-changes) +## Developing parser + +Parser should be developed so that it is consistent with other parsers. Check out other parsers to see the required structure. Additionally, pay attention to the naming of the parser's attributes. Check out [NN Archive Parameters](docs/nn_archive_parameters.md). + ## Pre-commit Hooks We use pre-commit hooks to ensure code quality and consistency: diff --git a/docs/nn_archive_parameters.md b/docs/nn_archive_parameters.md new file mode 100644 index 0000000..db42d2c --- /dev/null +++ b/docs/nn_archive_parameters.md @@ -0,0 +1,23 @@ +# NN Archive params + +> The idea behind this document is that the creator of the parser can accidentally name some parser's parameters slightly differently as they are named in the NN archive (num_classes instead of n_classes). Subsequently, DAI will not map the correct parameters. + +Below are listed all the parameters supported in the NN Archive. Each parameter also has a computer vision task where it is required. You can help with the list to better plan and develop new parsers. E.g. if you are adding a classification parser many parameters are already present in NN Archive (n_classes, classes, is_softmax). You should reuse the naming in your parser’s code so DepthAI can automatically map the NN Archive parameters to the Parser. 
+
+### All parameters
+
+- classes - Names of object classes detected by the model. `Object detection` `Classification` `Segmentation`
+- n_classes - Number of object classes detected by the model. `Object detection` `Classification` `Segmentation`
+- iou_threshold - Non-max suppression threshold limiting boxes intersection. `Object detection`
+- conf_threshold - Confidence score threshold above which a detected object is considered valid. `Object detection`
+- max_det - Maximum detections per image. `Object detection`
+- anchors - Predefined bounding boxes of different sizes and aspect ratios. The innermost lists are length 2 tuples of box sizes. The middle lists are anchors for each output. The outmost lists go from smallest to largest output. `Object detection`
+- is_softmax - True, if output is already softmaxed. `Classification` `Segmentation` `YOLO`
+- yolo_outputs - A list of output names for each of the different YOLO grid sizes. `YOLO`
+- mask_outputs - A list of output names for each mask output. `YOLO`
+- protos_outputs - Output name for the protos. `YOLO`
+- keypoints_outputs - A list of output names for the keypoints. `YOLO`
+- angles_outputs - A list of output names for the angles. `YOLO`
+- subtype - YOLO family decoding subtype (e.g. yolov5, yolov6, yolov7 etc.) `YOLO`
+- n_prototypes - Number of prototypes per bbox in YOLO instance segmentation. `YOLO`
+- n_keypoints - Number of keypoints per bbox in YOLO keypoint detection. `YOLO`

From e73f404d43999938809e50c65567c28bfb2e1d11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:41:51 +0200
Subject: [PATCH 12/25] Installation by cloning the repository. 
(#20) --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 23cfb85..2c93102 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,18 @@ To install the package, run: pip install depthai-nodes ``` +Before the official release on PyPI you can install the package from the GitHub repository: + +```bash +git clone git@github.com:luxonis/depthai-nodes.git +``` + +and then install the requirements: + +```bash +pip install -r requirements.txt +``` + ## Contributing If you want to contribute to this project, read the instructions in [CONTRIBUTING.md](./CONTRIBUTING.md) From 4fffe4666fd5c94f7c64e0e48417126cd628be4f Mon Sep 17 00:00:00 2001 From: NicikD Date: Wed, 14 Aug 2024 12:42:38 +0200 Subject: [PATCH 13/25] Added timestamps to outcoming messages (#19) --- depthai_nodes/ml/parsers/image_output.py | 1 + depthai_nodes/ml/parsers/keypoints.py | 1 + depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py | 1 + depthai_nodes/ml/parsers/mediapipe_palm_detection.py | 1 + depthai_nodes/ml/parsers/mlsd.py | 1 + depthai_nodes/ml/parsers/monocular_depth.py | 1 + depthai_nodes/ml/parsers/scrfd.py | 1 + depthai_nodes/ml/parsers/segmentation.py | 1 + depthai_nodes/ml/parsers/superanimal_landmarker.py | 1 + depthai_nodes/ml/parsers/thermal_image.py | 1 + depthai_nodes/ml/parsers/xfeat.py | 1 + depthai_nodes/ml/parsers/yunet.py | 1 + 12 files changed, 12 insertions(+) diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index 534db76..e60bb09 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -75,5 +75,6 @@ def run(self): image=image, is_bgr=self.output_is_bgr, ) + image_message.setTimestamp(output.getTimestamp()) self.out.send(image_message) diff --git a/depthai_nodes/ml/parsers/keypoints.py b/depthai_nodes/ml/parsers/keypoints.py index 4f9f8ce..a501ba1 100644 --- a/depthai_nodes/ml/parsers/keypoints.py +++ 
b/depthai_nodes/ml/parsers/keypoints.py @@ -100,5 +100,6 @@ def run(self): keypoints /= self.scale_factor msg = create_keypoints_message(keypoints) + msg.setTimestamp(output.getTimestamp()) self.out.send(msg) diff --git a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py index f358c60..effbc0f 100644 --- a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py +++ b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py @@ -83,4 +83,5 @@ def run(self): hand_landmarks_msg = create_hand_keypoints_message( landmarks, float(handedness), float(hand_score), self.score_threshold ) + hand_landmarks_msg.setTimestamp(output.getTimestamp()) self.out.send(hand_landmarks_msg) diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py index 0c992ec..aeacd17 100644 --- a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py +++ b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py @@ -116,4 +116,5 @@ def run(self): scores = np.array(scores)[indices] detections_msg = create_detection_message(bboxes, scores, labels=None) + detections_msg.setTimestamp(output.getTimestamp()) self.out.send(detections_msg) diff --git a/depthai_nodes/ml/parsers/mlsd.py b/depthai_nodes/ml/parsers/mlsd.py index e13dadf..128e990 100644 --- a/depthai_nodes/ml/parsers/mlsd.py +++ b/depthai_nodes/ml/parsers/mlsd.py @@ -97,4 +97,5 @@ def run(self): ) message = create_line_detection_message(lines, np.array(scores)) + message.setTimestamp(output.getTimestamp()) self.out.send(message) diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py index 21e48d4..b21c36f 100644 --- a/depthai_nodes/ml/parsers/monocular_depth.py +++ b/depthai_nodes/ml/parsers/monocular_depth.py @@ -66,4 +66,5 @@ def run(self): depth_map=depth_map, depth_type=self.depth_type, ) + depth_message.setTimestamp(output.getTimestamp()) self.out.send(depth_message) diff --git 
a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py index b2f2411..40c88cc 100644 --- a/depthai_nodes/ml/parsers/scrfd.py +++ b/depthai_nodes/ml/parsers/scrfd.py @@ -173,4 +173,5 @@ def run(self): scores = scores[indices] detection_msg = create_detection_message(bboxes, scores, None, None) + detection_msg.setTimestamp(output.getTimestamp()) self.out.send(detection_msg) diff --git a/depthai_nodes/ml/parsers/segmentation.py b/depthai_nodes/ml/parsers/segmentation.py index 2f2fc2d..d4f9e0b 100644 --- a/depthai_nodes/ml/parsers/segmentation.py +++ b/depthai_nodes/ml/parsers/segmentation.py @@ -89,4 +89,5 @@ def run(self): ) imgFrame = create_segmentation_message(class_map) + imgFrame.setTimestamp(output.getTimestamp()) self.out.send(imgFrame) diff --git a/depthai_nodes/ml/parsers/superanimal_landmarker.py b/depthai_nodes/ml/parsers/superanimal_landmarker.py index 48fd400..9b0bdee 100644 --- a/depthai_nodes/ml/parsers/superanimal_landmarker.py +++ b/depthai_nodes/ml/parsers/superanimal_landmarker.py @@ -80,5 +80,6 @@ def run(self): keypoints = keypoints[:, :2] / self.scale_factor msg = create_keypoints_message(keypoints, scores, self.score_threshold) + msg.setTimestamp(output.getTimestamp()) self.out.send(msg) diff --git a/depthai_nodes/ml/parsers/thermal_image.py b/depthai_nodes/ml/parsers/thermal_image.py index 44ed955..bc090ef 100644 --- a/depthai_nodes/ml/parsers/thermal_image.py +++ b/depthai_nodes/ml/parsers/thermal_image.py @@ -44,4 +44,5 @@ def run(self): thermal_map = output[0] thermal_message = create_thermal_message(thermal_map=thermal_map) + thermal_message.setTimestamp(output.getTimestamp()) self.out.send(thermal_message) diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py index 2065cdd..f7ecf63 100644 --- a/depthai_nodes/ml/parsers/xfeat.py +++ b/depthai_nodes/ml/parsers/xfeat.py @@ -92,6 +92,7 @@ def run(self): if self.previous_results is not None: mkpts0, mkpts1 = match(self.previous_results, result) 
matched_points = create_tracked_features_message(mkpts0, mkpts1) + matched_points.setTimestamp(output.getTimestamp()) self.out.send(matched_points) else: # save the result from first frame diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py index a2d1a4a..c229c99 100644 --- a/depthai_nodes/ml/parsers/yunet.py +++ b/depthai_nodes/ml/parsers/yunet.py @@ -146,5 +146,6 @@ def run(self): labels, keypoints, ) + detections_message.setTimestamp(output.getTimestamp()) self.out.send(detections_message) From bee80cbc6fa8e8cfd8e6cf32c10a6914e37c661f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Mon, 19 Aug 2024 10:03:51 +0200 Subject: [PATCH 14/25] RVC4 support for parsers. (#22) --- depthai_nodes/ml/parsers/image_output.py | 2 +- depthai_nodes/ml/parsers/keypoints.py | 4 +++- .../ml/parsers/mediapipe_hand_landmarker.py | 18 +++++++++++++++--- .../ml/parsers/mediapipe_palm_detection.py | 12 ++++++++++-- depthai_nodes/ml/parsers/segmentation.py | 8 +++++--- .../ml/parsers/superanimal_landmarker.py | 5 ++++- depthai_nodes/ml/parsers/xfeat.py | 13 +++++++++++-- 7 files changed, 49 insertions(+), 13 deletions(-) diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index e60bb09..be25053 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -58,7 +58,7 @@ def run(self): raise ValueError( f"Expected 1 output layer, got {len(output_layer_names)}." 
) - output = output.getTensor(output_layer_names[0]) + output = output.getTensor(output_layer_names[0], dequantize=True) if len(output.shape) == 4: image = output[0] diff --git a/depthai_nodes/ml/parsers/keypoints.py b/depthai_nodes/ml/parsers/keypoints.py index a501ba1..07d93f9 100644 --- a/depthai_nodes/ml/parsers/keypoints.py +++ b/depthai_nodes/ml/parsers/keypoints.py @@ -87,7 +87,9 @@ def run(self): f"Expected 1 output layer, got {len(output_layer_names)}." ) - keypoints = output.getTensor(output_layer_names[0]) + keypoints = output.getTensor(output_layer_names[0], dequantize=True).astype( + np.float32 + ) num_coords = int(np.prod(keypoints.shape) / self.num_keypoints) if num_coords not in [2, 3]: diff --git a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py index effbc0f..346b881 100644 --- a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py +++ b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py @@ -71,9 +71,21 @@ def run(self): except dai.MessageQueue.QueueException: break # Pipeline was stopped - landmarks = output.getTensor("Identity").reshape(21, 3).astype(np.float32) - hand_score = output.getTensor("Identity_1").reshape(-1).astype(np.float32) - handedness = output.getTensor("Identity_2").reshape(-1).astype(np.float32) + landmarks = ( + output.getTensor("Identity", dequantize=True) + .reshape(21, 3) + .astype(np.float32) + ) + hand_score = ( + output.getTensor("Identity_1", dequantize=True) + .reshape(-1) + .astype(np.float32) + ) + handedness = ( + output.getTensor("Identity_2", dequantize=True) + .reshape(-1) + .astype(np.float32) + ) hand_score = hand_score[0] handedness = handedness[0] diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py index aeacd17..2f297d7 100644 --- a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py +++ b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py @@ -85,8 +85,16 @@ def run(self): 
except dai.MessageQueue.QueueException: break # Pipeline was stopped - bboxes = output.getTensor("Identity").reshape(2016, 18).astype(np.float32) - scores = output.getTensor("Identity_1").reshape(2016).astype(np.float32) + bboxes = ( + output.getTensor("Identity", dequantize=True) + .reshape(2016, 18) + .astype(np.float32) + ) + scores = ( + output.getTensor("Identity_1", dequantize=True) + .reshape(2016) + .astype(np.float32) + ) decoded_bboxes = generate_anchors_and_decode( bboxes=bboxes, scores=scores, threshold=self.score_threshold, scale=192 diff --git a/depthai_nodes/ml/parsers/segmentation.py b/depthai_nodes/ml/parsers/segmentation.py index d4f9e0b..e141c3a 100644 --- a/depthai_nodes/ml/parsers/segmentation.py +++ b/depthai_nodes/ml/parsers/segmentation.py @@ -62,9 +62,11 @@ def run(self): f"Expected 1 output layer, got {len(output_layer_names)}." ) - segmentation_mask = output.getTensor(output_layer_names[0])[ - 0 - ] # num_clases x H x W + segmentation_mask = output.getTensor(output_layer_names[0], dequantize=True) + if len(segmentation_mask.shape) == 4: + segmentation_mask = segmentation_mask[0] + else: + segmentation_mask = segmentation_mask.transpose(2, 0, 1) if len(segmentation_mask.shape) != 3: raise ValueError( diff --git a/depthai_nodes/ml/parsers/superanimal_landmarker.py b/depthai_nodes/ml/parsers/superanimal_landmarker.py index 9b0bdee..31ed393 100644 --- a/depthai_nodes/ml/parsers/superanimal_landmarker.py +++ b/depthai_nodes/ml/parsers/superanimal_landmarker.py @@ -68,7 +68,10 @@ def run(self): except dai.MessageQueue.QueueException: break # Pipeline was stopped - heatmaps = output.getTensor("heatmaps").astype(np.float32) + heatmaps = output.getTensor("heatmaps", dequantize=True).astype(np.float32) + + if len(heatmaps.shape) == 3: + heatmaps = heatmaps.reshape((1,) + heatmaps.shape) heatmaps_scale_factor = ( self.scale_factor / heatmaps.shape[1], diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py index 
f7ecf63..6d4a451 100644 --- a/depthai_nodes/ml/parsers/xfeat.py +++ b/depthai_nodes/ml/parsers/xfeat.py @@ -82,8 +82,17 @@ def run(self): except dai.MessageQueue.QueueException: break # Pipeline was stopped - feats = output.getTensor("feats").astype(np.float32) - keypoints = output.getTensor("keypoints").astype(np.float32) + feats = output.getTensor("feats", dequantize=True).astype(np.float32) + keypoints = output.getTensor("keypoints", dequantize=True).astype( + np.float32 + ) + + if len(feats.shape) == 3: + feats = feats.reshape((1,) + feats.shape).transpose(0, 3, 1, 2) + if len(keypoints.shape) == 3: + keypoints = keypoints.reshape((1,) + keypoints.shape).transpose( + 0, 3, 1, 2 + ) result = detect_and_compute( feats, keypoints, resize_rate_w, resize_rate_h, self.input_size From ceb642c4ee0dbffb903c675760072b2752e1671e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Mon, 19 Aug 2024 10:07:35 +0200 Subject: [PATCH 15/25] Improved SCRFD decoding. (#21) * Improved SCRFD decoding. * Confict error fix. * Variable rename. 
--- .../ml/messages/creators/detection.py | 6 +- depthai_nodes/ml/messages/img_detections.py | 2 +- depthai_nodes/ml/parsers/scrfd.py | 180 ++++++++++-------- depthai_nodes/ml/parsers/utils/scrfd.py | 174 +++++++++++++++++ 4 files changed, 276 insertions(+), 86 deletions(-) create mode 100644 depthai_nodes/ml/parsers/utils/scrfd.py diff --git a/depthai_nodes/ml/messages/creators/detection.py b/depthai_nodes/ml/messages/creators/detection.py index 69215d5..842f57e 100644 --- a/depthai_nodes/ml/messages/creators/detection.py +++ b/depthai_nodes/ml/messages/creators/detection.py @@ -96,9 +96,9 @@ def create_detection_message( if keypoints is not None and len(keypoints) != 0: if not isinstance(keypoints, List): raise ValueError(f"keypoints should be list, got {type(keypoints)}.") - for pointcloud in keypoints: - for point in pointcloud: - if not isinstance(point, Tuple): + for object_keypoints in keypoints: + for point in object_keypoints: + if not isinstance(point, Tuple) and not isinstance(point, List): raise ValueError( f"keypoint pairs should be list of tuples, got {type(point)}." 
) diff --git a/depthai_nodes/ml/messages/img_detections.py b/depthai_nodes/ml/messages/img_detections.py index 9c6114e..0593372 100644 --- a/depthai_nodes/ml/messages/img_detections.py +++ b/depthai_nodes/ml/messages/img_detections.py @@ -39,7 +39,7 @@ def keypoints(self, value: List[Tuple[Union[int, float], Union[int, float]]]): raise TypeError("Keypoints must be a list") for item in value: if ( - not isinstance(item, tuple) + not (isinstance(item, tuple) or isinstance(item, list)) or len(item) != 2 or not all(isinstance(i, (int, float)) for i in item) ): diff --git a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py index 40c88cc..5b4fa50 100644 --- a/depthai_nodes/ml/parsers/scrfd.py +++ b/depthai_nodes/ml/parsers/scrfd.py @@ -1,8 +1,8 @@ -import cv2 import depthai as dai import numpy as np from ..messages.creators import create_detection_message +from .utils.scrfd import decode_scrfd class SCRFDParser(dai.node.ThreadedHostNode): @@ -20,6 +20,12 @@ class SCRFDParser(dai.node.ThreadedHostNode): Non-maximum suppression threshold. top_k : int Maximum number of detections to keep. + feat_stride_fpn : tuple + Tuple of the feature strides. + num_anchors : int + Number of anchors. + input_size : tuple + Input size of the model. Output Message/s ---------------- @@ -28,7 +34,15 @@ class SCRFDParser(dai.node.ThreadedHostNode): **Description**: ImgDetections message containing bounding boxes, labels, and confidence scores of detected faces. """ - def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): + def __init__( + self, + score_threshold=0.5, + nms_threshold=0.5, + top_k=100, + input_size=(640, 640), + feat_stride_fpn=(8, 16, 32), + num_anchors=2, + ): """Initializes the SCRFDParser node. @param score_threshold: Confidence score threshold for detected faces. @@ -37,6 +51,12 @@ def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): @type nms_threshold: float @param top_k: Maximum number of detections to keep. 
@type top_k: int + @param feat_stride_fpn: List of the feature strides. + @type feat_stride_fpn: tuple + @param num_anchors: Number of anchors. + @type num_anchors: int + @param input_size: Input size of the model. + @type input_size: tuple """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) @@ -46,6 +66,10 @@ def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): self.nms_threshold = nms_threshold self.top_k = top_k + self.feat_stride_fpn = feat_stride_fpn + self.num_anchors = num_anchors + self.input_size = input_size + def setConfidenceThreshold(self, threshold): """Sets the confidence score threshold for detected faces. @@ -70,6 +94,30 @@ def setTopK(self, top_k): """ self.top_k = top_k + def setFeatStrideFPN(self, feat_stride_fpn): + """Sets the feature stride of the FPN. + + @param feat_stride_fpn: Feature stride of the FPN. + @type feat_stride_fpn: list + """ + self.feat_stride_fpn = feat_stride_fpn + + def setInputSize(self, input_size): + """Sets the input size of the model. + + @param input_size: Input size of the model. + @type input_size: list + """ + self.input_size = input_size + + def setNumAnchors(self, num_anchors): + """Sets the number of anchors. + + @param num_anchors: Number of anchors. 
+ @type num_anchors: int + """ + self.num_anchors = num_anchors + def run(self): while self.isRunning(): try: @@ -77,101 +125,69 @@ def run(self): except dai.MessageQueue.QueueException: break # Pipeline was stopped - score_8 = output.getTensor("score_8").flatten().astype(np.float32) - score_16 = output.getTensor("score_16").flatten().astype(np.float32) - score_32 = output.getTensor("score_32").flatten().astype(np.float32) + score_8 = ( + output.getTensor("score_8", dequantize=True) + .flatten() + .astype(np.float32) + ) + score_16 = ( + output.getTensor("score_16", dequantize=True) + .flatten() + .astype(np.float32) + ) + score_32 = ( + output.getTensor("score_32", dequantize=True) + .flatten() + .astype(np.float32) + ) bbox_8 = ( - output.getTensor("bbox_8").reshape(len(score_8), 4).astype(np.float32) + output.getTensor("bbox_8", dequantize=True) + .reshape(len(score_8), 4) + .astype(np.float32) ) bbox_16 = ( - output.getTensor("bbox_16").reshape(len(score_16), 4).astype(np.float32) + output.getTensor("bbox_16", dequantize=True) + .reshape(len(score_16), 4) + .astype(np.float32) ) bbox_32 = ( - output.getTensor("bbox_32").reshape(len(score_32), 4).astype(np.float32) + output.getTensor("bbox_32", dequantize=True) + .reshape(len(score_32), 4) + .astype(np.float32) ) kps_8 = ( - output.getTensor("kps_8").reshape(len(score_8), 5, 2).astype(np.float32) + output.getTensor("kps_8", dequantize=True) + .reshape(len(score_8), 10) + .astype(np.float32) ) kps_16 = ( - output.getTensor("kps_16") - .reshape(len(score_16), 5, 2) + output.getTensor("kps_16", dequantize=True) + .reshape(len(score_16), 10) .astype(np.float32) ) kps_32 = ( - output.getTensor("kps_32") - .reshape(len(score_32), 5, 2) + output.getTensor("kps_32", dequantize=True) + .reshape(len(score_32), 10) .astype(np.float32) ) - bboxes = [] - keypoints = [] - - for i in range(len(score_8)): - y = int(np.floor(i / 80)) * 4 - x = (i % 160) * 4 - bbox = bbox_8[i] - xmin = int(x - bbox[0] * 8) - ymin = int(y - 
bbox[1] * 8) - xmax = int(x + bbox[2] * 8) - ymax = int(y + bbox[3] * 8) - kps = kps_8[i] - kps_batch = [] - for kp in kps: - kpx = int(x + kp[0] * 8) - kpy = int(y + kp[1] * 8) - kps_batch.append([kpx, kpy]) - keypoints.append(kps_batch) - bbox = [xmin, ymin, xmax, ymax] - bboxes.append(bbox) - - for i in range(len(score_16)): - y = int(np.floor(i / 40)) * 8 - x = (i % 80) * 8 - bbox = bbox_16[i] - xmin = int(x - bbox[0] * 16) - ymin = int(y - bbox[1] * 16) - xmax = int(x + bbox[2] * 16) - ymax = int(y + bbox[3] * 16) - kps = kps_16[i] - kps_batch = [] - for kp in kps: - kpx = int(x + kp[0] * 16) - kpy = int(y + kp[1] * 16) - kps_batch.append([kpx, kpy]) - keypoints.append(kps_batch) - bbox = [xmin, ymin, xmax, ymax] - bboxes.append(bbox) - - for i in range(len(score_32)): - y = int(np.floor(i / 20)) * 16 - x = (i % 40) * 16 - bbox = bbox_32[i] - xmin = int(x - bbox[0] * 32) - ymin = int(y - bbox[1] * 32) - xmax = int(x + bbox[2] * 32) - ymax = int(y + bbox[3] * 32) - kps = kps_32[i] - kps_batch = [] - for kp in kps: - kpx = int(x + kp[0] * 32) - kpy = int(y + kp[1] * 32) - kps_batch.append([kpx, kpy]) - keypoints.append(kps_batch) - bbox = [xmin, ymin, xmax, ymax] - bboxes.append(bbox) - - scores = np.concatenate([score_8, score_16, score_32]) - indices = cv2.dnn.NMSBoxes( - bboxes, - list(scores), - self.score_threshold, - self.nms_threshold, - top_k=self.top_k, + bboxes_concatenated = [bbox_8, bbox_16, bbox_32] + scores_concatenated = [score_8, score_16, score_32] + kps_concatenated = [kps_8, kps_16, kps_32] + + bboxes, scores, keypoints = decode_scrfd( + bboxes_concatenated=bboxes_concatenated, + scores_concatenated=scores_concatenated, + kps_concatenated=kps_concatenated, + feat_stride_fpn=self.feat_stride_fpn, + input_size=self.input_size, + num_anchors=self.num_anchors, + score_threshold=self.score_threshold, + nms_threshold=self.nms_threshold, + ) + detection_msg = create_detection_message( + bboxes, scores, None, keypoints.tolist() ) - bboxes = 
np.array(bboxes)[indices] - keypoints = np.array(keypoints)[indices] - scores = scores[indices] - - detection_msg = create_detection_message(bboxes, scores, None, None) detection_msg.setTimestamp(output.getTimestamp()) + self.out.send(detection_msg) diff --git a/depthai_nodes/ml/parsers/utils/scrfd.py b/depthai_nodes/ml/parsers/utils/scrfd.py new file mode 100644 index 0000000..533f4a0 --- /dev/null +++ b/depthai_nodes/ml/parsers/utils/scrfd.py @@ -0,0 +1,174 @@ +import numpy as np + + +def nms(dets, nms_thresh=0.5): + """Non-maximum suppression. + + @param dets: Bounding boxes and confidence scores. + @type dets: np.ndarray + @return: Indices of the detections to keep. + @rtype: list[int] + """ + thresh = nms_thresh + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep + + +def distance2bbox(points, distance, max_shape=None): + """Decode distance prediction to bounding box. + + @param points: Shape (n, 2), [x, y]. + @type points: np.ndarray + @param distance: Distance from the given point to 4 boundaries (left, top, right, + bottom). + @type distance: np.ndarray + @param max_shape: Shape of the image. + @type max_shape: Tuple[int, int] + @return: Decoded bboxes. 
+ @rtype: np.ndarray + """ + x1 = points[:, 0] - distance[:, 0] + y1 = points[:, 1] - distance[:, 1] + x2 = points[:, 0] + distance[:, 2] + y2 = points[:, 1] + distance[:, 3] + if max_shape is not None: + x1 = x1.clamp(min=0, max=max_shape[1]) + y1 = y1.clamp(min=0, max=max_shape[0]) + x2 = x2.clamp(min=0, max=max_shape[1]) + y2 = y2.clamp(min=0, max=max_shape[0]) + return np.stack([x1, y1, x2, y2], axis=-1) + + +def distance2kps(points, distance, max_shape=None): + """Decode distance prediction to keypoints. + + @param points: Shape (n, 2), [x, y]. + @type points: np.ndarray + @param distance: Distance from the given point to 4 boundaries (left, top, right, + bottom). + @type distance: np.ndarray + @param max_shape: Shape of the image. + @type max_shape: Tuple[int, int] + @return: Decoded keypoints. + @rtype: np.ndarray + """ + preds = [] + for i in range(0, distance.shape[1], 2): + px = points[:, i % 2] + distance[:, i] + py = points[:, i % 2 + 1] + distance[:, i + 1] + if max_shape is not None: + px = px.clamp(min=0, max=max_shape[1]) + py = py.clamp(min=0, max=max_shape[0]) + preds.append(px) + preds.append(py) + return np.stack(preds, axis=-1) + + +def decode_scrfd( + bboxes_concatenated, + scores_concatenated, + kps_concatenated, + feat_stride_fpn, + input_size, + num_anchors, + score_threshold, + nms_threshold, +): + """Decode the detection results of SCRFD. + + @param bboxes_concatenated: List of bounding box predictions for each scale. + @type bboxes_concatenated: list[np.ndarray] + @param scores_concatenated: List of confidence score predictions for each scale. + @type scores_concatenated: list[np.ndarray] + @param kps_concatenated: List of keypoint predictions for each scale. + @type kps_concatenated: list[np.ndarray] + @param feat_stride_fpn: List of feature strides for each scale. + @type feat_stride_fpn: list[int] + @param input_size: Input size of the model. + @type input_size: tuple[int] + @param num_anchors: Number of anchors. 
+ @type num_anchors: int + @param score_threshold: Confidence score threshold. + @type score_threshold: float + @param nms_threshold: Non-maximum suppression threshold. + @type nms_threshold: float + @return: Bounding boxes, confidence scores, and keypoints of detected objects. + @rtype: tuple[np.ndarray, np.ndarray, np.ndarray] + """ + scores_list = [] + bboxes_list = [] + kps_list = [] + + for idx, stride in enumerate(feat_stride_fpn): + scores = scores_concatenated[idx] + bbox_preds = bboxes_concatenated[idx] * stride + kps_preds = kps_concatenated[idx] * stride + + height = input_size[0] // stride + width = input_size[1] // stride + + anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype( + np.float32 + ) + anchor_centers = (anchor_centers * stride).reshape((-1, 2)) + if num_anchors > 1: + anchor_centers = np.stack([anchor_centers] * num_anchors, axis=1).reshape( + (-1, 2) + ) + + pos_inds = np.where(scores >= score_threshold)[0] + bboxes = distance2bbox(anchor_centers, bbox_preds) + pos_scores = scores[pos_inds] + pos_bboxes = bboxes[pos_inds] + scores_list.append(pos_scores.reshape(-1, 1)) + bboxes_list.append(pos_bboxes) + + kpss = distance2kps(anchor_centers, kps_preds) + kpss = kpss.reshape((kpss.shape[0], -1, 2)) + pos_kpss = kpss[pos_inds] + kps_list.append(pos_kpss) + + scores = np.vstack(scores_list) + scores_ravel = scores.ravel() + order = scores_ravel.argsort()[::-1] + bboxes = np.vstack(bboxes_list) + kpss = np.vstack(kps_list) + + pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False) + pre_det = pre_det[order, :] + keep = nms(pre_det, nms_threshold) + det = pre_det[keep, :] + kpss = kpss[order, :, :] + kpss = kpss[keep, :, :] + + scores = det[:, 4] + bboxes = np.int32(det[:, :4]) + keypoints = np.int32(kpss) + keypoints = keypoints.reshape(-1, 5, 2) + + return bboxes, scores, keypoints From 1b8000a0300eee4e5e519cba78623ee291c9d56b Mon Sep 17 00:00:00 2001 From: aljazkonec1 Date: Tue, 20 Aug 2024 09:26:20 +0200 
Subject: [PATCH 16/25] Added message timestamp. --- depthai_nodes/ml/parsers/classification_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/depthai_nodes/ml/parsers/classification_parser.py b/depthai_nodes/ml/parsers/classification_parser.py index c7fc0b7..85db666 100644 --- a/depthai_nodes/ml/parsers/classification_parser.py +++ b/depthai_nodes/ml/parsers/classification_parser.py @@ -85,5 +85,6 @@ def run(self): scores = ex / np.sum(ex) msg = create_classification_message(scores, classes) + msg.setTimestamp(output.getTimestamp()) self.out.send(msg) From f6fc00ecc7cb0536759a8e34761737f7975c2cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:17:07 +0200 Subject: [PATCH 17/25] SCRFD extension. (#23) --- depthai_nodes/ml/parsers/scrfd.py | 89 ++++++++++++++----------------- 1 file changed, 40 insertions(+), 49 deletions(-) diff --git a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py index 5b4fa50..e1ce081 100644 --- a/depthai_nodes/ml/parsers/scrfd.py +++ b/depthai_nodes/ml/parsers/scrfd.py @@ -125,55 +125,46 @@ def run(self): except dai.MessageQueue.QueueException: break # Pipeline was stopped - score_8 = ( - output.getTensor("score_8", dequantize=True) - .flatten() - .astype(np.float32) - ) - score_16 = ( - output.getTensor("score_16", dequantize=True) - .flatten() - .astype(np.float32) - ) - score_32 = ( - output.getTensor("score_32", dequantize=True) - .flatten() - .astype(np.float32) - ) - bbox_8 = ( - output.getTensor("bbox_8", dequantize=True) - .reshape(len(score_8), 4) - .astype(np.float32) - ) - bbox_16 = ( - output.getTensor("bbox_16", dequantize=True) - .reshape(len(score_16), 4) - .astype(np.float32) - ) - bbox_32 = ( - output.getTensor("bbox_32", dequantize=True) - .reshape(len(score_32), 4) - .astype(np.float32) - ) - kps_8 = ( - output.getTensor("kps_8", dequantize=True) - .reshape(len(score_8), 10) - .astype(np.float32) - ) - kps_16 = ( 
- output.getTensor("kps_16", dequantize=True) - .reshape(len(score_16), 10) - .astype(np.float32) - ) - kps_32 = ( - output.getTensor("kps_32", dequantize=True) - .reshape(len(score_32), 10) - .astype(np.float32) - ) - - bboxes_concatenated = [bbox_8, bbox_16, bbox_32] - scores_concatenated = [score_8, score_16, score_32] - kps_concatenated = [kps_8, kps_16, kps_32] + scores_concatenated = [] + bboxes_concatenated = [] + kps_concatenated = [] + + for stride in self.feat_stride_fpn: + score_layer_name = f"score_{stride}" + bbox_layer_name = f"bbox_{stride}" + kps_layer_name = f"kps_{stride}" + if score_layer_name not in output.getAllLayerNames(): + raise ValueError( + f"Layer {score_layer_name} not found in the model output." + ) + if bbox_layer_name not in output.getAllLayerNames(): + raise ValueError( + f"Layer {bbox_layer_name} not found in the model output." + ) + if kps_layer_name not in output.getAllLayerNames(): + raise ValueError( + f"Layer {kps_layer_name} not found in the model output." + ) + + score_tensor = ( + output.getTensor(score_layer_name, dequantize=True) + .flatten() + .astype(np.float32) + ) + bbox_tensor = ( + output.getTensor(bbox_layer_name, dequantize=True) + .reshape(len(score_tensor), 4) + .astype(np.float32) + ) + kps_tensor = ( + output.getTensor(kps_layer_name, dequantize=True) + .reshape(len(score_tensor), 10) + .astype(np.float32) + ) + + scores_concatenated.append(score_tensor) + bboxes_concatenated.append(bbox_tensor) + kps_concatenated.append(kps_tensor) bboxes, scores, keypoints = decode_scrfd( bboxes_concatenated=bboxes_concatenated, From 8432f20ee2655f4e4574876e20de25fff411cfef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:08:23 +0200 Subject: [PATCH 18/25] Improve M-LSD. 
(#24) --- depthai_nodes/ml/parsers/mlsd.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/depthai_nodes/ml/parsers/mlsd.py b/depthai_nodes/ml/parsers/mlsd.py index 128e990..b0f9758 100644 --- a/depthai_nodes/ml/parsers/mlsd.py +++ b/depthai_nodes/ml/parsers/mlsd.py @@ -84,12 +84,17 @@ def run(self): while self.isRunning(): try: output: dai.NNData = self.input.get() - nn_passthrough: dai.NNData = self.nn_passthrough.get() except dai.MessageQueue.QueueException: break # Pipeline was stopped - tpMap = nn_passthrough.getTensor("output").astype(np.float32) - heat_np = output.getTensor("heat").astype(np.float32) + tpMap = output.getTensor("tpMap", dequantize=True).astype(np.float32) + heat_np = output.getTensor("heat", dequantize=True).astype(np.float32) + + if len(tpMap.shape) != 4: + raise ValueError("Invalid shape of the tpMap tensor. Should be 4D.") + if tpMap.shape[3] == 9: + # We have NWHC format, transform to NCHW + tpMap = np.transpose(tpMap, (0, 3, 1, 2)) pts, pts_score, vmap = decode_scores_and_points(tpMap, heat_np, self.topk_n) lines, scores = get_lines( From 101be079fe48163036ce98cc374203b120134b50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:38:02 +0200 Subject: [PATCH 19/25] Classification parser corrections. (#25) * Small fixes. * Docstring fix. * Parameter classes required. * Classes only as list. 
--- .../ml/messages/creators/__init__.py | 2 +- .../ml/messages/creators/classification.py | 81 +++++++++++++++++++ .../creators/classification_message.py | 68 ---------------- depthai_nodes/ml/parsers/__init__.py | 2 +- ...sification_parser.py => classification.py} | 19 +++-- 5 files changed, 96 insertions(+), 76 deletions(-) create mode 100644 depthai_nodes/ml/messages/creators/classification.py delete mode 100644 depthai_nodes/ml/messages/creators/classification_message.py rename depthai_nodes/ml/parsers/{classification_parser.py => classification.py} (87%) diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py index f184ffd..3f15b5e 100644 --- a/depthai_nodes/ml/messages/creators/__init__.py +++ b/depthai_nodes/ml/messages/creators/__init__.py @@ -1,4 +1,4 @@ -from .classification_message import create_classification_message +from .classification import create_classification_message from .depth import create_depth_message from .detection import create_detection_message, create_line_detection_message from .image import create_image_message diff --git a/depthai_nodes/ml/messages/creators/classification.py b/depthai_nodes/ml/messages/creators/classification.py new file mode 100644 index 0000000..98e0c15 --- /dev/null +++ b/depthai_nodes/ml/messages/creators/classification.py @@ -0,0 +1,81 @@ +from typing import List, Union + +import numpy as np + +from ...messages import Classifications + + +def create_classification_message( + classes: List, scores: Union[np.ndarray, List] +) -> Classifications: + """Create a message for classification. The message contains the class names and + their respective scores, sorted in descending order of scores. + + @param classes: A list containing class names. + @type classes: List + @param scores: A numpy array of shape (n_classes,) containing the probability score of each class. + @type scores: np.ndarray + + @return: A message with attributes `classes` and `scores`. 
`classes` is a list of classes, sorted in descending order of scores. `scores` is a list of the corresponding scores. + @rtype: Classifications + + @raises ValueError: If the provided classes are None. + @raises ValueError: If the provided classes are not a list. + @raises ValueError: If the provided classes are empty. + @raises ValueError: If the provided scores are None. + @raises ValueError: If the provided scores are not a list or a numpy array. + @raises ValueError: If the provided scores are empty. + @raises ValueError: If the provided scores are not a 1D array. + @raises ValueError: If the provided scores are not of type float. + @raises ValueError: If the provided scores do not sum to 1. + @raises ValueError: If the number of labels and scores mismatch. + """ + + if type(classes) == type(None): + raise ValueError("Classes should not be None.") + + if not isinstance(classes, list): + raise ValueError(f"Classes should be a list, got {type(classes)}.") + + if len(classes) == 0: + raise ValueError("Classes should not be empty.") + + if type(scores) == type(None): + raise ValueError("Scores should not be None.") + + if not isinstance(scores, np.ndarray) and not isinstance(scores, list): + raise ValueError( + f"Scores should be a list or a numpy array, got {type(scores)}." + ) + + if isinstance(scores, list): + scores = np.array(scores) + + if len(scores) == 0: + raise ValueError("Scores should not be empty.") + + if len(scores) != len(scores.flatten()): + raise ValueError(f"Scores should be a 1D array, got {scores.shape}.") + + scores = scores.flatten() + + if not np.issubdtype(scores.dtype, np.floating): + raise ValueError(f"Scores should be of type float, got {scores.dtype}.") + + if not np.isclose(np.sum(scores), 1.0, atol=1e-1): + raise ValueError(f"Scores should sum to 1, got {np.sum(scores)}.") + + if len(scores) != len(classes): + raise ValueError( + f"Number of labels and scores mismatch. Provided {len(scores)} scores and {len(classes)} class names." 
+ ) + + classification_msg = Classifications() + + sorted_args = np.argsort(scores)[::-1] + scores = scores[sorted_args] + + classification_msg.classes = [classes[i] for i in sorted_args] + classification_msg.scores = scores.tolist() + + return classification_msg diff --git a/depthai_nodes/ml/messages/creators/classification_message.py b/depthai_nodes/ml/messages/creators/classification_message.py deleted file mode 100644 index 0577954..0000000 --- a/depthai_nodes/ml/messages/creators/classification_message.py +++ /dev/null @@ -1,68 +0,0 @@ -import depthai as dai -import numpy as np - -from ...messages import Classifications - - -def create_classification_message( - scores: np.ndarray, classes: np.ndarray = None -) -> dai.Buffer: - """Create a message for classification. The message contains the class names and - their respective scores, sorted in descending order of scores. - - Parameters - ---------- - scores : np.ndarray - A numpy array of shape (n_classes,) containing the probability score of each class. - - classes : np.ndarray = [] - A numpy array of shape (n_classes, ), containing class names. If not provided, class names are set to []. - - - Returns - -------- - Classifications : dai.Buffer - A message with parameter `classes` which is a list of shape (n_classes, 2) - where each item is [class_name, probability_score]. - If no class names are provided, class_name is set to None. 
- """ - - if type(classes) == type(None): - classes = np.array([]) - else: - classes = np.array(classes) - - if len(scores) == 0: - raise ValueError("Scores should not be empty.") - - if len(scores) != len(scores.flatten()): - raise ValueError(f"Scores should be a 1D array, got {scores.shape}.") - - if len(classes) != len(classes.flatten()): - raise ValueError(f"Classes should be a 1D array, got {classes.shape}.") - - scores = scores.flatten() - classes = classes.flatten() - - if not np.issubdtype(scores.dtype, np.floating): - raise ValueError(f"Scores should be of type float, got {scores.dtype}.") - - if not np.isclose(np.sum(scores), 1.0, atol=1e-1): - raise ValueError(f"Scores should sum to 1, got {np.sum(scores)}.") - - if len(scores) != len(classes) and len(classes) != 0: - raise ValueError( - f"Number of labels and scores mismatch. Provided {len(scores)} scores and {len(classes)} class names." - ) - - classification_msg = Classifications() - - sorted_args = np.argsort(scores)[::-1] - scores = scores[sorted_args] - - if len(classes) != 0: - classification_msg.classes = classes[sorted_args].tolist() - - classification_msg.scores = scores.tolist() - - return classification_msg diff --git a/depthai_nodes/ml/parsers/__init__.py b/depthai_nodes/ml/parsers/__init__.py index 89386a4..58dfb43 100644 --- a/depthai_nodes/ml/parsers/__init__.py +++ b/depthai_nodes/ml/parsers/__init__.py @@ -1,4 +1,4 @@ -from .classification_parser import ClassificationParser +from .classification import ClassificationParser from .image_output import ImageOutputParser from .keypoints import KeypointParser from .mediapipe_hand_landmarker import MPHandLandmarkParser diff --git a/depthai_nodes/ml/parsers/classification_parser.py b/depthai_nodes/ml/parsers/classification.py similarity index 87% rename from depthai_nodes/ml/parsers/classification_parser.py rename to depthai_nodes/ml/parsers/classification.py index 85db666..6295e7c 100644 --- a/depthai_nodes/ml/parsers/classification_parser.py 
+++ b/depthai_nodes/ml/parsers/classification.py @@ -1,3 +1,5 @@ +from typing import List + import depthai as dai import numpy as np @@ -13,7 +15,7 @@ class ClassificationParser(dai.node.ThreadedHostNode): Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. out : Node.Output Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. - classes : list[str] + classes : List[str] List of class names to be used for linking with their respective scores. Expected to be in the same order as Neural Network's output. If not provided, the message will only return sorted scores. is_softmax : bool = True If False, the scores are converted to probabilities using softmax function. @@ -26,7 +28,7 @@ class ClassificationParser(dai.node.ThreadedHostNode): An object with attributes `classes` and `scores`. `classes` is a list of classes, sorted in descending order of scores. `scores` is a list of corresponding scores. """ - def __init__(self, classes: list[str] = None, is_softmax: bool = True): + def __init__(self, classes: List[str] = None, is_softmax: bool = True): """Initializes the ClassificationParser node. @param classes: List of class names to be used for linking with their respective @@ -42,7 +44,7 @@ def __init__(self, classes: list[str] = None, is_softmax: bool = True): self.n_classes = len(self.classes) self.is_softmax = is_softmax - def setClasses(self, classes: list[str]): + def setClasses(self, classes: List[str]): """Sets the class names for the classification model. @param classes: List of class names to be used for linking with their respective @@ -72,9 +74,14 @@ def run(self): f"Expected 1 output layer, got {len(output_layer_names)}." 
) - scores = output.getTensor(output_layer_names[0]) + if self.n_classes == 0: + raise ValueError("Classes must be provided for classification.") + + scores = output.getTensor(output_layer_names[0], dequantize=True).astype( + np.float32 + ) scores = np.array(scores).flatten() - classes = np.array(self.classes) + if len(scores) != self.n_classes and self.n_classes != 0: raise ValueError( f"Number of labels and scores mismatch. Provided {self.n_classes} class names and {len(scores)} scores." @@ -84,7 +91,7 @@ def run(self): ex = np.exp(scores) scores = ex / np.sum(ex) - msg = create_classification_message(scores, classes) + msg = create_classification_message(self.classes, scores) msg.setTimestamp(output.getTimestamp()) self.out.send(msg) From 9a49670f8722ca13e113b57de4d0821d315676c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:38:20 +0200 Subject: [PATCH 20/25] Parser's parameters renaming. (#26) * Rename params in Keypoints parser. * Rename params in Palm Detection parser. * Rename params in SCRFD parser. * Rename params in YuNet parser. --- depthai_nodes/ml/parsers/keypoints.py | 24 ++++----- .../ml/parsers/mediapipe_palm_detection.py | 48 ++++++++--------- depthai_nodes/ml/parsers/scrfd.py | 48 ++++++++--------- depthai_nodes/ml/parsers/yunet.py | 52 +++++++++---------- 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/depthai_nodes/ml/parsers/keypoints.py b/depthai_nodes/ml/parsers/keypoints.py index 07d93f9..7405a44 100644 --- a/depthai_nodes/ml/parsers/keypoints.py +++ b/depthai_nodes/ml/parsers/keypoints.py @@ -17,7 +17,7 @@ class KeypointParser(dai.node.ThreadedHostNode): Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. scale_factor : float Scale factor to divide the keypoints by. 
- num_keypoints : int + n_keypoints : int Number of keypoints the model detects. Output Message/s @@ -38,21 +38,21 @@ class KeypointParser(dai.node.ThreadedHostNode): def __init__( self, scale_factor=1, - num_keypoints=None, + n_keypoints=None, ): """Initializes KeypointParser node. @param scale_factor: Scale factor to divide the keypoints by. @type scale_factor: float - @param num_keypoints: Number of keypoints. - @type num_keypoints: int + @param n_keypoints: Number of keypoints. + @type n_keypoints: int """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) self.scale_factor = scale_factor - self.num_keypoints = num_keypoints + self.n_keypoints = n_keypoints def setScaleFactor(self, scale_factor): """Sets the scale factor to divide the keypoints by. @@ -62,16 +62,16 @@ def setScaleFactor(self, scale_factor): """ self.scale_factor = scale_factor - def setNumKeypoints(self, num_keypoints): + def setNumKeypoints(self, n_keypoints): """Sets the number of keypoints. - @param num_keypoints: Number of keypoints. - @type num_keypoints: int + @param n_keypoints: Number of keypoints. + @type n_keypoints: int """ - self.num_keypoints = num_keypoints + self.n_keypoints = n_keypoints def run(self): - if self.num_keypoints is None: + if self.n_keypoints is None: raise ValueError("Number of keypoints must be specified!") while self.isRunning(): @@ -90,14 +90,14 @@ def run(self): keypoints = output.getTensor(output_layer_names[0], dequantize=True).astype( np.float32 ) - num_coords = int(np.prod(keypoints.shape) / self.num_keypoints) + num_coords = int(np.prod(keypoints.shape) / self.n_keypoints) if num_coords not in [2, 3]: raise ValueError( f"Expected 2 or 3 coordinates per keypoint, got {num_coords}." 
) - keypoints = keypoints.reshape(self.num_keypoints, num_coords) + keypoints = keypoints.reshape(self.n_keypoints, num_coords) keypoints /= self.scale_factor diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py index 2f297d7..d00e92d 100644 --- a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py +++ b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py @@ -17,11 +17,11 @@ class MPPalmDetectionParser(dai.node.ThreadedHostNode): Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. out : Node.Output Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved.Parser sends the processed network results to this output in form of messages. It is a linking point from which the processed network results are retrieved. - score_threshold : float + conf_threshold : float Confidence score threshold for detected hands. - nms_threshold : float + iou_threshold : float Non-maximum suppression threshold. - top_k : int + max_det : int Maximum number of detections to keep. Output Message/s @@ -36,23 +36,23 @@ class MPPalmDetectionParser(dai.node.ThreadedHostNode): https://ai.google.dev/edge/mediapipe/solutions/vision/hand_landmarker """ - def __init__(self, score_threshold=0.5, nms_threshold=0.5, top_k=100): + def __init__(self, conf_threshold=0.5, iou_threshold=0.5, max_det=100): """Initializes the MPPalmDetectionParser node. - @param score_threshold: Confidence score threshold for detected hands. - @type score_threshold: float - @param nms_threshold: Non-maximum suppression threshold. - @type nms_threshold: float - @param top_k: Maximum number of detections to keep. - @type top_k: int + @param conf_threshold: Confidence score threshold for detected hands. 
+ @type conf_threshold: float + @param iou_threshold: Non-maximum suppression threshold. + @type iou_threshold: float + @param max_det: Maximum number of detections to keep. + @type max_det: int """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) - self.score_threshold = score_threshold - self.nms_threshold = nms_threshold - self.top_k = top_k + self.conf_threshold = conf_threshold + self.iou_threshold = iou_threshold + self.max_det = max_det def setConfidenceThreshold(self, threshold): """Sets the confidence score threshold for detected hands. @@ -60,23 +60,23 @@ def setConfidenceThreshold(self, threshold): @param threshold: Confidence score threshold for detected hands. @type threshold: float """ - self.score_threshold = threshold + self.conf_threshold = threshold - def setNMSThreshold(self, threshold): + def setIOUThreshold(self, threshold): """Sets the non-maximum suppression threshold. @param threshold: Non-maximum suppression threshold. @type threshold: float """ - self.nms_threshold = threshold + self.iou_threshold = threshold - def setTopK(self, top_k): + def setMaxDetections(self, max_det): """Sets the maximum number of detections to keep. - @param top_k: Maximum number of detections to keep. - @type top_k: int + @param max_det: Maximum number of detections to keep. 
+ @type max_det: int """ - self.top_k = top_k + self.max_det = max_det def run(self): while self.isRunning(): @@ -97,7 +97,7 @@ def run(self): ) decoded_bboxes = generate_anchors_and_decode( - bboxes=bboxes, scores=scores, threshold=self.score_threshold, scale=192 + bboxes=bboxes, scores=scores, threshold=self.conf_threshold, scale=192 ) bboxes = [] @@ -116,9 +116,9 @@ def run(self): indices = cv2.dnn.NMSBoxes( bboxes, scores, - self.score_threshold, - self.nms_threshold, - top_k=self.top_k, + self.conf_threshold, + self.iou_threshold, + top_k=self.max_det, ) bboxes = np.array(bboxes)[indices] scores = np.array(scores)[indices] diff --git a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py index e1ce081..deec679 100644 --- a/depthai_nodes/ml/parsers/scrfd.py +++ b/depthai_nodes/ml/parsers/scrfd.py @@ -14,11 +14,11 @@ class SCRFDParser(dai.node.ThreadedHostNode): Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. out : Node.Output Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. - score_threshold : float + conf_threshold : float Confidence score threshold for detected faces. - nms_threshold : float + iou_threshold : float Non-maximum suppression threshold. - top_k : int + max_det : int Maximum number of detections to keep. feat_stride_fpn : tuple Tuple of the feature strides. @@ -36,21 +36,21 @@ class SCRFDParser(dai.node.ThreadedHostNode): def __init__( self, - score_threshold=0.5, - nms_threshold=0.5, - top_k=100, + conf_threshold=0.5, + iou_threshold=0.5, + max_det=100, input_size=(640, 640), feat_stride_fpn=(8, 16, 32), num_anchors=2, ): """Initializes the SCRFDParser node. - @param score_threshold: Confidence score threshold for detected faces. - @type score_threshold: float - @param nms_threshold: Non-maximum suppression threshold. 
- @type nms_threshold: float - @param top_k: Maximum number of detections to keep. - @type top_k: int + @param conf_threshold: Confidence score threshold for detected faces. + @type conf_threshold: float + @param iou_threshold: Non-maximum suppression threshold. + @type iou_threshold: float + @param max_det: Maximum number of detections to keep. + @type max_det: int @param feat_stride_fpn: List of the feature strides. @type feat_stride_fpn: tuple @param num_anchors: Number of anchors. @@ -62,9 +62,9 @@ def __init__( self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) - self.score_threshold = score_threshold - self.nms_threshold = nms_threshold - self.top_k = top_k + self.conf_threshold = conf_threshold + self.iou_threshold = iou_threshold + self.max_det = max_det self.feat_stride_fpn = feat_stride_fpn self.num_anchors = num_anchors @@ -76,23 +76,23 @@ def setConfidenceThreshold(self, threshold): @param threshold: Confidence score threshold for detected faces. @type threshold: float """ - self.score_threshold = threshold + self.conf_threshold = threshold - def setNMSThreshold(self, threshold): + def setIOUThreshold(self, threshold): """Sets the non-maximum suppression threshold. @param threshold: Non-maximum suppression threshold. @type threshold: float """ - self.nms_threshold = threshold + self.iou_threshold = threshold - def setTopK(self, top_k): + def setMaxDetections(self, max_det): """Sets the maximum number of detections to keep. - @param top_k: Maximum number of detections to keep. - @type top_k: int + @param max_det: Maximum number of detections to keep. + @type max_det: int """ - self.top_k = top_k + self.max_det = max_det def setFeatStrideFPN(self, feat_stride_fpn): """Sets the feature stride of the FPN. 
@@ -173,8 +173,8 @@ def run(self): feat_stride_fpn=self.feat_stride_fpn, input_size=self.input_size, num_anchors=self.num_anchors, - score_threshold=self.score_threshold, - nms_threshold=self.nms_threshold, + score_threshold=self.conf_threshold, + nms_threshold=self.iou_threshold, ) detection_msg = create_detection_message( bboxes, scores, None, keypoints.tolist() diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py index c229c99..9016b9c 100644 --- a/depthai_nodes/ml/parsers/yunet.py +++ b/depthai_nodes/ml/parsers/yunet.py @@ -17,11 +17,11 @@ class YuNetParser(dai.node.ThreadedHostNode): Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. out : Node.Output Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. - score_threshold : float + conf_threshold : float Confidence score threshold for detected faces. - nms_threshold : float + iou_threshold : float Non-maximum suppression threshold. - top_k : int + max_det : int Maximum number of detections to keep. Output Message/s @@ -33,26 +33,26 @@ class YuNetParser(dai.node.ThreadedHostNode): def __init__( self, - score_threshold=0.6, - nms_threshold=0.3, - top_k=5000, + conf_threshold=0.6, + iou_threshold=0.3, + max_det=5000, ): """Initializes the YuNetParser node. - @param score_threshold: Confidence score threshold for detected faces. - @type score_threshold: float - @param nms_threshold: Non-maximum suppression threshold. - @type nms_threshold: float - @param top_k: Maximum number of detections to keep. - @type top_k: int + @param conf_threshold: Confidence score threshold for detected faces. + @type conf_threshold: float + @param iou_threshold: Non-maximum suppression threshold. + @type iou_threshold: float + @param max_det: Maximum number of detections to keep. 
+ @type max_det: int """ dai.node.ThreadedHostNode.__init__(self) self.input = dai.Node.Input(self) self.out = dai.Node.Output(self) - self.score_threshold = score_threshold - self.nms_threshold = nms_threshold - self.top_k = top_k + self.conf_threshold = conf_threshold + self.iou_threshold = iou_threshold + self.max_det = max_det def setConfidenceThreshold(self, threshold): """Sets the confidence score threshold for detected faces. @@ -60,23 +60,23 @@ def setConfidenceThreshold(self, threshold): @param threshold: Confidence score threshold for detected faces. @type threshold: float """ - self.score_threshold = threshold + self.conf_threshold = threshold - def setNMSThreshold(self, threshold): + def setIOUThreshold(self, threshold): """Sets the non-maximum suppression threshold. @param threshold: Non-maximum suppression threshold. @type threshold: float """ - self.nms_threshold = threshold + self.iou_threshold = threshold - def setTopK(self, top_k): + def setMaxDetections(self, max_det): """Sets the maximum number of detections to keep. - @param top_k: Maximum number of detections to keep. - @type top_k: int + @param max_det: Maximum number of detections to keep. + @type max_det: int """ - self.top_k = top_k + self.max_det = max_det def run(self): while self.isRunning(): @@ -113,7 +113,7 @@ def run(self): detections += decode_detections( input_size, stride, - self.score_threshold, + self.conf_threshold, cls, obj, bbox, @@ -126,9 +126,9 @@ def run(self): indices = cv2.dnn.NMSBoxes( detection_boxes, detection_scores, - self.score_threshold, - self.nms_threshold, - top_k=self.top_k, + self.conf_threshold, + self.iou_threshold, + top_k=self.max_det, ) detections = np.array(detections)[indices] From fe59514241e4516bdd64266ac25ac952b0304aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:09:56 +0200 Subject: [PATCH 21/25] Palm detection - better bbox calculation. 
(#27) --- depthai_nodes/ml/parsers/mediapipe_palm_detection.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py index d00e92d..e3d21eb 100644 --- a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py +++ b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py @@ -105,10 +105,10 @@ def run(self): for hand in decoded_bboxes: extended_points = hand.rect_points - xmin = int(min(extended_points[0][0], extended_points[1][0])) - ymin = int(min(extended_points[0][1], extended_points[1][1])) - xmax = int(max(extended_points[2][0], extended_points[3][0])) - ymax = int(max(extended_points[2][1], extended_points[3][1])) + xmin = int(min([point[0] for point in extended_points])) + ymin = int(min([point[1] for point in extended_points])) + xmax = int(max([point[0] for point in extended_points])) + ymax = int(max([point[1] for point in extended_points])) bboxes.append([xmin, ymin, xmax, ymax]) scores.append(hand.pd_score) From b6aa1bd4cd1b94f3d4533ff479dc8319b8b02a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:17:00 +0200 Subject: [PATCH 22/25] Bug fix in ImageOutputParser and MonocularDepthParser. (#28) * Bug fix: naming * Bug fix: naming --- depthai_nodes/ml/parsers/image_output.py | 12 ++++++------ depthai_nodes/ml/parsers/monocular_depth.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index be25053..e07d5f7 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -58,15 +58,15 @@ def run(self): raise ValueError( f"Expected 1 output layer, got {len(output_layer_names)}." 
) - output = output.getTensor(output_layer_names[0], dequantize=True) + output_image = output.getTensor(output_layer_names[0], dequantize=True) - if len(output.shape) == 4: - image = output[0] - elif len(output.shape) == 3: - image = output + if len(output_image.shape) == 4: + image = output_image[0] + elif len(output_image.shape) == 3: + image = output_image else: raise ValueError( - f"Expected 3- or 4-dimensional output, got {len(output.shape)}-dimensional", + f"Expected 3- or 4-dimensional output, got {len(output_image.shape)}-dimensional", ) image = unnormalize_image(image) diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py index b21c36f..5e3161d 100644 --- a/depthai_nodes/ml/parsers/monocular_depth.py +++ b/depthai_nodes/ml/parsers/monocular_depth.py @@ -58,9 +58,9 @@ def run(self): raise ValueError( f"Expected 1 output layer, got {len(output_layer_names)}." ) - output = output.getTensor(output_layer_names[0]) + depth_map = output.getTensor(output_layer_names[0]) - depth_map = output[0] + depth_map = depth_map[0] depth_message = create_depth_message( depth_map=depth_map, From 257fed98a202a5d28a1e22fa08747e537bb4bc50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:17:24 +0200 Subject: [PATCH 23/25] DAI v3 support. (#29) * Re-defining parser input and output. * Instructions to install DAIv3. 
--- README.md | 7 +++++++ depthai_nodes/ml/parsers/image_output.py | 4 ++-- depthai_nodes/ml/parsers/keypoints.py | 4 ++-- depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py | 4 ++-- depthai_nodes/ml/parsers/mediapipe_palm_detection.py | 4 ++-- depthai_nodes/ml/parsers/mlsd.py | 8 ++------ depthai_nodes/ml/parsers/monocular_depth.py | 4 ++-- depthai_nodes/ml/parsers/scrfd.py | 4 ++-- depthai_nodes/ml/parsers/segmentation.py | 4 ++-- depthai_nodes/ml/parsers/superanimal_landmarker.py | 4 ++-- depthai_nodes/ml/parsers/thermal_image.py | 4 ++-- depthai_nodes/ml/parsers/xfeat.py | 4 ++-- depthai_nodes/ml/parsers/yunet.py | 4 ++-- requirements.txt | 1 - 14 files changed, 31 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 2c93102..5cafa5b 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,13 @@ The project is in an alpha state, so it may be missing some critical features or ## Installation +The `depthai_nodes` package requires Python 3.8 or later and `depthai v3` installed. +While the `depthai v3` is not yet released on PyPI, you can install it with the following command: + +```bash +pip install --extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-release-local/ depthai==3.0.0a2 +``` + The `depthai_nodes` package is hosted on PyPI, so you can install it with `pip`. 
To install the package, run: diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index e07d5f7..b159303 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -37,8 +37,8 @@ def __init__(self, output_is_bgr=False): @type output_is_bgr: bool """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.output_is_bgr = output_is_bgr diff --git a/depthai_nodes/ml/parsers/keypoints.py b/depthai_nodes/ml/parsers/keypoints.py index 7405a44..f03a64a 100644 --- a/depthai_nodes/ml/parsers/keypoints.py +++ b/depthai_nodes/ml/parsers/keypoints.py @@ -48,8 +48,8 @@ def __init__( @type n_keypoints: int """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.scale_factor = scale_factor self.n_keypoints = n_keypoints diff --git a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py index 346b881..009827c 100644 --- a/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py +++ b/depthai_nodes/ml/parsers/mediapipe_hand_landmarker.py @@ -42,8 +42,8 @@ def __init__(self, score_threshold=0.5, scale_factor=224): @type scale_factor: float """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.score_threshold = score_threshold self.scale_factor = scale_factor diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py index e3d21eb..7b29f0e 100644 --- a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py +++ b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py @@ -47,8 +47,8 @@ def 
__init__(self, conf_threshold=0.5, iou_threshold=0.5, max_det=100): @type max_det: int """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.conf_threshold = conf_threshold self.iou_threshold = iou_threshold diff --git a/depthai_nodes/ml/parsers/mlsd.py b/depthai_nodes/ml/parsers/mlsd.py index b0f9758..376128d 100644 --- a/depthai_nodes/ml/parsers/mlsd.py +++ b/depthai_nodes/ml/parsers/mlsd.py @@ -16,9 +16,6 @@ class MLSDParser(dai.node.ThreadedHostNode): Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. out : Node.Output Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. - nn_passthrough : Node.Input - Node's 2nd input. It accepts the passthrough of the Neural Network node. This is required for parsing the output of the M-LSD model. - It is a linking point to which the Neural Network's passthrough (network's input accutualy) is linked. topk_n : int Number of top candidates to keep. 
score_thr : float @@ -49,9 +46,8 @@ def __init__( @type dist_thr: float """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.nn_passthrough = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.topk_n = topk_n self.score_thr = score_thr self.dist_thr = dist_thr diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py index 5e3161d..c76a8b6 100644 --- a/depthai_nodes/ml/parsers/monocular_depth.py +++ b/depthai_nodes/ml/parsers/monocular_depth.py @@ -33,8 +33,8 @@ def __init__(self, depth_type="relative"): @type depth_type: str """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.depth_type = depth_type diff --git a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py index deec679..12f387c 100644 --- a/depthai_nodes/ml/parsers/scrfd.py +++ b/depthai_nodes/ml/parsers/scrfd.py @@ -59,8 +59,8 @@ def __init__( @type input_size: tuple """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.conf_threshold = conf_threshold self.iou_threshold = iou_threshold diff --git a/depthai_nodes/ml/parsers/segmentation.py b/depthai_nodes/ml/parsers/segmentation.py index e141c3a..2f55c5d 100644 --- a/depthai_nodes/ml/parsers/segmentation.py +++ b/depthai_nodes/ml/parsers/segmentation.py @@ -36,8 +36,8 @@ def __init__(self, background_class=False): @type background_class: bool """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.background_class = background_class def setBackgroundClass(self, background_class): diff --git 
a/depthai_nodes/ml/parsers/superanimal_landmarker.py b/depthai_nodes/ml/parsers/superanimal_landmarker.py index 31ed393..45b2dbb 100644 --- a/depthai_nodes/ml/parsers/superanimal_landmarker.py +++ b/depthai_nodes/ml/parsers/superanimal_landmarker.py @@ -39,8 +39,8 @@ def __init__( @type scale_factor: float """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.score_threshold = score_threshold self.scale_factor = scale_factor diff --git a/depthai_nodes/ml/parsers/thermal_image.py b/depthai_nodes/ml/parsers/thermal_image.py index bc090ef..6bb579e 100644 --- a/depthai_nodes/ml/parsers/thermal_image.py +++ b/depthai_nodes/ml/parsers/thermal_image.py @@ -24,8 +24,8 @@ class ThermalImageParser(dai.node.ThreadedHostNode): def __init__(self): """Initializes the ThermalImageParser node.""" dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() def run(self): while self.isRunning(): diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py index 6d4a451..7a061fc 100644 --- a/depthai_nodes/ml/parsers/xfeat.py +++ b/depthai_nodes/ml/parsers/xfeat.py @@ -47,8 +47,8 @@ def __init__( @type input_size: Tuple[float, float] """ dai.node.ThreadedHostNode.__init__(self) - self.input = dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.original_size = original_size self.input_size = input_size self.previous_results = None diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py index 9016b9c..4db4ca9 100644 --- a/depthai_nodes/ml/parsers/yunet.py +++ b/depthai_nodes/ml/parsers/yunet.py @@ -47,8 +47,8 @@ def __init__( @type max_det: int """ dai.node.ThreadedHostNode.__init__(self) - self.input = 
dai.Node.Input(self) - self.out = dai.Node.Output(self) + self.input = self.createInput() + self.out = self.createOutput() self.conf_threshold = conf_threshold self.iou_threshold = iou_threshold diff --git a/requirements.txt b/requirements.txt index d2259b9..1db7aea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -depthai==2.25.0.0.dev+e59ddcdee91982f28d37b336ddfad2dc676c398b opencv-python \ No newline at end of file From d6dc43d4f7ffc8a3497d9b9fd5b9016ccad92833 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ja=C5=A1a=20Kerec?= <61207502+kkeroo@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:07:00 +0200 Subject: [PATCH 24/25] XFeat parser improvement. (#30) * Supporting no matches. * Add max_keypoints parameter. --- depthai_nodes/ml/parsers/utils/xfeat.py | 3 +++ depthai_nodes/ml/parsers/xfeat.py | 35 +++++++++++++++++++++---- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/depthai_nodes/ml/parsers/utils/xfeat.py b/depthai_nodes/ml/parsers/utils/xfeat.py index 5d2d62e..364d9b6 100644 --- a/depthai_nodes/ml/parsers/utils/xfeat.py +++ b/depthai_nodes/ml/parsers/utils/xfeat.py @@ -225,6 +225,9 @@ def detect_and_compute( grid = 2.0 * (mkpts / div_array) - 1.0 grid = np.expand_dims(grid, axis=2) + if grid.size == 0: + return None + # Numpy implementation of F.grid_sample map_x = grid[..., 0].reshape(-1).astype(np.float32) map_y = grid[..., 1].reshape(-1).astype(np.float32) diff --git a/depthai_nodes/ml/parsers/xfeat.py b/depthai_nodes/ml/parsers/xfeat.py index 7a061fc..72db15e 100644 --- a/depthai_nodes/ml/parsers/xfeat.py +++ b/depthai_nodes/ml/parsers/xfeat.py @@ -20,6 +20,8 @@ class XFeatParser(dai.node.ThreadedHostNode): Original image size. input_size : Tuple[float, float] Input image size. + max_keypoints : int + Maximum number of keypoints to keep. previous_results : np.ndarray Previous results from the model. Previous results are used to match keypoints between two frames. 
@@ -38,6 +40,7 @@ def __init__( self, original_size: Tuple[float, float] = None, input_size: Tuple[float, float] = (640, 352), + max_keypoints: int = 4096, ): """Initializes the XFeatParser node. @@ -51,6 +54,7 @@ def __init__( self.out = self.createOutput() self.original_size = original_size self.input_size = input_size + self.max_keypoints = max_keypoints self.previous_results = None def setOriginalSize(self, original_size): @@ -69,6 +73,14 @@ def setInputSize(self, input_size): """ self.input_size = input_size + def setMaxKeypoints(self, max_keypoints): + """Sets the maximum number of keypoints to keep. + + @param max_keypoints: Maximum number of keypoints. + @type max_keypoints: int + """ + self.max_keypoints = max_keypoints + def run(self): if self.original_size is None: raise ValueError("Original image size must be specified!") @@ -95,14 +107,27 @@ def run(self): ) result = detect_and_compute( - feats, keypoints, resize_rate_w, resize_rate_h, self.input_size - )[0] + feats, + keypoints, + resize_rate_w, + resize_rate_h, + self.input_size, + self.max_keypoints, + ) + + if result is not None: + result = result[0] + else: + matched_points = dai.TrackedFeatures() + matched_points.setTimestamp(output.getTimestamp()) + self.out.send(matched_points) + continue if self.previous_results is not None: mkpts0, mkpts1 = match(self.previous_results, result) matched_points = create_tracked_features_message(mkpts0, mkpts1) matched_points.setTimestamp(output.getTimestamp()) self.out.send(matched_points) - else: - # save the result from first frame - self.previous_results = result + + # save the result from first frame + self.previous_results = result From 1043366a97bf81aa3ddcbafdaedcdfd163ada435 Mon Sep 17 00:00:00 2001 From: jkbmrz <74824974+jkbmrz@users.noreply.github.com> Date: Wed, 28 Aug 2024 08:11:03 +0200 Subject: [PATCH 25/25] Add parsers for HRNet and AgeGender models. 
(#16) * feat: add support for age_gender model * feat: add support for HRNet model * fix: formatting and structure * fix: AgeGenderParser formatting and convert age to years * fix: HRNetParser formatting, remove comments, add normalization * fix: add timestamps to outgoing messages * Pre-commit fix. * Add Classifications msg to AgeGender. * Docstrings fix. --------- Co-authored-by: kkeroo <61207502+kkeroo@users.noreply.github.com> --- depthai_nodes/ml/messages/__init__.py | 2 + .../ml/messages/creators/__init__.py | 2 + depthai_nodes/ml/messages/creators/misc.py | 39 +++++++++ depthai_nodes/ml/messages/misc.py | 34 ++++++++ depthai_nodes/ml/parsers/__init__.py | 4 + depthai_nodes/ml/parsers/age_gender.py | 43 ++++++++++ depthai_nodes/ml/parsers/hrnet.py | 80 +++++++++++++++++++ 7 files changed, 204 insertions(+) create mode 100644 depthai_nodes/ml/messages/creators/misc.py create mode 100644 depthai_nodes/ml/messages/misc.py create mode 100644 depthai_nodes/ml/parsers/age_gender.py create mode 100644 depthai_nodes/ml/parsers/hrnet.py diff --git a/depthai_nodes/ml/messages/__init__.py b/depthai_nodes/ml/messages/__init__.py index 81d5876..9cc2183 100644 --- a/depthai_nodes/ml/messages/__init__.py +++ b/depthai_nodes/ml/messages/__init__.py @@ -2,6 +2,7 @@ from .img_detections import ImgDetectionsWithKeypoints, ImgDetectionWithKeypoints from .keypoints import HandKeypoints, Keypoints from .lines import Line, Lines +from .misc import AgeGender __all__ = [ "ImgDetectionWithKeypoints", @@ -11,4 +12,5 @@ "Line", "Lines", "Classifications", + "AgeGender", ] diff --git a/depthai_nodes/ml/messages/creators/__init__.py b/depthai_nodes/ml/messages/creators/__init__.py index 3f15b5e..6750e48 100644 --- a/depthai_nodes/ml/messages/creators/__init__.py +++ b/depthai_nodes/ml/messages/creators/__init__.py @@ -3,6 +3,7 @@ from .detection import create_detection_message, create_line_detection_message from .image import create_image_message from .keypoints import 
create_hand_keypoints_message, create_keypoints_message +from .misc import create_age_gender_message from .segmentation import create_segmentation_message from .thermal import create_thermal_message from .tracked_features import create_tracked_features_message @@ -18,4 +19,5 @@ "create_keypoints_message", "create_thermal_message", "create_classification_message", + "create_age_gender_message", ] diff --git a/depthai_nodes/ml/messages/creators/misc.py b/depthai_nodes/ml/messages/creators/misc.py new file mode 100644 index 0000000..464b309 --- /dev/null +++ b/depthai_nodes/ml/messages/creators/misc.py @@ -0,0 +1,39 @@ +from typing import List + +from ...messages import AgeGender, Classifications + + +def create_age_gender_message(age: float, gender_prob: List[float]) -> AgeGender: + """Create a DepthAI message for the age and gender probability. + + @param age: Detected person age (must be multiplied by 100 to get years). + @type age: float + @param gender_prob: Detected person gender probability [female, male]. + @type gender_prob: List[float] + @return: AgeGender message containing the predicted person's age and Classifications + message containing the classes and probabilities of the predicted gender. + @rtype: AgeGender + @raise ValueError: If age is not a float. + @raise ValueError: If gender_prob is not a list. + @raise ValueError: If each item in gender_prob is not a float. + """ + + if not isinstance(age, float): + raise ValueError(f"age should be float, got {type(age)}.") + + if not isinstance(gender_prob, List): + raise ValueError(f"gender_prob should be list, got {type(gender_prob)}.") + for item in gender_prob: + if not isinstance(item, float): + raise ValueError( + f"gender_prob list values must be of type float, instead got {type(item)}." 
+ ) + + age_gender_message = AgeGender() + age_gender_message.age = age + gender = Classifications() + gender.classes = ["female", "male"] + gender.scores = gender_prob + age_gender_message.gender = gender + + return age_gender_message diff --git a/depthai_nodes/ml/messages/misc.py b/depthai_nodes/ml/messages/misc.py new file mode 100644 index 0000000..c3aaf83 --- /dev/null +++ b/depthai_nodes/ml/messages/misc.py @@ -0,0 +1,34 @@ +import depthai as dai + +from ..messages import Classifications + + +class AgeGender(dai.Buffer): + def __init__(self): + super().__init__() + self._age: float = None + self._gender = Classifications() + + @property + def age(self) -> float: + return self._age + + @age.setter + def age(self, value: float): + if not isinstance(value, float): + raise TypeError( + f"start_point must be of type float, instead got {type(value)}." + ) + self._age = value + + @property + def gender(self) -> Classifications: + return self._gender + + @gender.setter + def gender(self, value: Classifications): + if not isinstance(value, Classifications): + raise TypeError( + f"gender must be of type Classifications, instead got {type(value)}." 
+ ) + self._gender = value diff --git a/depthai_nodes/ml/parsers/__init__.py b/depthai_nodes/ml/parsers/__init__.py index 58dfb43..e167653 100644 --- a/depthai_nodes/ml/parsers/__init__.py +++ b/depthai_nodes/ml/parsers/__init__.py @@ -1,4 +1,6 @@ +from .age_gender import AgeGenderParser from .classification import ClassificationParser +from .hrnet import HRNetParser from .image_output import ImageOutputParser from .keypoints import KeypointParser from .mediapipe_hand_landmarker import MPHandLandmarkParser @@ -26,4 +28,6 @@ "XFeatParser", "ThermalImageParser", "ClassificationParser", + "AgeGenderParser", + "HRNetParser", ] diff --git a/depthai_nodes/ml/parsers/age_gender.py b/depthai_nodes/ml/parsers/age_gender.py new file mode 100644 index 0000000..212e7e4 --- /dev/null +++ b/depthai_nodes/ml/parsers/age_gender.py @@ -0,0 +1,43 @@ +import depthai as dai + +from ..messages.creators import create_age_gender_message + + +class AgeGenderParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the Age-Gender regression model. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + + Output Message/s + ---------------- + **Type**: AgeGender + + **Description**: Message containing the detected person age and Classfications object for storing information about the detected person's gender. 
+ """ + + def __init__(self): + """Initializes the AgeGenderParser node.""" + dai.node.ThreadedHostNode.__init__(self) + self.input = dai.Node.Input(self) + self.out = dai.Node.Output(self) + + def run(self): + while self.isRunning(): + try: + output: dai.NNData = self.input.get() + except dai.MessageQueue.QueueException: + break # Pipeline was stopped + + age = output.getTensor("age_conv3", dequantize=True).item() + age *= 100 # convert to years + prob = output.getTensor("prob", dequantize=True).flatten().tolist() + + age_gender_message = create_age_gender_message(age=age, gender_prob=prob) + age_gender_message.setTimestamp(output.getTimestamp()) + + self.out.send(age_gender_message) diff --git a/depthai_nodes/ml/parsers/hrnet.py b/depthai_nodes/ml/parsers/hrnet.py new file mode 100644 index 0000000..be91d1c --- /dev/null +++ b/depthai_nodes/ml/parsers/hrnet.py @@ -0,0 +1,80 @@ +import depthai as dai +import numpy as np + +from ..messages.creators import create_keypoints_message + + +class HRNetParser(dai.node.ThreadedHostNode): + """Parser class for parsing the output of the HRNet pose estimation model. The code is inspired by https://github.com/ibaiGorordo/ONNX-HRNET-Human-Pose-Estimation. + + Attributes + ---------- + input : Node.Input + Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node. + out : Node.Output + Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved. + score_threshold : float + Confidence score threshold for detected keypoints. + + Output Message/s + ---------------- + **Type**: Keypoints + + **Description**: Keypoints message containing detected body keypoints. + """ + + def __init__(self, score_threshold=0.5): + """Initializes the HRNetParser node. + + @param score_threshold: Confidence score threshold for detected keypoints. 
+ @type score_threshold: float + """ + dai.node.ThreadedHostNode.__init__(self) + self.input = dai.Node.Input(self) + self.out = dai.Node.Output(self) + + self.score_threshold = score_threshold + + def setScoreThreshold(self, threshold): + """Sets the confidence score threshold for the detected body keypoints. + + @param threshold: Confidence score threshold for detected keypoints. + @type threshold: float + """ + self.score_threshold = threshold + + def run(self): + while self.isRunning(): + try: + output: dai.NNData = self.input.get() + except dai.MessageQueue.QueueException: + break # Pipeline was stopped + + heatmaps = output.getTensor("heatmaps", dequantize=True) + + if len(heatmaps.shape) == 4: + heatmaps = heatmaps[0] + if heatmaps.shape[2] == 16: # HW_ instead of _HW + heatmaps = heatmaps.transpose(2, 0, 1) + _, map_h, map_w = heatmaps.shape + + scores = np.array([np.max(heatmap) for heatmap in heatmaps]) + keypoints = np.array( + [ + np.unravel_index(heatmap.argmax(), heatmap.shape) + for heatmap in heatmaps + ] + ) + keypoints = keypoints.astype(np.float32) + keypoints = keypoints[:, ::-1] / np.array( + [map_w, map_h] + ) # normalize keypoints to [0, 1] + + keypoints_message = create_keypoints_message( + keypoints=keypoints, + scores=scores, + confidence_threshold=self.score_threshold, + ) + keypoints_message.setTimestamp(output.getTimestamp()) + + self.out.send(keypoints_message)