Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Paddle OCR Parser #64

Merged
merged 10 commits into from
Sep 11, 2024
14 changes: 7 additions & 7 deletions depthai_nodes/ml/messages/classification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import Sequence

import depthai as dai

Expand All @@ -18,29 +18,29 @@ def __init__(self):
"""Initializes the Classifications object and sets the classes and scores to
empty lists."""
dai.Buffer.__init__(self)
self._classes: List[str] = []
self._scores: List[float] = []
self._classes: Sequence[str] = []
self._scores: Sequence[float] = []

@property
def classes(self) -> List:
def classes(self) -> Sequence:
"""Returns the list of classes."""
return self._classes

@property
def scores(self) -> List:
def scores(self) -> Sequence:
"""Returns the list of scores."""
return self._scores

@classes.setter
def classes(self, class_names: List[str]):
def classes(self, class_names: Sequence[str]):
"""Sets the list of classes.

@param classes: A list of class names.
"""
self._classes = class_names

@scores.setter
def scores(self, scores: List[float]):
def scores(self, scores: Sequence[float]):
"""Sets the list of scores.

@param scores: A list of scores.
Expand Down
2 changes: 2 additions & 0 deletions depthai_nodes/ml/messages/creators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .classification import create_classification_message
from .classification_sequence import create_classification_sequence_message
from .clusters import create_cluster_message
from .detection import create_detection_message, create_line_detection_message
from .image import create_image_message
Expand All @@ -20,5 +21,6 @@
"create_sam_message",
"create_age_gender_message",
"create_map_message",
"create_classification_sequence_message",
"create_cluster_message",
]
116 changes: 116 additions & 0 deletions depthai_nodes/ml/messages/creators/classification_sequence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from typing import List, Union

import numpy as np

from .. import Classifications


def create_classification_sequence_message(
    classes: List[str],
    scores: Union[np.ndarray, List],
    ignored_indexes: List[int] = None,
    remove_duplicates: bool = False,
    concatenate_text: bool = False,
) -> Classifications:
    """Creates a message for a multi-class sequence. The 'scores' array is a sequence of
    probabilities for each class at each position in the sequence. The message contains
    the class names and their respective scores, ordered according to the sequence.

    @param classes: A list of class names, with length 'n_classes'.
    @type classes: List
    @param scores: A numpy array of shape (sequence_length, n_classes) containing the (row-wise) probability distributions over the classes.
    @type scores: np.ndarray
    @param ignored_indexes: A list of indexes to ignore during classification generation (e.g., background class, padding class)
    @type ignored_indexes: List[int]
    @param remove_duplicates: If True, removes consecutive duplicates from the sequence.
    @type remove_duplicates: bool
    @param concatenate_text: If True, concatenates consecutive words based on the space character.
    @type concatenate_text: bool
    @return: A Classification message with attributes `classes` and `scores`, where `classes` is a list of class names and `scores` is a list of corresponding scores.
    @rtype: Classifications
    @raises ValueError: If 'classes' is not a list of strings.
    @raises ValueError: If 'scores' is not a 2D array or list of shape (sequence_length, n_classes).
    @raises ValueError: If the number of classes does not match the number of columns in 'scores'.
    @raises ValueError: If any score is not in the range [0, 1].
    @raises ValueError: If the probabilities in any row of 'scores' do not sum to 1.
    @raises ValueError: If 'ignored_indexes' is not None or a list of valid indexes within the range [0, n_classes - 1].
    """

    if not isinstance(classes, list):
        raise ValueError(f"Classes should be a list, got {type(classes)}.")

    if isinstance(scores, list):
        scores = np.array(scores)

    if len(scores.shape) != 2:
        raise ValueError(f"Scores should be a 2D array, got {scores.shape}.")

    if scores.shape[1] != len(classes):
        raise ValueError(
            f"Number of classes and scores mismatch. Provided {len(classes)} class names and {scores.shape[1]} scores."
        )

    if np.any(scores < 0) or np.any(scores > 1):
        raise ValueError("Scores should be in the range [0, 1].")

    if np.any(~np.isclose(scores.sum(axis=1), 1.0, atol=1e-2)):
        raise ValueError("Each row of scores should sum to 1.")

    if ignored_indexes is not None:
        if not isinstance(ignored_indexes, list):
            raise ValueError(
                f"Ignored indexes should be a list, got {type(ignored_indexes)}."
            )
        if not all(isinstance(index, int) for index in ignored_indexes):
            raise ValueError("Ignored indexes should be integers.")
        if np.any(np.array(ignored_indexes) < 0) or np.any(
            np.array(ignored_indexes) >= len(classes)
        ):
            raise ValueError(
                "Ignored indexes should be integers in the range [0, num_classes -1]."
            )

    selection = np.ones(len(scores), dtype=bool)
    indexes = np.argmax(scores, axis=1)

    if remove_duplicates:
        # Keep only the first of each run of identical consecutive predictions
        # (CTC-style collapse). Done BEFORE dropping ignored classes so that
        # e.g. "a a <blank> a" collapses to "a a", not "a".
        selection[1:] = indexes[1:] != indexes[:-1]

    if ignored_indexes is not None:
        selection &= np.array([index not in ignored_indexes for index in indexes])

    class_list = [classes[i] for i in indexes[selection]]
    score_list = np.max(scores, axis=1)[selection]

    if (
        concatenate_text
        and len(class_list) > 1
        and all(len(word) <= 1 for word in class_list)
    ):
        # Character-level predictions: group characters into words at space
        # characters and average the per-character scores within each word.
        # Grouping by walking the sequence (instead of index arithmetic on a
        # cumulative sum) stays correct for leading, trailing, and repeated spaces.
        words: List[str] = []
        word_scores: List[float] = []
        current_chars: List[str] = []
        current_scores: List[float] = []
        for char, score in zip(class_list, score_list):
            if char == " ":
                if current_chars:
                    words.append("".join(current_chars))
                    word_scores.append(float(np.mean(current_scores)))
                    current_chars, current_scores = [], []
            else:
                current_chars.append(char)
                current_scores.append(float(score))
        if current_chars:  # flush the final word (no trailing space needed)
            words.append("".join(current_chars))
            word_scores.append(float(np.mean(current_scores)))

        class_list = words
        score_list = np.array(word_scores)

    elif (
        concatenate_text
        and len(class_list) > 1
        and any(len(word) >= 2 for word in class_list)
    ):
        # Word-level predictions: join all words into one string. Wrap the mean
        # in a 1-element array so that score_list.tolist() below still yields a
        # list (a bare np.mean() scalar would .tolist() into a float).
        class_list = [" ".join(class_list)]
        score_list = np.array([np.mean(score_list)])

    classification_msg = Classifications()

    classification_msg.classes = class_list
    classification_msg.scores = score_list.tolist()

    return classification_msg
2 changes: 2 additions & 0 deletions depthai_nodes/ml/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .mediapipe_palm_detection import MPPalmDetectionParser
from .mlsd import MLSDParser
from .ppdet import PPTextDetectionParser
from .ppocr import PaddleOCRParser
from .scrfd import SCRFDParser
from .segmentation import SegmentationParser
from .superanimal_landmarker import SuperAnimalParser
Expand All @@ -35,5 +36,6 @@
"HRNetParser",
"PPTextDetectionParser",
"MapOutputParser",
"PaddleOCRParser",
"LaneDetectionParser",
]
143 changes: 143 additions & 0 deletions depthai_nodes/ml/parsers/ppocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from typing import List

import depthai as dai
import numpy as np

from ..messages.creators import create_classification_sequence_message
from .classification import ClassificationParser


class PaddleOCRParser(ClassificationParser):
    """Postprocessing logic for PaddleOCR text recognition model.

    Attributes
    ----------
    input : Node.Input
        Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
    out : Node.Output
        Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved.
    characters: List[str]
        List of available characters for the text recognition model.
    ignored_indexes: List[int]
        List of indexes to ignore during classification generation (e.g., background class, blank space).
    remove_duplicates: bool
        If True, removes consecutive duplicates from the sequence.
    concatenate_text: bool
        If True, concatenates consecutive words based on the predicted spaces.
    is_softmax: bool
        If False, the scores are converted to probabilities using softmax function.

    Output Message/s
    ----------------
    **Type**: Classifications(dai.Buffer)

    **Description**: An object with attributes `classes` and `scores`. `classes` is a list containing the predicted text. `scores` is a list of corresponding probability scores.

    See also
    --------
    Official PaddleOCR repository:
        https://github.com/PaddlePaddle/PaddleOCR
    """

    def __init__(
        self,
        characters: List[str] = None,
        ignored_indexes: List[int] = None,
        remove_duplicates: bool = False,
        concatenate_text: bool = True,
        is_softmax: bool = True,
    ):
        """Initializes the PaddleOCR Parser node.

        @param characters: List of available characters for the text recognition model.
        @type characters: List[str]
        @param ignored_indexes: List of indexes to ignore during classification
            generation (e.g., background class, blank space).
        @type ignored_indexes: List[int]
        @param remove_duplicates: If True, removes consecutive duplicates from the
            sequence.
        @type remove_duplicates: bool
        @param concatenate_text: If True, concatenates consecutive words based on the
            predicted spaces.
        @type concatenate_text: bool
        @param is_softmax: If False, the scores are converted to probabilities using
            softmax function.
        """
        super().__init__(characters, is_softmax)
        # Index 0 is the CTC blank class in PaddleOCR, so it is ignored by default.
        self.ignored_indexes = [0] if ignored_indexes is None else ignored_indexes
        self.remove_duplicates = remove_duplicates
        self.concatenate_text = concatenate_text

    def setRemoveDuplicates(self, remove_duplicates: bool):
        """Sets the remove_duplicates flag for the classification sequence model.

        @param remove_duplicates: If True, removes consecutive duplicates from the
            sequence.
        """
        self.remove_duplicates = remove_duplicates

    def setIgnoredIndexes(self, ignored_indexes: List[int]):
        """Sets the ignored_indexes for the classification sequence model.

        @param ignored_indexes: A list of indexes to ignore during classification
            generation.
        """
        self.ignored_indexes = ignored_indexes

    def setConcatenateText(self, concatenate_text: bool):
        """Sets the concatenate_text flag for the classification sequence model.

        @param concatenate_text: If True, concatenates consecutive words based on
            predicted spaces.
        """
        self.concatenate_text = concatenate_text

    def run(self):
        while self.isRunning():
            try:
                output: dai.NNData = self.input.get()

            except dai.MessageQueue.QueueException:
                break  # pipeline is shutting down

            output_layer_names = output.getAllLayerNames()
            if len(output_layer_names) != 1:
                raise ValueError(
                    f"Expected 1 output layer, got {len(output_layer_names)}."
                )

            if self.n_classes == 0:
                raise ValueError("Classes must be provided for classification.")

            if any(len(ch) > 1 for ch in self.classes):
                raise ValueError("Each character should only be a single character.")

            scores = output.getTensor(output_layer_names[0], dequantize=True).astype(
                np.float32
            )

            if len(scores.shape) != 3:
                raise ValueError(f"Scores should be a 3D array, got {scores.shape}.")

            # Squeeze the singleton dimension so scores become
            # (sequence_length, n_classes).
            if scores.shape[0] == 1:
                scores = scores[0]
            elif scores.shape[2] == 1:
                scores = scores[:, :, 0]
            else:
                raise ValueError(
                    "Scores should be a 3D array of shape (1, sequence_length, n_classes) or (sequence_length, n_classes, 1)."
                )

            if not self.is_softmax:
                # Row-wise softmax over the class axis. Subtracting the row max
                # first keeps np.exp from overflowing on large logits without
                # changing the result.
                shifted = scores - np.max(scores, axis=1, keepdims=True)
                exp_scores = np.exp(shifted)
                scores = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

            msg = create_classification_sequence_message(
                classes=self.classes,
                scores=scores,
                remove_duplicates=self.remove_duplicates,
                ignored_indexes=self.ignored_indexes,
                concatenate_text=self.concatenate_text,
            )
            # Propagate the source frame's timestamp so results can be synced.
            msg.setTimestamp(output.getTimestamp())

            self.out.send(msg)
4 changes: 2 additions & 2 deletions media/coverage_badge.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading