Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Paddle OCR Parser #64

Merged
merged 10 commits into from
Sep 11, 2024
14 changes: 7 additions & 7 deletions depthai_nodes/ml/messages/classification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import Sequence

import depthai as dai

Expand All @@ -18,29 +18,29 @@ def __init__(self):
"""Initializes the Classifications object and sets the classes and scores to
empty lists."""
dai.Buffer.__init__(self)
self._classes: List[str] = []
self._scores: List[float] = []
self._classes: Sequence[str] = []
self._scores: Sequence[float] = []

@property
def classes(self) -> List:
def classes(self) -> Sequence:
"""Returns the list of classes."""
return self._classes

@property
def scores(self) -> List:
def scores(self) -> Sequence:
"""Returns the list of scores."""
return self._scores

@classes.setter
def classes(self, class_names: List[str]):
def classes(self, class_names: Sequence[str]):
"""Sets the list of classes.

@param classes: A list of class names.
"""
self._classes = class_names

@scores.setter
def scores(self, scores: List[float]):
def scores(self, scores: Sequence[float]):
"""Sets the list of scores.

@param scores: A list of scores.
Expand Down
2 changes: 2 additions & 0 deletions depthai_nodes/ml/messages/creators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .classification import create_classification_message
from .classification_sequence import create_classification_sequence_message
from .clusters import create_cluster_message
from .detection import create_detection_message, create_line_detection_message
from .image import create_image_message
Expand All @@ -20,5 +21,6 @@
"create_sam_message",
"create_age_gender_message",
"create_map_message",
"create_classification_sequence_message",
"create_cluster_message",
]
116 changes: 116 additions & 0 deletions depthai_nodes/ml/messages/creators/classification_sequence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from typing import List, Union

import numpy as np

from .. import Classifications


def create_classification_sequence_message(
    classes: List[str],
    scores: Union[np.ndarray, List],
    ignored_indexes: List[int] = None,
    remove_duplicates: bool = False,
    concatenate_text: bool = False,
) -> Classifications:
    """Creates a message for a multi-class sequence. The 'scores' array is a sequence of
    probabilities for each class at each position in the sequence. The message contains
    the class names and their respective scores, ordered according to the sequence.

    @param classes: A list of class names, with length 'n_classes'.
    @type classes: List
    @param scores: A numpy array of shape (sequence_length, n_classes) containing the (row-wise) probability distributions over the classes.
    @type scores: np.ndarray
    @param ignored_indexes: A list of indexes to ignore during classification generation (e.g., background class, padding class)
    @type ignored_indexes: List[int]
    @param remove_duplicates: If True, removes consecutive duplicates from the sequence.
    @type remove_duplicates: bool
    @param concatenate_text: If True, concatenates consecutive words based on the space character.
    @type concatenate_text: bool
    @return: A Classification message with attributes `classes` and `scores`, where `classes` is a list of class names and `scores` is a list of corresponding scores.
    @rtype: Classifications
    @raises ValueError: If 'classes' is not a list of strings.
    @raises ValueError: If 'scores' is not a 2D array or list of shape (sequence_length, n_classes).
    @raises ValueError: If the number of classes does not match the number of columns in 'scores'.
    @raises ValueError: If any score is not in the range [0, 1].
    @raises ValueError: If the probabilities in any row of 'scores' do not sum to 1.
    @raises ValueError: If 'ignored_indexes' is not None or a list of valid indexes within the range [0, n_classes - 1].
    """

    if not isinstance(classes, list):
        raise ValueError(f"Classes should be a list, got {type(classes)}.")

    if isinstance(scores, list):
        scores = np.array(scores)

    if len(scores.shape) != 2:
        raise ValueError(f"Scores should be a 2D array, got {scores.shape}.")

    if scores.shape[1] != len(classes):
        raise ValueError(
            f"Number of classes and scores mismatch. Provided {len(classes)} class names and {scores.shape[1]} scores."
        )

    if np.any(scores < 0) or np.any(scores > 1):
        raise ValueError("Scores should be in the range [0, 1].")

    if np.any(~np.isclose(scores.sum(axis=1), 1.0, atol=1e-2)):
        raise ValueError("Each row of scores should sum to 1.")

    if ignored_indexes is not None:
        if not isinstance(ignored_indexes, list):
            raise ValueError(
                f"Ignored indexes should be a list, got {type(ignored_indexes)}."
            )
        if not all(isinstance(index, int) for index in ignored_indexes):
            raise ValueError("Ignored indexes should be integers.")
        if np.any(np.array(ignored_indexes) < 0) or np.any(
            np.array(ignored_indexes) >= len(classes)
        ):
            raise ValueError(
                "Ignored indexes should be integers in the range [0, num_classes -1]."
            )

    selection = np.ones(len(scores), dtype=bool)
    indexes = np.argmax(scores, axis=1)

    if remove_duplicates:
        # Keep only the first of each run of identical consecutive predictions
        # (CTC-style collapse). Done BEFORE dropping ignored classes so that
        # e.g. "a a <blank> a" collapses to "a a", not "a".
        selection[1:] = indexes[1:] != indexes[:-1]

    if ignored_indexes is not None:
        selection &= np.array([index not in ignored_indexes for index in indexes])

    class_list = [classes[i] for i in indexes[selection]]
    score_list = np.max(scores, axis=1)[selection]

    if (
        concatenate_text
        and len(class_list) > 1
        and all(len(word) <= 1 for word in class_list)
    ):
        # Character-level predictions: group characters into words at space
        # characters and average the per-character scores within each word.
        # Grouping by walking the sequence (instead of index arithmetic on a
        # cumulative sum) stays correct for leading, trailing, and repeated spaces.
        words: List[str] = []
        word_scores: List[float] = []
        current_chars: List[str] = []
        current_scores: List[float] = []
        for char, score in zip(class_list, score_list):
            if char == " ":
                if current_chars:
                    words.append("".join(current_chars))
                    word_scores.append(float(np.mean(current_scores)))
                    current_chars, current_scores = [], []
            else:
                current_chars.append(char)
                current_scores.append(float(score))
        if current_chars:  # flush the final word (no trailing space needed)
            words.append("".join(current_chars))
            word_scores.append(float(np.mean(current_scores)))

        class_list = words
        score_list = np.array(word_scores)

    elif (
        concatenate_text
        and len(class_list) > 1
        and any(len(word) >= 2 for word in class_list)
    ):
        # Word-level predictions: join all words into one string. Wrap the mean
        # in a 1-element array so that score_list.tolist() below still yields a
        # list (a bare np.mean() scalar would .tolist() into a float).
        class_list = [" ".join(class_list)]
        score_list = np.array([np.mean(score_list)])

    classification_msg = Classifications()

    classification_msg.classes = class_list
    classification_msg.scores = score_list.tolist()

    return classification_msg
2 changes: 2 additions & 0 deletions depthai_nodes/ml/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .mediapipe_palm_detection import MPPalmDetectionParser
from .mlsd import MLSDParser
from .ppdet import PPTextDetectionParser
from .ppocr import PaddleOCRParser
from .scrfd import SCRFDParser
from .segmentation import SegmentationParser
from .superanimal_landmarker import SuperAnimalParser
Expand All @@ -35,5 +36,6 @@
"HRNetParser",
"PPTextDetectionParser",
"MapOutputParser",
"PaddleOCRParser",
"LaneDetectionParser",
]
143 changes: 143 additions & 0 deletions depthai_nodes/ml/parsers/ppocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from typing import List

import depthai as dai
import numpy as np

from ..messages.creators import create_classification_sequence_message
from .classification import ClassificationParser


class PaddleOCRParser(ClassificationParser):
    """Postprocessing logic for PaddleOCR text recognition model.

    Attributes
    ----------
    input : Node.Input
        Node's input. It is a linking point to which the Neural Network's output is linked. It accepts the output of the Neural Network node.
    out : Node.Output
        Parser sends the processed network results to this output in a form of DepthAI message. It is a linking point from which the processed network results are retrieved.
    characters: List[str]
        List of available characters for the text recognition model.
    ignored_indexes: List[int]
        List of indexes to ignore during classification generation (e.g., background class, blank space).
    remove_duplicates: bool
        If True, removes consecutive duplicates from the sequence.
    concatenate_text: bool
        If True, concatenates consecutive words based on the predicted spaces.
    is_softmax: bool
        If False, the scores are converted to probabilities using softmax function.

    Output Message/s
    ----------------
    **Type**: Classifications(dai.Buffer)

    **Description**: An object with attributes `classes` and `scores`. `classes` is a list containing the predicted text. `scores` is a list of corresponding probability scores.

    See also
    --------
    Official PaddleOCR repository:
        https://github.com/PaddlePaddle/PaddleOCR
    """

    def __init__(
        self,
        characters: List[str] = None,
        ignored_indexes: List[int] = None,
        remove_duplicates: bool = False,
        concatenate_text: bool = True,
        is_softmax: bool = True,
    ):
        """Initializes the PaddleOCR Parser node.

        @param characters: List of available characters for the text recognition model.
        @type characters: List[str]
        @param ignored_indexes: List of indexes to ignore during classification
            generation (e.g., background class, blank space).
        @type ignored_indexes: List[int]
        @param remove_duplicates: If True, removes consecutive duplicates from the
            sequence.
        @type remove_duplicates: bool
        @param concatenate_text: If True, concatenates consecutive words based on the
            predicted spaces.
        @type concatenate_text: bool
        @param is_softmax: If False, the scores are converted to probabilities using
            softmax function.
        """
        super().__init__(characters, is_softmax)
        # Index 0 is the CTC blank class in PaddleOCR, so it is ignored by default.
        self.ignored_indexes = [0] if ignored_indexes is None else ignored_indexes
        self.remove_duplicates = remove_duplicates
        self.concatenate_text = concatenate_text

    def setRemoveDuplicates(self, remove_duplicates: bool):
        """Sets the remove_duplicates flag for the classification sequence model.

        @param remove_duplicates: If True, removes consecutive duplicates from the
            sequence.
        """
        self.remove_duplicates = remove_duplicates

    def setIgnoredIndexes(self, ignored_indexes: List[int]):
        """Sets the ignored_indexes for the classification sequence model.

        @param ignored_indexes: A list of indexes to ignore during classification
            generation.
        """
        self.ignored_indexes = ignored_indexes

    def setConcatenateText(self, concatenate_text: bool):
        """Sets the concatenate_text flag for the classification sequence model.

        @param concatenate_text: If True, concatenates consecutive words based on
            predicted spaces.
        """
        self.concatenate_text = concatenate_text

    def run(self):
        while self.isRunning():
            try:
                output: dai.NNData = self.input.get()

            except dai.MessageQueue.QueueException:
                break  # pipeline is shutting down

            output_layer_names = output.getAllLayerNames()
            if len(output_layer_names) != 1:
                raise ValueError(
                    f"Expected 1 output layer, got {len(output_layer_names)}."
                )

            if self.n_classes == 0:
                raise ValueError("Classes must be provided for classification.")

            if any(len(ch) > 1 for ch in self.classes):
                raise ValueError("Each character should only be a single character.")

            scores = output.getTensor(output_layer_names[0], dequantize=True).astype(
                np.float32
            )

            if len(scores.shape) != 3:
                raise ValueError(f"Scores should be a 3D array, got {scores.shape}.")

            # Squeeze the singleton dimension so scores become
            # (sequence_length, n_classes).
            if scores.shape[0] == 1:
                scores = scores[0]
            elif scores.shape[2] == 1:
                scores = scores[:, :, 0]
            else:
                raise ValueError(
                    "Scores should be a 3D array of shape (1, sequence_length, n_classes) or (sequence_length, n_classes, 1)."
                )

            if not self.is_softmax:
                # Row-wise softmax over the class axis. Subtracting the row max
                # first keeps np.exp from overflowing on large logits without
                # changing the result.
                shifted = scores - np.max(scores, axis=1, keepdims=True)
                exp_scores = np.exp(shifted)
                scores = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

            msg = create_classification_sequence_message(
                classes=self.classes,
                scores=scores,
                remove_duplicates=self.remove_duplicates,
                ignored_indexes=self.ignored_indexes,
                concatenate_text=self.concatenate_text,
            )
            # Propagate the source frame's timestamp so results can be synced.
            msg.setTimestamp(output.getTimestamp())

            self.out.send(msg)
4 changes: 2 additions & 2 deletions media/coverage_badge.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading