Skip to content

Commit

Permalink
Minor release preparation. (#93)
Browse files Browse the repository at this point in the history
* Bigger tolerance for classification scores.

* Improved MultiClassificationParser.

* Warnings in Keypoints and Segmentation parsers.

* Examples for new models.

* Adjusted tests due to tolerance change.

* Add XFeat Mono and Stereo to examples.

* XFeat demo instructions.

* Remove warp drawing in stereo mode.
  • Loading branch information
kkeroo authored Oct 4, 2024
1 parent 9212a21 commit 3f3e360
Show file tree
Hide file tree
Showing 15 changed files with 429 additions and 32 deletions.
2 changes: 1 addition & 1 deletion depthai_nodes/ml/messages/creators/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def create_classification_message(
f"Scores list must contain probabilities between 0 and 1, instead got {scores}."
)

if not np.isclose(np.sum(scores), 1.0, atol=1e-2):
if not np.isclose(np.sum(scores), 1.0, atol=1e-1):
raise ValueError(f"Scores should sum to 1, got {np.sum(scores)}.")

if len(scores) != len(classes):
Expand Down
25 changes: 23 additions & 2 deletions depthai_nodes/ml/parsers/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ class MultiClassificationParser(dai.node.ThreadedHostNode):

def __init__(
self,
classification_attributes: List[str],
classification_labels: List[List[str]],
classification_attributes: List[str] = None,
classification_labels: List[List[str]] = None,
):
"""Initializes the MultipleClassificationParser node."""
dai.node.ThreadedHostNode.__init__(self)
Expand All @@ -133,7 +133,28 @@ def __init__(
self.classification_attributes: List[str] = classification_attributes
self.classification_labels: List[List[str]] = classification_labels

def setClassificationAttributes(self, classification_attributes: List[str]):
    """Store the attributes that the multiple classification model predicts.

    The value is assigned as-is to C{self.classification_attributes}; no
    validation is performed here.

    @param classification_attributes: List of attributes to be classified.
    @type classification_attributes: List[str]
    """
    self.classification_attributes = classification_attributes

def setClassificationLabels(self, classification_labels: List[List[str]]):
    """Store the class labels used by the multiple classification model.

    The value is assigned as-is to C{self.classification_labels}; no
    validation is performed here.

    @param classification_labels: List of class labels for each attribute.
    @type classification_labels: List[List[str]]
    """
    self.classification_labels = classification_labels

def run(self):
if not self.classification_attributes:
raise ValueError("Classification attributes must be provided.")
if not self.classification_labels:
raise ValueError("Classification labels must be provided.")

while self.isRunning():
try:
output: dai.NNData = self.input.get()
Expand Down
8 changes: 5 additions & 3 deletions depthai_nodes/ml/parsers/keypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(

self.scale_factor = scale_factor
self.n_keypoints = n_keypoints
self._warned = False

def setScaleFactor(self, scale_factor):
"""Sets the scale factor to divide the keypoints by.
Expand Down Expand Up @@ -82,10 +83,11 @@ def run(self):

output_layer_names = output.getAllLayerNames()

if len(output_layer_names) != 1:
raise ValueError(
f"Expected 1 output layer, got {len(output_layer_names)}."
if len(output_layer_names) != 1 and not self._warned:
print(
f"Expected 1 output layer, got {len(output_layer_names)}, will take the first one."
)
self._warned = True

keypoints = output.getTensor(output_layer_names[0], dequantize=True).astype(
np.float32
Expand Down
4 changes: 3 additions & 1 deletion depthai_nodes/ml/parsers/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(self, background_class=False):
self.input = self.createInput()
self.out = self.createOutput()
self.background_class = background_class
self._warned = False

def setBackgroundClass(self, background_class):
"""Sets the background class.
Expand All @@ -57,10 +58,11 @@ def run(self):

output_layer_names = output.getAllLayerNames()

if len(output_layer_names) != 1:
if len(output_layer_names) != 1 and not self._warned:
print(
f"Expected 1 output layer, got {len(output_layer_names)}. Will take the first one."
)
self._warned = True

segmentation_mask = output.getTensor(output_layer_names[0], dequantize=True)
if len(segmentation_mask.shape) == 4:
Expand Down
4 changes: 4 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,7 @@ python main.py -s yolov6-nano:coco-416x416 -fps 28
Some models have small input sizes, and requesting a small image size from `Camera` is problematic, so we request a 4x bigger frame and resize it back down. During visualization the image frame is resized back, so some image quality is lost - but only in the visualization.

The parser is obtained from NN archive along with other important parameters for the parser. So, make sure your NN archive is well-defined.

### XFeat

If you want to run the XFeat demo, you have two options available: `Stereo` mode or `Mono` mode, depending on the NN archive you provide. If the NN archive requires `XFeatMonoParser`, mono mode is used; otherwise stereo mode is used (`XFeatStereoParser`). Stereo mode requires an OAK camera that has both left and right cameras; if they are not available, an error is raised. In mono mode you can set the reference frame against which all other frames are compared. The reference frame is set by a trigger - pressing the `S` key.
9 changes: 7 additions & 2 deletions examples/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from utils.arguments import initialize_argparser, parse_fps_limit, parse_model_slug
from utils.model import get_input_shape, get_model_from_hub, get_parser
from utils.parser import setup_parser
from utils.xfeat import xfeat_mono, xfeat_stereo
from visualization.visualize import visualize

# Initialize the argument parser
Expand All @@ -18,8 +19,12 @@
parser_class, parser_name = get_parser(nn_archive)
input_shape = get_input_shape(nn_archive)

if parser_name == "XFeatParser":
raise NotImplementedError("XFeatParser is not supported in this script yet.")
if parser_name == "XFeatMonoParser":
xfeat_mono(nn_archive, input_shape, fps_limit)
exit(0)
elif parser_name == "XFeatStereoParser":
xfeat_stereo(nn_archive, input_shape, fps_limit)
exit(0)

# Create the pipeline
with dai.Pipeline() as pipeline:
Expand Down
45 changes: 30 additions & 15 deletions examples/utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
LaneDetectionParser,
MapOutputParser,
MPPalmDetectionParser,
MultiClassificationParser,
PaddleOCRParser,
SCRFDParser,
SegmentationParser,
XFeatParser,
YOLOExtendedParser,
)

Expand Down Expand Up @@ -76,18 +77,6 @@ def setup_map_output_parser(parser: MapOutputParser, params: dict):
)


def setup_xfeat_parser(parser: XFeatParser, params: dict):
    """Configure an XFeat parser from the NN archive metadata.

    The C{"input_size"} entry of C{params} is used as both the input size and
    the original size of the parser. Setup is best-effort: on any failure a
    notice is printed and the parser is left as it is.

    @param parser: Parser to configure.
    @type parser: XFeatParser
    @param params: Extra parameters read from the NN archive.
    @type params: dict
    """
    notice = "This NN archive does not have required metadata for XFeatParser. Skipping setup..."
    try:
        size = params["input_size"]
        parser.setInputSize(size)
        parser.setOriginalSize(size)
    except Exception:
        print(notice)


def setup_yolo_extended_parser(parser: YOLOExtendedParser, params: dict):
"""Setup the YOLO parser with the required metadata."""
try:
Expand Down Expand Up @@ -142,6 +131,30 @@ def setup_fastsam_parser(parser: FastSAMParser, params: dict):
)


def setup_paddleocr_parser(parser: PaddleOCRParser, params: dict):
    """Configure a PaddleOCR parser from the NN archive metadata.

    The C{"classes"} entry of C{params} is forwarded to the parser. Setup is
    best-effort: on any failure a notice is printed and the parser is left as
    it is.

    @param parser: Parser to configure.
    @type parser: PaddleOCRParser
    @param params: Extra parameters read from the NN archive.
    @type params: dict
    """
    notice = "This NN archive does not have required metadata for PaddleOCRParser. Skipping setup..."
    try:
        parser.setClasses(params["classes"])
    except Exception:
        print(notice)


def setup_multi_classification_parser(parser: MultiClassificationParser, params: dict):
    """Configure a Multi Classification parser from the NN archive metadata.

    Both the C{"classification_attributes"} and C{"classification_labels"}
    entries of C{params} are read before either setter is called. Setup is
    best-effort: on any failure a notice is printed.

    @param parser: Parser to configure.
    @type parser: MultiClassificationParser
    @param params: Extra parameters read from the NN archive.
    @type params: dict
    """
    notice = "This NN archive does not have required metadata for MultiClassificationParser. Skipping setup..."
    try:
        # Look up both entries first so a missing key leaves the parser untouched.
        attributes, labels = (
            params["classification_attributes"],
            params["classification_labels"],
        )
        parser.setClassificationAttributes(attributes)
        parser.setClassificationLabels(labels)
    except Exception:
        print(notice)


def setup_parser(parser: dai.ThreadedNode, nn_archive: dai.NNArchive, parser_name: str):
"""Setup the parser with the NN archive."""

Expand All @@ -159,8 +172,6 @@ def setup_parser(parser: dai.ThreadedNode, nn_archive: dai.NNArchive, parser_nam
setup_classification_parser(parser, extraParams)
elif parser_name == "MapOutputParser":
setup_map_output_parser(parser, extraParams)
elif parser_name == "XFeatParser":
setup_xfeat_parser(parser, extraParams)
elif parser_name == "YOLOExtendedParser":
setup_yolo_extended_parser(parser, extraParams)
elif parser_name == "MPPalmDetectionParser":
Expand All @@ -169,3 +180,7 @@ def setup_parser(parser: dai.ThreadedNode, nn_archive: dai.NNArchive, parser_nam
setup_land_detection_parser(parser, extraParams)
elif parser_name == "FastSAMParser":
setup_fastsam_parser(parser, extraParams)
elif parser_name == "PaddleOCRParser":
setup_paddleocr_parser(parser, extraParams)
elif parser_name == "MultiClassificationParser":
setup_multi_classification_parser(parser, extraParams)
145 changes: 145 additions & 0 deletions examples/utils/xfeat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
from typing import List

import cv2
import depthai as dai
from visualization.visualizers import xfeat_visualizer

from depthai_nodes.ml.parsers import XFeatMonoParser, XFeatStereoParser


def xfeat_mono(nn_archive: dai.NNArchive, input_shape: List[int], fps_limit: int):
    """Run the XFeatMonoParser on a single camera.

    Every frame is shown in an OpenCV window together with the number of
    matches. Pressing the S key triggers the parser and stores the current
    frame as the reference frame used for visualization; pressing the Q key
    closes the window and stops the pipeline.

    @param nn_archive: NN archive of the model to run.
    @type nn_archive: dai.NNArchive
    @param input_shape: Size requested from the camera; also passed to the
        parser as both its original size and its input size.
    @type input_shape: List[int]
    @param fps_limit: FPS requested for the camera output.
    @type fps_limit: int
    """
    previous_frame = None
    with dai.Pipeline() as pipeline:
        # Set up camera
        cam = pipeline.create(dai.node.Camera).build()

        # Set up the neural network
        network = pipeline.create(dai.node.NeuralNetwork).build(
            cam.requestOutput(
                input_shape, type=dai.ImgFrame.Type.BGR888p, fps=fps_limit
            ),
            nn_archive,
        )

        # Set up parser. It is created through the pipeline, the same way the
        # stereo demo creates its parser, so the node is owned by the pipeline.
        parser = pipeline.create(XFeatMonoParser)
        parser.setOriginalSize(input_shape)
        parser.setInputSize(input_shape)
        parser.setMaxKeypoints(2048)

        # Linking
        network.out.link(parser.input)

        # Set up queues
        camera_queue = network.passthrough.createOutputQueue()
        parser_queue = parser.out.createOutputQueue()

        pipeline.start()

        while pipeline.isRunning():
            # getCvFrame() yields the image that OpenCV draws on below
            # (presumably a numpy array - it is passed to cv2.putText/imshow).
            frame = camera_queue.get().getCvFrame()
            message: dai.TrackedFeatures = parser_queue.get()
            features = message.trackedFeatures

            if previous_frame is not None:
                resulting_frame = xfeat_visualizer(previous_frame, frame, features)
            else:
                # Draw on a copy: cv2.putText writes into its input image, and
                # `frame` may become the reference frame below. Without the copy
                # the overlay text would be burned into the reference frame.
                resulting_frame = frame.copy()

            number_of_matches = len(features) // 2
            cv2.putText(
                resulting_frame,
                f"Number of matches: {number_of_matches}",
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (255, 255, 255),
                1,
            )
            cv2.imshow("XFeat", resulting_frame)

            key_pressed = cv2.waitKey(1)
            if key_pressed == ord("s"):
                parser.setTrigger()  # trigger to set the reference frame
                previous_frame = frame
            if key_pressed == ord("q"):
                cv2.destroyAllWindows()
                pipeline.stop()
                break


def xfeat_stereo(nn_archive: dai.NNArchive, input_shape: List[int], fps_limit: int):
    """Run the XFeatStereoParser on the left and right cameras and match the features.

    The left network feeds the parser's reference input and the right network
    feeds its target input. Each pair of frames is shown in an OpenCV window
    together with the number of matches; pressing the Q key closes the window
    and stops the pipeline.

    @param nn_archive: NN archive of the model to run on both cameras.
    @type nn_archive: dai.NNArchive
    @param input_shape: Size requested from both cameras; also passed to the
        parser as both its original size and its input size.
    @type input_shape: List[int]
    @param fps_limit: FPS requested for both camera outputs.
    @type fps_limit: int
    @raise RuntimeError: If the device does not report both a "left" and a
        "right" camera.
    """
    with dai.Pipeline() as pipeline:
        device: dai.Device = pipeline.getDefaultDevice()
        available_cameras = []
        for camera in device.getConnectedCameraFeatures():
            available_cameras.append(camera.name)

        # Both stereo cameras have to be present.
        if not ("left" in available_cameras and "right" in available_cameras):
            raise RuntimeError(
                f"Stereo cameras are not available! Available cameras: {available_cameras}"
            )

        left_cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_B)
        right_cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_C)

        # One identically configured network per camera, left first.
        networks = []
        for cam in (left_cam, right_cam):
            node = pipeline.create(dai.node.NeuralNetwork)
            stream = cam.requestOutput(
                input_shape, type=dai.ImgFrame.Type.RGB888p, fps=fps_limit
            )
            network = node.build(stream, nn_archive)
            network.setNumInferenceThreads(2)
            networks.append(network)
        left_network, right_network = networks

        parser = pipeline.create(XFeatStereoParser)
        parser.setOriginalSize(input_shape)
        parser.setInputSize(input_shape)
        parser.setMaxKeypoints(512)

        left_network.out.link(parser.reference_input)
        right_network.out.link(parser.target_input)

        left_cam_queue = left_network.passthrough.createOutputQueue()
        right_cam_queue = right_network.passthrough.createOutputQueue()
        parser_queue = parser.out.createOutputQueue()

        pipeline.start()

        while pipeline.isRunning():
            left_image = left_cam_queue.get().getCvFrame()
            right_image = right_cam_queue.get().getCvFrame()
            tracked = parser_queue.get().trackedFeatures

            canvas = xfeat_visualizer(
                left_image, right_image, tracked, draw_warp_corners=False
            )
            match_count = len(tracked) // 2
            cv2.putText(
                canvas,
                f"Number of matches: {match_count}",
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (255, 255, 255),
                1,
            )
            cv2.imshow("XFeat Stereo", canvas)

            if cv2.waitKey(1) != ord("q"):
                continue
            cv2.destroyAllWindows()
            pipeline.stop()
            break
6 changes: 6 additions & 0 deletions examples/visualization/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
visualize_lane_detections,
visualize_line_detections,
visualize_map,
visualize_multi_classification,
visualize_segmentation,
visualize_text_detection,
visualize_text_recognition,
visualize_yolo_extended,
)

Expand All @@ -33,6 +36,9 @@
"YOLOExtendedParser": visualize_yolo_extended,
"LaneDetectionParser": visualize_lane_detections,
"FastSAMParser": visualize_fastsam,
"PPTextDetectionParser": visualize_text_detection,
"PaddleOCRParser": visualize_text_recognition,
"MultiClassificationParser": visualize_multi_classification,
}


Expand Down
Loading

0 comments on commit 3f3e360

Please sign in to comment.