
Commit 97a7083

m-barker and jws-1 authored

Detect-skills-debugging (#211)

* fix: incorrect remapping keys for handover
* fix: remove speech server in launch file
* feat: add visualisation of detect skill
* feat: add visualisation of detect 3D detections
* feat: add publishing of 3D polygons
* feat: add publishing of markers for detect 3D skill
* feat: add publishing of text labels for markers
* Update common/helpers/markers/src/markers/__init__.py

Co-authored-by: Jared Swift <jared.swift@kcl.ac.uk>

1 parent 2743ef3 commit 97a7083

File tree

7 files changed (+133 −32 lines)

common/helpers/markers/src/markers/__init__.py

Lines changed: 16 additions & 3 deletions
@@ -5,7 +5,7 @@
 
 from collections import defaultdict
 
-from typing import Union
+from typing import Union, Optional
 
 publisher_counts = defaultdict(int)
 
@@ -16,12 +16,13 @@ def create_marker(
     r: float = 0.0,
     g: float = 1.0,
     b: float = 0.0,
+    name: Optional[str] = None,
 ):
     marker_msg = Marker()
+    marker_msg.type = Marker.SPHERE
     marker_msg.header.frame_id = point_stamped.header.frame_id
     marker_msg.header.stamp = point_stamped.header.stamp
     marker_msg.id = idx
-    marker_msg.type = Marker.SPHERE
     marker_msg.action = Marker.ADD
     marker_msg.pose.position = point_stamped.point
     marker_msg.pose.orientation.w = 1.0
@@ -32,6 +33,10 @@ def create_marker(
     marker_msg.color.r = r
     marker_msg.color.g = g
     marker_msg.color.b = b
+
+    if name is not None:
+        marker_msg.type = Marker.TEXT_VIEW_FACING
+        marker_msg.text = name
     return marker_msg
 
 
@@ -42,11 +47,19 @@ def create_and_publish_marker(
     r: float = 0.0,
     g: float = 1.0,
     b: float = 0.0,
+    name: Optional[str] = None,
 ):
     if idx is None:
         global publisher_counts
         idx = publisher_counts[publisher]
         publisher_counts[publisher] += 1
-
     marker_msg = create_marker(point_stamped, idx, r, g, b)
     publisher.publish(marker_msg)
+    rospy.sleep(2)  # Needed to prevent markers from being overwritten
+    if name is not None:
+        name_location = point_stamped.point
+        name_location.z += 0.1
+        idx = publisher_counts[publisher]
+        publisher_counts[publisher] += 1
+        marker_name_msg = create_marker(point_stamped, idx, r, g, b, name)
+        publisher.publish(marker_name_msg)
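
With the new optional name argument, a single helper call now publishes both a sphere marker and a floating text label. A minimal usage sketch (the node and topic names here are illustrative, not part of the commit):

#!/usr/bin/env python3
# Usage sketch for the updated helper; node/topic names are assumptions.
import rospy
from geometry_msgs.msg import PointStamped
from visualization_msgs.msg import Marker
from markers import create_and_publish_marker

rospy.init_node("marker_demo")
marker_pub = rospy.Publisher("/markers/debug", Marker, queue_size=1)

point = PointStamped()
point.header.frame_id = "map"
point.header.stamp = rospy.Time.now()
point.point.x, point.point.y, point.point.z = 1.0, 0.5, 0.8

# Publishes a green sphere at the point, then a TEXT_VIEW_FACING label
# ("cup") offset 0.1 m above it, using the next free marker ids.
create_and_publish_marker(marker_pub, point, name="cup")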

common/vision/lasr_vision_deepface/requirements.txt

Lines changed: 10 additions & 10 deletions
@@ -1,9 +1,9 @@
 absl-py==2.1.0 # via tensorboard, tensorflow
 astunparse==1.6.3 # via tensorflow
 beautifulsoup4==4.12.3 # via gdown
-blinker==1.8.1 # via flask
+blinker==1.8.2 # via flask
 cachetools==5.3.3 # via google-auth
-certifi==2024.2.2 # via requests
+certifi==2024.6.2 # via requests
 charset-normalizer==3.3.2 # via requests
 click==8.1.7 # via flask
 deepface==0.0.91 # via -r requirements.in
@@ -12,17 +12,17 @@ fire==0.6.0 # via deepface
 flask==3.0.3 # via deepface
 flatbuffers==1.12 # via tensorflow
 gast==0.4.0 # via tensorflow
-gdown==5.1.0 # via deepface, retina-face
+gdown==5.2.0 # via deepface, retina-face
 google-auth==2.29.0 # via google-auth-oauthlib, tensorboard
 google-auth-oauthlib==0.4.6 # via tensorboard
 google-pasta==0.2.0 # via tensorflow
-grpcio==1.63.0 # via tensorboard, tensorflow
+grpcio==1.64.1 # via tensorboard, tensorflow
 gunicorn==22.0.0 # via deepface
 h5py==3.11.0 # via tensorflow
 idna==3.7 # via requests
 importlib-metadata==7.1.0 # via flask, markdown
 itsdangerous==2.2.0 # via flask
-jinja2==3.1.3 # via flask
+jinja2==3.1.4 # via flask
 keras==2.9.0 # via deepface, mtcnn, tensorflow
 keras-preprocessing==1.1.2 # via tensorflow
 libclang==18.1.1 # via tensorflow
@@ -31,7 +31,7 @@ markupsafe==2.1.5 # via jinja2, werkzeug
 mtcnn==0.1.1 # via deepface
 numpy==1.24.4 # via -r requirements.in, deepface, h5py, keras-preprocessing, opencv-python, opt-einsum, pandas, retina-face, tensorboard, tensorflow
 oauthlib==3.2.2 # via requests-oauthlib
-opencv-python==4.9.0.80 # via deepface, mtcnn, retina-face
+opencv-python==4.10.0.82 # via deepface, mtcnn, retina-face
 opt-einsum==3.3.0 # via tensorflow
 packaging==24.0 # via gunicorn, tensorflow
 pandas==2.0.3 # via deepface
@@ -42,7 +42,7 @@ pyasn1-modules==0.4.0 # via google-auth
 pysocks==1.7.1 # via requests
 python-dateutil==2.9.0.post0 # via pandas
 pytz==2024.1 # via pandas
-requests[socks]==2.31.0 # via deepface, gdown, requests-oauthlib, tensorboard
+requests[socks]==2.32.3 # via deepface, gdown, requests-oauthlib, tensorboard
 requests-oauthlib==2.0.0 # via google-auth-oauthlib
 retina-face==0.0.17 # via deepface
 rsa==4.9 # via google-auth
@@ -56,13 +56,13 @@ tensorflow-estimator==2.9.0 # via tensorflow
 tensorflow-io-gcs-filesystem==0.34.0 # via tensorflow
 termcolor==2.4.0 # via fire, tensorflow
 tqdm==4.66.4 # via deepface, gdown
-typing-extensions==4.11.0 # via tensorflow
+typing-extensions==4.12.1 # via tensorflow
 tzdata==2024.1 # via pandas
 urllib3==2.2.1 # via requests
-werkzeug==3.0.2 # via flask, tensorboard
+werkzeug==3.0.3 # via flask, tensorboard
 wheel==0.43.0 # via astunparse, tensorboard
 wrapt==1.16.0 # via tensorflow
-zipp==3.18.1 # via importlib-metadata
+zipp==3.19.1 # via importlib-metadata
 
 # The following packages are considered to be unsafe in a requirements file:
 # setuptools

common/vision/lasr_vision_msgs/msg/Detection.msg

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ string name
 float32 confidence
 
 # Bounding box mask defined in pixel-space
+# X and Y are the midpoints of the bounding box.
 int32[] xywh
 
 # Segmentation mask defined in pixel-space
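
Because x and y are the box centre rather than a corner, consumers need half-width/half-height offsets to recover pixel corners (this is what the drawing code in detect.py below does). A tiny helper, named here only for illustration:

from typing import List, Tuple

def xywh_to_corners(xywh: List[int]) -> Tuple[Tuple[int, int], Tuple[int, int]]:
    # Convert a centre-based [x, y, w, h] box to ((x1, y1), (x2, y2)) pixel corners.
    x, y, w, h = xywh
    return (x - w // 2, y - h // 2), (x + w // 2, y + h // 2)

# xywh_to_corners([320, 240, 100, 60]) -> ((270, 210), (370, 270))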

skills/src/lasr_skills/detect.py

Lines changed: 49 additions & 6 deletions
@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
-
+import cv2
+import cv2_img
 import rospy
 import smach
 from sensor_msgs.msg import Image
-
 from lasr_vision_msgs.srv import YoloDetection
 
 from typing import List, Union
@@ -13,10 +13,11 @@ class Detect(smach.State):
     def __init__(
         self,
         image_topic: str = "/xtion/rgb/image_raw",
-        model: str = "yolov8n.pt",
+        model: str = "yolov8x.pt",
         filter: Union[List[str], None] = None,
         confidence: float = 0.5,
         nms: float = 0.3,
+        debug_publisher: str = "/skills/detect/debug",
     ):
         smach.State.__init__(
             self,
@@ -30,16 +31,58 @@ def __init__(
         self.nms = nms
         self.yolo = rospy.ServiceProxy("/yolov8/detect", YoloDetection)
         self.yolo.wait_for_service()
+        self.debug_pub = rospy.Publisher(debug_publisher, Image, queue_size=1)
 
     def execute(self, userdata):
         img_msg = rospy.wait_for_message(self.image_topic, Image)
+        img_cv2 = cv2_img.msg_to_cv2_img(img_msg)
         try:
             result = self.yolo(img_msg, self.model, self.confidence, self.nms)
-            result.detected_objects = [
-                det for det in result.detected_objects if det.name in self.filter
-            ]
+            if len(self.filter) > 0:
+                result.detected_objects = [
+                    det for det in result.detected_objects if det.name in self.filter
+                ]
             userdata.detections = result
+
+            # Annotate the image with the detected objects
+            for det in result.detected_objects:
+                x, y, w, h = det.xywh[0], det.xywh[1], det.xywh[2], det.xywh[3]
+                cv2.rectangle(
+                    img_cv2,
+                    (x - (w // 2), y - (h // 2)),
+                    (x + (w // 2), y + (h // 2)),
+                    (0, 255, 0),
+                    2,
+                )
+                cv2.putText(
+                    img_cv2,
+                    f"{det.name} ({det.confidence:.2f})",
+                    (x - 50, y - (h // 2) - 10),
+                    cv2.FONT_HERSHEY_SIMPLEX,
+                    0.9,
+                    (0, 255, 0),
+                    2,
+                )
+
+            self.debug_pub.publish(cv2_img.cv2_img_to_msg(img_cv2))
+
             return "succeeded"
         except rospy.ServiceException as e:
             rospy.logwarn(f"Unable to perform inference. ({str(e)})")
             return "failed"
+
+
+if __name__ == "__main__":
+    rospy.init_node("detect")
+    while not rospy.is_shutdown():
+        detect = Detect(
+            image_topic="/usb_cam/image_raw",
+        )
+        sm = smach.StateMachine(outcomes=["succeeded", "failed"])
+        with sm:
+            smach.StateMachine.add(
+                "DETECT",
+                detect,
+                transitions={"succeeded": "succeeded", "failed": "failed"},
+            )
+        sm.execute()
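
The annotated frames land on /skills/detect/debug by default, so they can be inspected with rqt_image_view or a throwaway subscriber like the sketch below (it reuses the same cv2_img helpers the skill imports; the viewer node itself is not part of the commit):

#!/usr/bin/env python3
# Throwaway viewer for the annotated debug images (illustrative only).
import cv2
import cv2_img
import rospy
from sensor_msgs.msg import Image

def show(msg: Image) -> None:
    frame = cv2_img.msg_to_cv2_img(msg)  # same decode helper used by the skill
    cv2.imshow("detect debug", frame)
    cv2.waitKey(1)

rospy.init_node("detect_debug_viewer")
rospy.Subscriber("/skills/detect/debug", Image, show, queue_size=1)
rospy.spin()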

skills/src/lasr_skills/detect_3d.py

File mode changed from 100644 to 100755
Lines changed: 37 additions & 4 deletions
@@ -1,8 +1,13 @@
+#!/usr/bin/env python3
 import rospy
 import smach
+import numpy as np
 
 from sensor_msgs.msg import PointCloud2
+from visualization_msgs.msg import Marker
+from geometry_msgs.msg import PointStamped, Point
 from lasr_vision_msgs.srv import YoloDetection3D
+from markers import create_and_publish_marker
 
 from typing import List, Union
 
@@ -11,10 +16,11 @@ class Detect3D(smach.State):
     def __init__(
         self,
         depth_topic: str = "/xtion/depth_registered/points",
-        model: str = "yolov8n-seg.pt",
+        model: str = "yolov8x-seg.pt",
         filter: Union[List[str], None] = None,
         confidence: float = 0.5,
         nms: float = 0.3,
+        debug_publisher: str = "/skills/detect3d/debug",
     ):
         smach.State.__init__(
             self,
@@ -28,16 +34,43 @@ def __init__(
         self.nms = nms
         self.yolo = rospy.ServiceProxy("/yolov8/detect3d", YoloDetection3D)
         self.yolo.wait_for_service()
+        self.debug_pub = rospy.Publisher(debug_publisher, Marker, queue_size=1)
 
     def execute(self, userdata):
        pcl_msg = rospy.wait_for_message(self.depth_topic, PointCloud2)
        try:
            result = self.yolo(pcl_msg, self.model, self.confidence, self.nms)
-            result.detected_objects = [
-                det for det in result.detected_objects if det.name in self.filter
-            ]
+            if len(self.filter) > 0:
+                result.detected_objects = [
+                    det for det in result.detected_objects if det.name in self.filter
+                ]
             userdata.detections_3d = result
+
+            for det in result.detected_objects:
+                point_stamped = PointStamped()
+                point_stamped.header.frame_id = "map"
+                point_stamped.point = det.point
+                rospy.loginfo(f"Detected point: {point_stamped}")
+                if np.isnan(det.point.x).any():
+                    rospy.loginfo(f"No depth detected, object likely too far away")
+                    continue
+                create_and_publish_marker(self.debug_pub, point_stamped, name=det.name)
+
            return "succeeded"
        except rospy.ServiceException as e:
            rospy.logwarn(f"Unable to perform inference. ({str(e)})")
            return "failed"
+
+
+if __name__ == "__main__":
+    rospy.init_node("detect")
+    while not rospy.is_shutdown():
+        detect = Detect3D()
+        sm = smach.StateMachine(outcomes=["succeeded", "failed"])
+        with sm:
+            smach.StateMachine.add(
+                "DETECT",
+                detect,
+                transitions={"succeeded": "succeeded", "failed": "failed"},
+            )
+        sm.execute()
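
When Detect3D runs inside a larger state machine, its detections_3d output key is typically remapped onto a parent userdata key. A sketch of that wiring (the parent key name people_detections is made up for the example):

# Sketch: embedding Detect3D with an explicit userdata remapping.
import rospy
import smach
from lasr_skills import Detect3D

rospy.init_node("detect_3d_example")
sm = smach.StateMachine(outcomes=["succeeded", "failed"])

with sm:
    smach.StateMachine.add(
        "DETECT_PEOPLE",
        Detect3D(filter=["person"]),
        transitions={"succeeded": "succeeded", "failed": "failed"},
        remapping={"detections_3d": "people_detections"},  # parent key is illustrative
    )

sm.execute()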

skills/src/lasr_skills/detect_3d_in_area.py

Lines changed: 18 additions & 5 deletions
@@ -1,27 +1,40 @@
 import smach
-
+import rospy
 from lasr_skills import Detect3D
-
 from typing import List, Union
 
+from geometry_msgs.msg import Polygon, Point, Point32
 from shapely.geometry import Point
 from shapely.geometry.polygon import Polygon
 
 
 class Detect3DInArea(smach.StateMachine):
     class FilterDetections(smach.State):
-        def __init__(self, area_polygon: Polygon):
+        def __init__(
+            self,
+            area_polygon: Polygon,
+            debug_publisher: str = "/skills/detect3d_in_area/debug",
+        ):
             smach.State.__init__(
                 self,
                 outcomes=["succeeded", "failed"],
                 input_keys=["detections_3d"],
                 output_keys=["detections_3d"],
             )
             self.area_polygon = area_polygon
+            self.debug_publisher = rospy.Publisher(
+                debug_publisher, Polygon, queue_size=1
+            )
 
         def execute(self, userdata):
             detected_objects = userdata["detections_3d"].detected_objects
-
+            # publish polygon for debugging
+            polygon_msg = Polygon()
+            polygon_msg.points = [
+                Point32(x=point.x, y=point.y, z=point.z)
+                for point in self.area_polygon.exterior.coords
+            ]
+            self.debug_publisher.publish(polygon_msg)
             satisfied_points = [
                 self.area_polygon.contains(Point(object.point.x, object.point.y))
                 for object in detected_objects
@@ -39,7 +52,7 @@ def __init__(
         self,
         area_polygon: Polygon,
         depth_topic: str = "/xtion/depth_registered/points",
-        model: str = "yolov8n-seg.pt",
+        model: str = "yolov8x-seg.pt",
         filter: Union[List[str], None] = None,
         confidence: float = 0.5,
         nms: float = 0.3,
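
The FilterDetections step reduces to a 2D point-in-polygon test on each detection centroid. A standalone sketch of that check with shapely (coordinates invented for the example):

# Standalone illustration of the containment test performed by FilterDetections.
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# Hypothetical 2 m x 2 m area in the map frame.
area = Polygon([(0.0, 0.0), (2.0, 0.0), (2.0, 2.0), (0.0, 2.0)])

centroids = [(0.5, 1.0), (3.0, 1.0)]  # e.g. (det.point.x, det.point.y) per detection
inside = [area.contains(Point(x, y)) for x, y in centroids]
print(inside)  # [True, False]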

skills/src/lasr_skills/detect_gesture.py

Lines changed: 2 additions & 4 deletions
@@ -20,6 +20,7 @@ def __init__(
         self,
         gesture_to_detect: Optional[str] = None,
         buffer_width: int = 50,
+        debug_publisher: str = "/skills/gesture_detection/debug",
     ):
         """Optionally stores the gesture to detect. If None, it will infer the gesture from the keypoints."""
         smach.State.__init__(
@@ -28,12 +29,9 @@ def __init__(
             input_keys=["img_msg"],
             output_keys=["gesture_detected"],
         )
-        self.debug = debug
         self.gesture_to_detect = gesture_to_detect
         self.body_pix_client = rospy.ServiceProxy("/bodypix/detect", BodyPixDetection)
-        self.debug_publisher = rospy.Publisher(
-            "/gesture_detection/debug", Image, queue_size=1
-        )
+        self.debug_publisher = rospy.Publisher(debug_publisher, Image, queue_size=1)
         self.buffer_width = buffer_width
 
     def execute(self, userdata):
