From 35a252862a9d33ec02a8be066e3ca888bd8dc798 Mon Sep 17 00:00:00 2001 From: Kavit Shah Date: Wed, 1 May 2024 18:37:01 -0700 Subject: [PATCH 1/7] Move Spot to Quest's location --- .../episodic_memory_robotic_fetch_quest3_c.py | 272 ++++++++++++++++++ .../perception_and_utils/utils/math_utils.py | 2 +- .../configs/ros_frame_names.yaml | 3 +- 3 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py diff --git a/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py b/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py new file mode 100644 index 00000000..182beb0d --- /dev/null +++ b/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py @@ -0,0 +1,272 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import logging +from typing import Any, Dict, List, Optional, Tuple + +import click +import numpy as np +import rospy +import sophus as sp +from geometry_msgs.msg import PoseStamped +from perception_and_utils.utils.conversions import ( + ros_PoseStamped_to_sophus_SE3, + ros_TransformStamped_to_sophus_SE3, + sophus_SE3_to_ros_PoseStamped, + sophus_SE3_to_ros_TransformStamped, + xyt_to_sophus_SE3, +) +from spot_rl.envs.skill_manager import SpotSkillManager +from spot_rl.utils.utils import ros_frames as rf +from spot_wrapper.spot import Spot, SpotCamIds +from spot_wrapper.spot_qr_detector import SpotQRDetector +from std_msgs.msg import Bool + +# from spot_rl.envs.skill_manager import SpotSkillManager +# from spot_rl.utils.utils import ros_frames as rf +# from spot_wrapper.spot import Spot, SpotCamIds +# from spot_wrapper.spot_qr_detector import SpotQRDetector +# from std_msgs.msg import Bool +from tf2_ros import ( + ConnectivityException, + ExtrapolationException, + LookupException, + StaticTransformBroadcaster, +) +from tf2_ros.buffer import Buffer +from tf2_ros.transform_listener import TransformListener + +# For information on useful reference frame, please check https://github.com/facebookresearch/spot-sim2real?tab=readme-ov-file#eyeglasses-run-spot-aria-project-code + +# Waypoints on left of the dock to observe the QR code clearly +SPOT_DOCK_OBSERVER_WAYPOINT_LEFT = [ + 0.3840912007597151, + -0.5816728569741766, + 149.58030524832756, +] +# Waypoints on right of the dock to observe the QR code clearly +SPOT_DOCK_OBSERVER_WAYPOINT_RIGHT = [ + 0.5419185005639034, + 0.5319243891865243, + -154.1506754378722, +] + + +class EpisodicMemoryRoboticFetchQuest3: + """ + This class handles the core logic of the Episodic Memory Robotic Fetch + It begines with Spot looking at the marker and detecting it which results + in a static tf of marker w.r.t spotWorld frame being published. + + Then it waits for Aria to detect marker and publish ariaWorld w.r.t marker + as a static tf. This is done by AriaLiveReader node. This step ensures both + Spot & Aria worlds are now mutually transformable which is necessary for the + rest of the logic. + + It constantly listens to Aria's current pose w.r.t ariaWorld frame. + It also waits for Aria to detect the object and publish that pose of interest + (i.e. aria's cpf frame when it saw the object) w.r.t ariaWorld frame. + + Once both the poses are received, it waits for the spot_fetch trigger to be True + which will be set by the user via `rostopic pub` on cli. 
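+
+    A minimal trigger sketch from the CLI (assuming the topic wired up in
+    `__init__` below; the callback ignores the payload and simply latches the flag):
+    `rostopic pub /spot_fetch_trigger std_msgs/Bool "data: true" -1`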
+ + Once the trigger is True, it will run the following steps: + 1. Nav to Pose of interest (Aria's cpf frame when it saw the object) + 2. Pick the object (object is hardcoded to be bottle) + 3. Nav to Pose of interest (Aria wearer's last location before triggering spot_fetch) + 4. Place the object (object is hardcoded to be bottle) + + Args: + spot: Spot object + verbose: bool indicating whether to print debug logs + + Warning: Do not create objects of this class directly. It is a standalone node + """ + + def __init__(self, spot: Spot, verbose: bool = True, use_policies: bool = True): + # @FIXME: This is initializing a ros-node silently (at core of inheritence in + # SpotRobotSubscriberMixin). Make it explicit + self.skill_manager = SpotSkillManager( + spot=spot, + verbose=verbose, + use_policies=use_policies, + ) + + # Navigate Spot to SPOT_DOCK_OBSERVER_WAYPOINT_LEFT so that it can see the marker + status, msg = self.skill_manager.nav( + SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[0], + SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[1], + np.deg2rad(SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[2]), + ) + if not status: + rospy.logerr( + f"Failed to navigate to spot dock observer waypoint. Error: {msg}. Exiting..." + ) + return + + # Sit Spot down + self.skill_manager.sit() + + # Instantiate static transform broadcaster for publishing marker w.r.t spotWorld transforms + self.static_tf_broadcaster = StaticTransformBroadcaster() + + self._spot_fetch_trigger_sub = rospy.Subscriber( + "/spot_fetch_trigger", Bool, self.spot_fetch_trigger_callback + ) + self._spot_fetch_flag = False + + # Detect the marker and get the average pose of marker w.r.t spotWorld frame + cam_id = SpotCamIds.HAND_COLOR + spot_qr = SpotQRDetector(spot=spot, cam_ids=[cam_id]) + avg_spotWorld_T_marker = spot_qr.get_avg_spotWorld_T_marker(cam_id=cam_id) + + # Publish marker w.r.t spotWorld transforms for 5 seconds so it can be seen in rviz + start_time = rospy.Time.now() + while rospy.Time.now() - start_time < rospy.Duration(2.0): + self.static_tf_broadcaster.sendTransform( + sophus_SE3_to_ros_TransformStamped( + sp_se3=avg_spotWorld_T_marker, + parent_frame=rf.SPOT_WORLD, + child_frame=rf.MARKER, + ) + ) + rospy.sleep(0.5) + + # Instantiate publisher for publishing go-to pose for handoff + self._nav_PoseStamped_pub_for_place = rospy.Publisher( + "/nav_pose_for_place_viz", PoseStamped, queue_size=10 + ) + self.nav_xyt_for_handoff = None + + # Initialize static transform subscriber for listening to ariaWorld w.r.t marker transform + self._tf_buffer = Buffer() + self._tf_listener = TransformListener(self._tf_buffer) + + # Wait for transform from camera to spotWorld to be available + while not rospy.is_shutdown() and not self._spot_fetch_flag: + while not rospy.is_shutdown() and not self._tf_buffer.can_transform( + target_frame=rf.SPOT_WORLD, source_frame="camera", time=rospy.Time() + ): + rospy.logwarn_throttle( + 5.0, "Waiting for transform from camera to spotWorld" + ) + rospy.sleep(0.5) + try: + transform_stamped_spotWorld_T_camera = self._tf_buffer.lookup_transform( + target_frame=rf.SPOT_WORLD, + source_frame="camera", + time=rospy.Time(0), + ) + except (LookupException, ConnectivityException, ExtrapolationException): + raise RuntimeError( + "Unable to lookup transform from ariaWorld to spotWorld" + ) + spotWorld_T_camera = ros_TransformStamped_to_sophus_SE3( + ros_trf_stamped=transform_stamped_spotWorld_T_camera + ) + + self.nav_xyt_for_handoff = self.get_nav_xyt_to_wearer(spotWorld_T_camera) + # Publish place pose in spotWorld frame so it can be seen 
in rviz until spot_fetch is triggered + self._nav_PoseStamped_pub_for_place.publish( + sophus_SE3_to_ros_PoseStamped( + sp_se3=xyt_to_sophus_SE3( + np.asarray(self.nav_xyt_for_handoff, dtype=np.float32) + ), + parent_frame=rf.SPOT_WORLD, + ) + ) + + rospy.logwarn_throttle( + 5.0, "Waiting for demo trigger on /spot_fetch_trigger topic" + ) + rospy.sleep(1.0) + + # Set run status to true in the beginning + run_status = True + + if run_status: + # Nav to x,y,theta of interest (Aria's cpf frame when it saw the object) + nav_xyt_for_handoff = self.nav_xyt_for_handoff + rospy.loginfo(f"Nav 2D location for pick: {nav_xyt_for_handoff}") + + self.skill_manager.nav( + nav_xyt_for_handoff[0], nav_xyt_for_handoff[1], nav_xyt_for_handoff[2] + ) + + self.skill_manager.sit() + + def spot_fetch_trigger_callback(self, msg): + """ + Save _spot_fetch_flag as True when spot fetch is to be triggered + + Args: + msg (Bool): Message received on /spot_fetch_trigger topic + + Updates: + self._spot_fetch_flag (bool): True when spot fetch is to be triggered + """ + self._spot_fetch_flag = True + + def get_nav_xyt_to_wearer( + self, aria_pose: sp.SE3, shift_offset: float = 0.6 + ) -> Tuple[float, float, float]: + """ + Converts Sophus SE3 aria current pose to Tuple of x, y, theta, then flips it by 180 degrees and shifts it by a given offset. + At the end, the final x, y, theta is such that the robot will face the wearer. + + Args: + aria_pose (sp.SE3): Sophus SE3 object for aria's current pose (as cpf frame) w.r.t spotWorld frame + + Returns: + Tuple[float, float, float]: Tuple of x,y,theta as floats representing nav target for robot + """ + # ARROW STARTS FROM ORIGIN + + # get position and rotation as x, y, theta + position = aria_pose.translation() + # Find the angle made by CPF's z axis with spotWorld's x axis + # as robot should orient to the CPF's z axis. 
First 3 elements of + # column 3 from spotWorld_T_cpf represents cpf's z axis in spotWorld frame + cpf_z_axis_in_spotWorld = aria_pose.matrix()[:3, 2] + x_component_of_z_axis = cpf_z_axis_in_spotWorld[0] + y_component_of_z_axis = cpf_z_axis_in_spotWorld[1] + rotation = float( + np.arctan2( + y_component_of_z_axis, + x_component_of_z_axis, + ) + ) # tan^-1(y/x) + x, y, theta = position[0], position[1], rotation + + # push fwd this point along theta + x += shift_offset * np.cos(theta) + y += shift_offset * np.sin(theta) + # rotate theta by pi + theta += np.pi + + return (x, y, theta) + + +@click.command() +@click.option("--verbose", type=bool, default=True) +@click.option("--use-policies", type=bool, default=False) +def main(verbose: bool, use_policies: bool): + # rospy.init_node("episode") + # rospy.logwarn("Starting up ROS node") + + spot = Spot("EpisodicMemoryRoboticFetchQuest3Node") + with spot.get_lease(hijack=True) as lease: + if lease is None: + raise RuntimeError("Could not get lease") + else: + rospy.loginfo("Acquired lease") + + _ = EpisodicMemoryRoboticFetchQuest3( + spot=spot, verbose=verbose, use_policies=use_policies + ) + + +if __name__ == "__main__": + main() diff --git a/perception_and_utils_root/perception_and_utils/utils/math_utils.py b/perception_and_utils_root/perception_and_utils/utils/math_utils.py index 740e38d2..61be50c4 100644 --- a/perception_and_utils_root/perception_and_utils/utils/math_utils.py +++ b/perception_and_utils_root/perception_and_utils/utils/math_utils.py @@ -100,7 +100,7 @@ def get_running_avg_a_T_b( a_T_b_position_list.append(b_position) - # Ensure quaternion's w is always positive for effective averaging as multiple quaternions can represent the same rotation + # Ensure quaternion's w is always negative for effective averaging as multiple quaternions can represent the same rotation quat = Rotation.from_matrix(a_T_b.rotationMatrix()).as_quat() if quat[3] > 0: quat = -1.0 * quat diff --git a/spot_rl_experiments/configs/ros_frame_names.yaml b/spot_rl_experiments/configs/ros_frame_names.yaml index 3461933f..7aebaec2 100644 --- a/spot_rl_experiments/configs/ros_frame_names.yaml +++ b/spot_rl_experiments/configs/ros_frame_names.yaml @@ -4,7 +4,8 @@ SPOT: "spot" ARIA_WORLD: "ariaWorld" ARIA: "aria" # aria in cpf frame ARIA_DEVICE: "ariaDevice" # aria in device frame - +QUEST3_WORLD: "quest3World" +QUEST3_CAMERA: "camera" MARKER: "marker" From 8624e02bc4cfd4c6cf083c2119a60f206a9f013a Mon Sep 17 00:00:00 2001 From: Kavit Shah Date: Tue, 11 Jun 2024 15:54:32 -0700 Subject: [PATCH 2/7] LSC-Quest3 MVP --- .../aria_data_utils/rviz/spot-quest3.rviz | 147 ++++++++++ spot_rl_experiments/spot_rl/envs/lsc_hitl.py | 267 ++++++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 aria_data_loaders/aria_data_utils/rviz/spot-quest3.rviz create mode 100644 spot_rl_experiments/spot_rl/envs/lsc_hitl.py diff --git a/aria_data_loaders/aria_data_utils/rviz/spot-quest3.rviz b/aria_data_loaders/aria_data_utils/rviz/spot-quest3.rviz new file mode 100644 index 00000000..1960ddfb --- /dev/null +++ b/aria_data_loaders/aria_data_utils/rviz/spot-quest3.rviz @@ -0,0 +1,147 @@ +Panels: + - Class: rviz/Displays + Help Height: 78 + Name: Displays + Property Tree Widget: + Expanded: + - /Global Options1 + - /Status1 + - /TF1 + - /TF1/Frames1 + Splitter Ratio: 0.5 + Tree Height: 719 + - Class: rviz/Selection + Name: Selection + - Class: rviz/Tool Properties + Expanded: + - /2D Pose Estimate1 + - /2D Nav Goal1 + - /Publish Point1 + Name: Tool Properties + Splitter Ratio: 
0.5886790156364441 + - Class: rviz/Views + Expanded: + - /Current View1 + Name: Views + Splitter Ratio: 0.5 + - Class: rviz/Time + Name: Time + SyncMode: 0 + SyncSource: "" +Preferences: + PromptSaveOnExit: true +Toolbars: + toolButtonStyle: 2 +Visualization Manager: + Class: "" + Displays: + - Alpha: 0.5 + Cell Size: 1 + Class: rviz/Grid + Color: 160; 160; 164 + Enabled: true + Line Style: + Line Width: 0.029999999329447746 + Value: Lines + Name: Grid + Normal Cell Count: 0 + Offset: + X: 0 + Y: 0 + Z: 0 + Plane: XY + Plane Cell Count: 10 + Reference Frame: + Value: true + - Class: rviz/TF + Enabled: true + Filter (blacklist): "" + Filter (whitelist): "" + Frame Timeout: 15 + Frames: + All Enabled: false + map: + Value: false + spot: + Value: true + spotWorld: + Value: true + Marker Alpha: 1 + Marker Scale: 1 + Name: TF + Show Arrows: true + Show Axes: true + Show Names: true + Tree: + map: + spotWorld: + spot: + {} + Update Interval: 0 + Value: true + Enabled: true + Global Options: + Background Color: 48; 48; 48 + Default Light: true + Fixed Frame: map + Frame Rate: 30 + Name: root + Tools: + - Class: rviz/Interact + Hide Inactive Objects: true + - Class: rviz/MoveCamera + - Class: rviz/Select + - Class: rviz/FocusCamera + - Class: rviz/Measure + - Class: rviz/SetInitialPose + Theta std deviation: 0.2617993950843811 + Topic: /initialpose + X std deviation: 0.5 + Y std deviation: 0.5 + - Class: rviz/SetGoal + Topic: /move_base_simple/goal + - Class: rviz/PublishPoint + Single click: true + Topic: /clicked_point + Value: true + Views: + Current: + Class: rviz/Orbit + Distance: 2.778170108795166 + Enable Stereo Rendering: + Stereo Eye Separation: 0.05999999865889549 + Stereo Focal Distance: 1 + Swap Stereo Eyes: false + Value: false + Field of View: 0.7853981852531433 + Focal Point: + X: 0.4939827620983124 + Y: 0.11082470417022705 + Z: -0.30550795793533325 + Focal Shape Fixed Size: true + Focal Shape Size: 0.05000000074505806 + Invert Z Axis: false + Name: Current View + Near Clip Distance: 0.009999999776482582 + Pitch: 1.1453975439071655 + Target Frame: + Yaw: 6.2535834312438965 + Saved: ~ +Window Geometry: + Displays: + collapsed: false + Height: 1016 + Hide Left Dock: false + Hide Right Dock: true + QMainWindow State: 
000000ff00000000fd0000000400000000000001560000035afc0200000008fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005c00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000003d0000035a000000c900fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261000000010000010f000002b0fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073000000003d000002b0000000a400fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000007800000003efc0100000002fb0000000800540069006d00650100000000000007800000033700fffffffb0000000800540069006d00650100000000000004500000000000000000000006240000035a00000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 + Selection: + collapsed: false + Time: + collapsed: false + Tool Properties: + collapsed: false + Views: + collapsed: true + Width: 1920 + X: 0 + Y: 27 diff --git a/spot_rl_experiments/spot_rl/envs/lsc_hitl.py b/spot_rl_experiments/spot_rl/envs/lsc_hitl.py new file mode 100644 index 00000000..d2890ec3 --- /dev/null +++ b/spot_rl_experiments/spot_rl/envs/lsc_hitl.py @@ -0,0 +1,267 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
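+
+# Hedged sketch of the flow implemented below (names are the ones used in this file):
+#   1. Nav to a dock-observer waypoint, detect the QR marker with SpotQRDetector and
+#      broadcast marker w.r.t spotWorld so Quest3 camera poses become transformable.
+#   2. WhisperTranslator records and transcribes speech, RearrangeEasyChain parses it
+#      into (nav_1, pick, nav_2), and SentenceSimilarity snaps the nav targets onto
+#      the known waypoints from waypoints.yaml.
+#   3. Skills run as nav(nav_1) -> pick -> either nav(nav_2) + place, or, when
+#      nav_2 == "human", nav to the wearer via get_nav_xyt_to_wearer() and place at a
+#      fixed offset in Spot's local frame via get_place_xyz_to_wearer().
+#
+# Worked example for get_nav_xyt_to_wearer() with assumed numbers: if the wearer
+# stands at (2.0, 1.0) and the CPF z-axis projects onto spotWorld's XY plane as
+# (0, 1), then theta = arctan2(1, 0) = pi/2; the target is pushed shift_offset = 0.6 m
+# forward to (2.0, 1.6) and theta is flipped by pi, leaving Spot 0.6 m in front of the
+# wearer, facing back toward them.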
+ + +import os +import subprocess +import time +from collections import Counter +from typing import Any, Dict, Tuple + +import numpy as np +import rospy +import sophus as sp +from hydra import compose, initialize +from perception_and_utils.utils.conversions import ( + ros_PoseStamped_to_sophus_SE3, + ros_TransformStamped_to_sophus_SE3, + sophus_SE3_to_ros_PoseStamped, + sophus_SE3_to_ros_TransformStamped, + xyt_to_sophus_SE3, +) +from spot_rl.envs.skill_manager import SpotSkillManager +from spot_rl.llm.src.rearrange_llm import RearrangeEasyChain +from spot_rl.models.sentence_similarity import SentenceSimilarity +from spot_rl.utils.utils import construct_config, get_default_parser, get_waypoint_yaml +from spot_rl.utils.utils import ros_frames as rf +from spot_rl.utils.whisper_translator import WhisperTranslator +from spot_wrapper.spot import Spot, SpotCamIds +from spot_wrapper.spot_qr_detector import SpotQRDetector +from tf2_ros import ( + ConnectivityException, + ExtrapolationException, + LookupException, + StaticTransformBroadcaster, +) +from tf2_ros.buffer import Buffer +from tf2_ros.transform_listener import TransformListener + +DOCK_ID = int(os.environ.get("SPOT_DOCK_ID", 549)) + +# Waypoints on left of the dock to observe the QR code clearly +SPOT_DOCK_OBSERVER_WAYPOINT_LEFT = [ + 0.3840912007597151, + -0.5816728569741766, + 149.58030524832756, +] +# Waypoints on right of the dock to observe the QR code clearly +SPOT_DOCK_OBSERVER_WAYPOINT_RIGHT = [ + 0.5419185005639034, + 0.5319243891865243, + -154.1506754378722, +] + + +def get_nav_xyt_to_wearer( + pose: sp.SE3, shift_offset: float = 0.6 +) -> Tuple[float, float, float]: + """ + Converts Sophus SE3 aria current pose to Tuple of x, y, theta, then flips it by 180 degrees and shifts it by a given offset. + At the end, the final x, y, theta is such that the robot will face the wearer. + + Args: + aria_pose (sp.SE3): Sophus SE3 object for aria's current pose (as cpf frame) w.r.t spotWorld frame + + Returns: + Tuple[float, float, float]: Tuple of x,y,theta as floats representing nav target for robot + """ + # ARROW STARTS FROM ORIGIN + + # get position and rotation as x, y, theta + position = pose.translation() + # Find the angle made by CPF's z axis with spotWorld's x axis + # as robot should orient to the CPF's z axis. First 3 elements of + # column 3 from spotWorld_T_cpf represents cpf's z axis in spotWorld frame + cpf_z_axis_in_spotWorld = pose.matrix()[:3, 2] + x_component_of_z_axis = cpf_z_axis_in_spotWorld[0] + y_component_of_z_axis = cpf_z_axis_in_spotWorld[1] + rotation = float( + np.arctan2( + y_component_of_z_axis, + x_component_of_z_axis, + ) + ) # tan^-1(y/x) + x, y, theta = position[0], position[1], rotation + + # push fwd this point along theta + x += shift_offset * np.cos(theta) + y += shift_offset * np.sin(theta) + # rotate theta by pi + theta += np.pi + + return (x, y, theta) + + +def get_place_xyz_to_wearer(pose: sp.SE3) -> Tuple[float, float, float]: + """ + Returns the xyz coordinates of the place pose in spot's local frame. The x,y,z coordinates + are present in the local frame of the robot. 
Make sure to pass the "is_local" flag as true to skill_manager.place() + + Args: + aria_pose (sp.SE3): Sophus SE3 object for aria's current pose (as cpf frame) w.r.t spotWorld frame + + Returns: + Tuple[float, float, float]: Tuple of x,y,z as floats representing place pose in spot's local frame + """ + return ( + 0.7, + 0.0, + 0.2, + ) + + +def get_handoff_to_human_pose(tf_buffer: Buffer, source: str, target: str) -> sp.SE3: + while not rospy.is_shutdown() and not tf_buffer.can_transform( + target_frame=target, source_frame=source, time=rospy.Time() + ): + rospy.logwarn_throttle(5.0, f"Waiting for transform from {source} to {target}") + rospy.sleep(0.5) + try: + transform_stamped_spotWorld_T_camera = tf_buffer.lookup_transform( + target_frame=target, + source_frame=source, + time=rospy.Time(0), + ) + except (LookupException, ConnectivityException, ExtrapolationException): + raise RuntimeError(f"Unable to lookup transform from {source} to {target}") + target_T_source = ros_TransformStamped_to_sophus_SE3( + ros_trf_stamped=transform_stamped_spotWorld_T_camera + ) + return target_T_source + + +def main(spot, config): + + spot = SpotSkillManager(use_mobile_pick=True) + + vp = SPOT_DOCK_OBSERVER_WAYPOINT_RIGHT + status, msg = spot.nav( + vp[0], + vp[1], + np.deg2rad(vp[2]), + ) + if not status: + rospy.logerr( + f"Failed to navigate to spot dock observer waypoint. Error: {msg}. Exiting..." + ) + return + # Sit Spot down + spot.sit() + + cam_id = SpotCamIds.HAND_COLOR + spot_qr = SpotQRDetector(spot=spot.spot, cam_ids=[cam_id]) + avg_spotWorld_T_marker = spot_qr.get_avg_spotWorld_T_marker(cam_id=cam_id) + + # Publish marker w.r.t spotWorld transforms for 5 seconds so it can be seen in rviz + # Instantiate static transform broadcaster for publishing marker w.r.t spotWorld transforms + static_tf_broadcaster = StaticTransformBroadcaster() + tf_buffer = Buffer() + _ = TransformListener(tf_buffer) + start_time = rospy.Time.now() + while rospy.Time.now() - start_time < rospy.Duration(2.0): + static_tf_broadcaster.sendTransform( + sophus_SE3_to_ros_TransformStamped( + sp_se3=avg_spotWorld_T_marker, + parent_frame=rf.SPOT_WORLD, + child_frame=rf.MARKER, + ) + ) + rospy.sleep(0.5) + + # Reset the viz params + rospy.set_param("/viz_pick", "None") + rospy.set_param("/viz_object", "None") + rospy.set_param("/viz_place", "None") + + # Check if robot should return to base + return_to_base = config.RETURN_TO_BASE + + # Get the waypoints from waypoints.yaml + waypoints_yaml_dict = get_waypoint_yaml() + + audio_to_text = WhisperTranslator() + sentence_similarity = SentenceSimilarity() + with initialize(config_path="../llm/src/conf"): + llm_config = compose(config_name="config") + llm = RearrangeEasyChain(llm_config) + + print( + "I am ready to take instructions!\n Sample Instructions : take the rubik cube from the dining table to the hamper" + ) + print("-" * 100) + pre_in_dock = False + while True: + audio_transcription_success = False + while not audio_transcription_success: + # try: + input("Are you ready?") + audio_to_text.record() + instruction = audio_to_text.translate() + print("Transcribed instructions : ", instruction) + + # Use LLM to convert user input to an instructions set + # Eg: nav_1, pick, nav_2 = 'bowl_counter', "container", 'coffee_counter' + t1 = time.time() + nav_1, pick, nav_2, _ = llm.parse_instructions(instruction) + print("PARSED", nav_1, pick, nav_2, f"Time {time.time()- t1} secs") + + # Find closest nav_targets to the ones robot knows locations of + nav_1 = 
sentence_similarity.get_most_similar_in_list( + nav_1, list(waypoints_yaml_dict["nav_targets"].keys()) + ) + nav_2 = sentence_similarity.get_most_similar_in_list( + nav_2, list(waypoints_yaml_dict["nav_targets"].keys()) + ) + print("MOST SIMILAR: ", nav_1, pick, nav_2) + audio_transcription_success = True + # except Exception as e: + # print(f"Exception encountered in Speech to text : {e} \n\n Retrying...") + # Used for Owlvit + rospy.set_param("object_target", pick) + + # Used for Visualizations + rospy.set_param("viz_pick", nav_1) + rospy.set_param("viz_object", pick) + rospy.set_param("viz_place", nav_2) + + if pre_in_dock: + spot.spot.power_robot() + + # Do LSC by calling skills + slow_down = 0.5 # slow down time to increase the skill execution stability + spot.nav(nav_1) + time.sleep(slow_down) + spot.pick(pick) + time.sleep(slow_down) + if nav_2 == "human": + human_pose = get_handoff_to_human_pose( + tf_buffer, source="camera", target="spotWorld" + ) + handoff_xyt = get_nav_xyt_to_wearer(human_pose) + spot.nav(handoff_xyt[0], handoff_xyt[1], handoff_xyt[2]) + time.sleep(slow_down) + + handoff_xyz = get_place_xyz_to_wearer(human_pose) + spot.place(handoff_xyz[0], handoff_xyz[1], handoff_xyz[2], is_local=True) + # spot.place( is_local=True, visualization=True) + time.sleep(slow_down) + else: + spot.nav(nav_2) + time.sleep(slow_down) + spot.place(nav_2) + time.sleep(slow_down) + if return_to_base: + spot.dock() + pre_in_dock = True + + +if __name__ == "__main__": + parser = get_default_parser() + parser.add_argument("-m", "--use-mixer", action="store_true") + parser.add_argument("--output") + args = parser.parse_args() + config = construct_config(opts=args.opts) + + spot = Spot("LSC_HITLNode") + main(None, config) From c16acc15864d1aa1f889e86546b52291413a9003 Mon Sep 17 00:00:00 2001 From: Kavit Shah Date: Tue, 2 Jul 2024 18:04:54 -0700 Subject: [PATCH 3/7] Review these changes --- .../episodic_memory_robotic_fetch_quest3_c.py | 138 +++++++++--------- spot_rl_experiments/spot_rl/envs/lsc_hitl.py | 5 + 2 files changed, 73 insertions(+), 70 deletions(-) diff --git a/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py b/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py index 182beb0d..4bb37c1e 100644 --- a/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py +++ b/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py @@ -85,54 +85,54 @@ class EpisodicMemoryRoboticFetchQuest3: Warning: Do not create objects of this class directly. It is a standalone node """ - def __init__(self, spot: Spot, verbose: bool = True, use_policies: bool = True): - # @FIXME: This is initializing a ros-node silently (at core of inheritence in - # SpotRobotSubscriberMixin). Make it explicit - self.skill_manager = SpotSkillManager( - spot=spot, - verbose=verbose, - use_policies=use_policies, - ) + def __init__(self): + # # @FIXME: This is initializing a ros-node silently (at core of inheritence in + # # SpotRobotSubscriberMixin). Make it explicit + # self.skill_manager = SpotSkillManager( + # spot=spot, + # verbose=verbose, + # use_policies=use_policies, + # ) # Navigate Spot to SPOT_DOCK_OBSERVER_WAYPOINT_LEFT so that it can see the marker - status, msg = self.skill_manager.nav( - SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[0], - SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[1], - np.deg2rad(SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[2]), - ) - if not status: - rospy.logerr( - f"Failed to navigate to spot dock observer waypoint. Error: {msg}. Exiting..." 
- ) - return - - # Sit Spot down - self.skill_manager.sit() - - # Instantiate static transform broadcaster for publishing marker w.r.t spotWorld transforms - self.static_tf_broadcaster = StaticTransformBroadcaster() - - self._spot_fetch_trigger_sub = rospy.Subscriber( - "/spot_fetch_trigger", Bool, self.spot_fetch_trigger_callback - ) - self._spot_fetch_flag = False + # status, msg = self.skill_manager.nav( + # SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[0], + # SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[1], + # np.deg2rad(SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[2]), + # ) + # if not status: + # rospy.logerr( + # f"Failed to navigate to spot dock observer waypoint. Error: {msg}. Exiting..." + # ) + # return + + # # Sit Spot down + # self.skill_manager.sit() + + # # Instantiate static transform broadcaster for publishing marker w.r.t spotWorld transforms + # self.static_tf_broadcaster = StaticTransformBroadcaster() + + # self._spot_fetch_trigger_sub = rospy.Subscriber( + # "/spot_fetch_trigger", Bool, self.spot_fetch_trigger_callback + # ) + # self._spot_fetch_flag = False # Detect the marker and get the average pose of marker w.r.t spotWorld frame - cam_id = SpotCamIds.HAND_COLOR - spot_qr = SpotQRDetector(spot=spot, cam_ids=[cam_id]) - avg_spotWorld_T_marker = spot_qr.get_avg_spotWorld_T_marker(cam_id=cam_id) - - # Publish marker w.r.t spotWorld transforms for 5 seconds so it can be seen in rviz - start_time = rospy.Time.now() - while rospy.Time.now() - start_time < rospy.Duration(2.0): - self.static_tf_broadcaster.sendTransform( - sophus_SE3_to_ros_TransformStamped( - sp_se3=avg_spotWorld_T_marker, - parent_frame=rf.SPOT_WORLD, - child_frame=rf.MARKER, - ) - ) - rospy.sleep(0.5) + # cam_id = SpotCamIds.HAND_COLOR + # spot_qr = SpotQRDetector(spot=spot, cam_ids=[cam_id]) + # avg_spotWorld_T_marker = spot_qr.get_avg_spotWorld_T_marker(cam_id=cam_id) + + # # Publish marker w.r.t spotWorld transforms for 5 seconds so it can be seen in rviz + # start_time = rospy.Time.now() + # while rospy.Time.now() - start_time < rospy.Duration(2.0): + # self.static_tf_broadcaster.sendTransform( + # sophus_SE3_to_ros_TransformStamped( + # sp_se3=avg_spotWorld_T_marker, + # parent_frame=rf.SPOT_WORLD, + # child_frame=rf.MARKER, + # ) + # ) + # rospy.sleep(0.5) # Instantiate publisher for publishing go-to pose for handoff self._nav_PoseStamped_pub_for_place = rospy.Publisher( @@ -145,7 +145,7 @@ def __init__(self, spot: Spot, verbose: bool = True, use_policies: bool = True): self._tf_listener = TransformListener(self._tf_buffer) # Wait for transform from camera to spotWorld to be available - while not rospy.is_shutdown() and not self._spot_fetch_flag: + while not rospy.is_shutdown(): while not rospy.is_shutdown() and not self._tf_buffer.can_transform( target_frame=rf.SPOT_WORLD, source_frame="camera", time=rospy.Time() ): @@ -178,24 +178,24 @@ def __init__(self, spot: Spot, verbose: bool = True, use_policies: bool = True): ) ) - rospy.logwarn_throttle( - 5.0, "Waiting for demo trigger on /spot_fetch_trigger topic" - ) + # rospy.logwarn_throttle( + # 5.0, "Waiting for demo trigger on /spot_fetch_trigger topic" + # ) rospy.sleep(1.0) # Set run status to true in the beginning - run_status = True + # run_status = True - if run_status: - # Nav to x,y,theta of interest (Aria's cpf frame when it saw the object) - nav_xyt_for_handoff = self.nav_xyt_for_handoff - rospy.loginfo(f"Nav 2D location for pick: {nav_xyt_for_handoff}") + # if run_status: + # # Nav to x,y,theta of interest (Aria's cpf frame when it saw the object) + # nav_xyt_for_handoff 
= self.nav_xyt_for_handoff + # rospy.loginfo(f"Nav 2D location for pick: {nav_xyt_for_handoff}") - self.skill_manager.nav( - nav_xyt_for_handoff[0], nav_xyt_for_handoff[1], nav_xyt_for_handoff[2] - ) + # self.skill_manager.nav( + # nav_xyt_for_handoff[0], nav_xyt_for_handoff[1], nav_xyt_for_handoff[2] + # ) - self.skill_manager.sit() + # self.skill_manager.sit() def spot_fetch_trigger_callback(self, msg): """ @@ -253,19 +253,17 @@ def get_nav_xyt_to_wearer( @click.option("--verbose", type=bool, default=True) @click.option("--use-policies", type=bool, default=False) def main(verbose: bool, use_policies: bool): - # rospy.init_node("episode") - # rospy.logwarn("Starting up ROS node") - - spot = Spot("EpisodicMemoryRoboticFetchQuest3Node") - with spot.get_lease(hijack=True) as lease: - if lease is None: - raise RuntimeError("Could not get lease") - else: - rospy.loginfo("Acquired lease") - - _ = EpisodicMemoryRoboticFetchQuest3( - spot=spot, verbose=verbose, use_policies=use_policies - ) + rospy.init_node("episode") + rospy.logwarn("Starting up ROS node") + + # spot = Spot("EpisodicMemoryRoboticFetchQuest3Node") + # with spot.get_lease(hijack=True) as lease: + # if lease is None: + # raise RuntimeError("Could not get lease") + # else: + # rospy.loginfo("Acquired lease") + + _ = EpisodicMemoryRoboticFetchQuest3() if __name__ == "__main__": diff --git a/spot_rl_experiments/spot_rl/envs/lsc_hitl.py b/spot_rl_experiments/spot_rl/envs/lsc_hitl.py index d2890ec3..1ae7e961 100644 --- a/spot_rl_experiments/spot_rl/envs/lsc_hitl.py +++ b/spot_rl_experiments/spot_rl/envs/lsc_hitl.py @@ -12,6 +12,7 @@ import numpy as np import rospy import sophus as sp +from geometry_msgs.msg import PoseStamped from hydra import compose, initialize from perception_and_utils.utils.conversions import ( ros_PoseStamped_to_sophus_SE3, @@ -153,6 +154,9 @@ def main(spot, config): spot_qr = SpotQRDetector(spot=spot.spot, cam_ids=[cam_id]) avg_spotWorld_T_marker = spot_qr.get_avg_spotWorld_T_marker(cam_id=cam_id) + nav_PoseStamped_pub_for_place = rospy.Publisher( + "/nav_pose_for_place_viz", PoseStamped, queue_size=10 + ) # Publish marker w.r.t spotWorld transforms for 5 seconds so it can be seen in rviz # Instantiate static transform broadcaster for publishing marker w.r.t spotWorld transforms static_tf_broadcaster = StaticTransformBroadcaster() @@ -228,6 +232,7 @@ def main(spot, config): if pre_in_dock: spot.spot.power_robot() + input("Start LSC-HITL?") # Do LSC by calling skills slow_down = 0.5 # slow down time to increase the skill execution stability spot.nav(nav_1) From c1929ccddfc40ff176de8368021f4ab13d83c9b4 Mon Sep 17 00:00:00 2001 From: Kavit Shah Date: Thu, 25 Jul 2024 15:52:34 -0700 Subject: [PATCH 4/7] Q3Streamer - Laptop : WIP --- .gitignore | 2 + .../aria_data_utils/aria_live_streamer.py | 5 +- .../aria_data_utils/aria_sdk_utils.py | 17 +- .../human_sensor_data_streamer_interface.py | 225 ++++++++++++ .../aria_data_utils/quest3_data_streamer.py | 332 ++++++++++++++++++ .../unified_quest_camera_interface.py | 59 ++++ .../detector_wrappers/april_tag_detector.py | 19 +- .../generic_detector_interface.py | 4 +- .../human_motion_detector.py | 95 +++++ .../detector_wrappers/object_detector.py | 11 +- .../perception_and_utils/utils/conversions.py | 5 + .../perception_and_utils/utils/data_frame.py | 20 ++ .../perception_and_utils/utils/math_utils.py | 16 +- perception_and_utils_root/setup.py | 2 +- .../configs/ros_frame_names.yaml | 1 + 15 files changed, 789 insertions(+), 24 deletions(-) create mode 100644 
aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py create mode 100644 aria_data_loaders/aria_data_utils/quest3_data_streamer.py create mode 100644 aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py create mode 100644 perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py create mode 100644 perception_and_utils_root/perception_and_utils/utils/data_frame.py diff --git a/.gitignore b/.gitignore index 40c00829..14974780 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ intel_realsense_payload_for_spotsim2real/dist/* intel_realsense_payload_for_spotsim2real/build/* +aria_data_loaders/aria_data_utils/quest3_streamer/** + bd_spot_wrapper/spot_wrapper.egg-info/** bd_spot_wrapper/spot_wrapper/home.txt spot_rl_experiments/spot_rl.egg-info/** diff --git a/aria_data_loaders/aria_data_utils/aria_live_streamer.py b/aria_data_loaders/aria_data_utils/aria_live_streamer.py index 5d6355ce..cad2f9c8 100644 --- a/aria_data_loaders/aria_data_utils/aria_live_streamer.py +++ b/aria_data_loaders/aria_data_utils/aria_live_streamer.py @@ -19,7 +19,7 @@ except Exception as e: print(f"Cannot import sophuspy due to {e}. Import sophus instead") import sophus as sp -from aria_data_utils.aria_sdk_utils import update_iptables +from aria_data_utils.aria_sdk_utils import update_iptables_aria from geometry_msgs.msg import Pose, PoseStamped from nav_msgs.msg import Odometry from perception_and_utils.perception.detector_wrappers.april_tag_detector import ( @@ -85,7 +85,6 @@ class AriaLiveReader: """ def __init__(self, verbose: bool = False) -> None: - super().__init__() self.verbose = verbose self._in_index = 0 self._out_index = 0 @@ -576,7 +575,7 @@ def main(do_update_iptables: bool, read_only: bool, debug: bool, hz: int): rospy.init_node("aria_live_reader", log_level=_log_level) rospy.logwarn("Starting up ROS node") if do_update_iptables: - update_iptables() + update_iptables_aria() else: rospy.logwarn("Not updating iptables") object_name = "bottle" diff --git a/aria_data_loaders/aria_data_utils/aria_sdk_utils.py b/aria_data_loaders/aria_data_utils/aria_sdk_utils.py index ab7f2ef1..a58fd5ba 100644 --- a/aria_data_loaders/aria_data_utils/aria_sdk_utils.py +++ b/aria_data_loaders/aria_data_utils/aria_sdk_utils.py @@ -15,19 +15,19 @@ import subprocess -def update_iptables() -> None: +def update_iptables(protocol:str="tcp", direction:str="INPUT") -> None: """ - Update firewall to permit incoming UDP connections for DDS + Update firewall to permit incoming tcp / udp connections for DDS """ update_iptables_cmd = [ "sudo", "iptables", "-A", - "INPUT", + direction, "-p", - "udp", + protocol, "-m", - "udp", + protocol, "--dport", "7000:8000", "-j", @@ -36,3 +36,10 @@ def update_iptables() -> None: print("Running the following command to update iptables:") print(update_iptables_cmd) subprocess.run(update_iptables_cmd) + +def update_iptables_aria(): + update_iptables(protocol="udp", direction="INPUT") + +def update_iptables_quest3(): + update_iptables(protocol="tcp", direction="INPUT") + update_iptables(protocol="tcp", direction="OUTPUT") diff --git a/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py b/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py new file mode 100644 index 00000000..d6642499 --- /dev/null +++ b/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py @@ -0,0 +1,225 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. 
+# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + + +import os +from typing import Any, Dict, List, Optional, Tuple + +import click +import cv2 +import numpy as np +import rospy +import sophus as sp +from perception_and_utils.perception.detector_wrappers.april_tag_detector import ( + AprilTagDetectorWrapper, +) +from perception_and_utils.perception.detector_wrappers.object_detector import ( + ObjectDetectorWrapper, +) +from perception_and_utils.perception.detector_wrappers.human_motion_detector import ( + HumanMotionDetector, +) + +from spot_rl.utils.utils import ros_frames as rf + +# ROS imports +from geometry_msgs.msg import Pose, PoseStamped +from nav_msgs.msg import Odometry +from tf2_ros import StaticTransformBroadcaster +from visualization_msgs.msg import Marker + +from perception_and_utils.utils.data_frame import DataFrame + +class CameraParams: + """ + CameraParams class to store camera parameters like focal length, principal point etc. + It stores the image, timestamp and frame number of each received image + """ + def __init__(self, focal_lengths: Tuple[float, float], principal_point: Tuple[float, float]): + self._focal_lengths = focal_lengths # type: Optional[Tuple[float, float]] + self._principal_point = principal_point # type: Optional[Tuple[float, float]] + +class HumanSensorDataStreamerInterface: + def __init__(self, verbose: bool = False) -> None: + self.verbose = verbose + + self._is_connected = False # TODO : Maybe not needed + + # TODO: Update this later with PerceptionPipeline + # Create all detectors + self.april_tag_detector = AprilTagDetectorWrapper() + self.object_detector = ObjectDetectorWrapper() + self.human_motion_detector = HumanMotionDetector() + + # ROS publishers & broadcaster + self.static_tf_broadcaster = StaticTransformBroadcaster() + + # self.pose_of_interest_publisher = rospy.Publisher( + # "/pose_of_interest", PoseStamped, queue_size=10 + # ) # pose of device when object was detected from rgb image + # self.pose_of_interest_marker_publisher = rospy.Publisher( + # "/pose_of_interest_marker", Marker, queue_size=10 + # ) + # self.odom_publisher = rospy.Publisher( + # "/odom", Odometry, queue_size=10 + # ) + # self.human_pose_publisher = rospy.Publisher( + # "/current_pose", PoseStamped, queue_size=10 + # ) + # self.human_handoff_pose_publisher = rospy.Publisher( + # "/human_handoff_pose", PoseStamped, queue_size=10 + # ) + + # TODO: Define DEVICE FRAME as a visual frame of reference i.e. 
front facing frame of reference + self.device_T_camera: Optional[sp.SE3] = None # TODO: Should be initialized by implementations of this class + + # Maintain a list of all poses where qr code is detected (w.r.t deviceWorld) + self.marker_positions_list = ( + [] + ) # type: List[np.ndarray] # List of position as np.ndarray (x, y, z) + self.marker_quaternion_list = ( + [] + ) # type: List[np.ndarray] # List of quaternions as np.ndarray (x, y, z, w) + self.avg_deviceWorld_T_marker = None # type: Optional[sp.SE3] + + def connect(self): + """ + Connect to Device + """ + raise NotImplementedError + + def disconnect(self): + """ + Disconnects from Device cleanly + """ + raise NotImplementedError + + def _setup_device(self) -> Any: + """ + Setup device with appropriate configurations for live streaming + + Updates: + - self.device_T_camera - Sophus SE3 transform from device frame (left-slam camera) to rgb-camera frame + + Returns: + Any : TODO: Decide what to return + """ + raise NotImplementedError + + def get_latest_data_frame(self) -> Optional[DataFrame]: + raise NotImplementedError + + def process_frame( + self, + frame: dict, + outputs: dict, + detect_qr: bool = False, + detect_objects: bool = False, + object_label="milk bottle", + ) -> Tuple[np.ndarray, dict]: + raise NotImplementedError + + def initialize_april_tag_detector(self, outputs: dict = {}): + """ + Initialize the april tag detector + + Args: + outputs (dict, optional): Dictionary of outputs from the april tag detector. Defaults to {}. + + Updates: + - self.april_tag_detector: AprilTagDetectorWrapper object + + Returns: + outputs (dict): Dictionary of outputs from the april tag detector with following keys: + - "tag_image_list" - List of np.ndarrays of images with detections + - "tag_image_metadata_list" - List of image metadata + - "tag_base_T_marker_list" - List of Sophus SE3 transforms from base frame to marker + where base is "device" frame for aria + """ + raise NotImplementedError + + def initialize_object_detector( + self, outputs: dict = {}, object_labels: list = [], meta_objects: List[str] = [] + ): + """ + Initialize the object detector + + Args: + outputs (dict, optional): Dictionary of outputs from the object detector. Defaults to {}. + object_labels (list, optional): List of object labels to detect. Defaults to []. 
+ meta_objects (List[str], optional): List of other objects to be detected in the image + apart from the object_labels; meta_objects are then + used internally to detect intersection of bbox with + objects of interest (eg: hand with bottle) + + Updates: + - self.object_detector: ObjectDetectorWrapper object + + Returns: + outputs (dict): Dictionary of outputs from the object detector with following keys: + - "object_image_list" - List of np.ndarrays of images with detections + - "object_image_metadata_list" - List of image metadata + - "object_image_segment" - List of Int signifying which segment the image + belongs to; smaller number means latter the segment time-wise + - "object_score_list" - List of Float signifying the detection score + """ + raise NotImplementedError + + def initialize_human_motion_detector(self, outputs: dict = {}): + """ + """ + raise NotImplementedError + + ##### ROS Publishers ##### + def publish_human_pose(self, data_frame: DataFrame): + """ + Publishes current pose of Device as a pose of interest for Spot + + Args: + data_frame (DataFrame): DataFrame object containing data packet with rgb, depth + and all necessary transformations + + Publishes: + - /human_pose_publisher: ROS PoseStamped message for quest3World_T_rgb # TODO: Update this from rgb to VIZ frame + """ + # Publish as pose for detail + pose_stamped = PoseStamped() + pose_stamped.header.stamp = rospy.Time().now() + pose_stamped.header.seq = data_frame._frame_number + pose_stamped.header.frame_id = rf.QUEST3_CAMERA + pose_stamped.pose = data_frame._deviceWorld_T_camera_rgb # TODO: Use a VIZ frame which is forward facing + self.human_pose_publisher.publish(pose_stamped) + + def publish_marker(self, data_frame: DataFrame, marker_scale: float=0.1): + """ + Publishes marker at pose of interest + + Args: + data_frame (DataFrame): DataFrame object containing data packet with rgb, depth + and all necessary transformations + + Publishes: + - /pose_of_interest_marker_publisher: ROS PoseStamped message for quest3World_T_rgb # TODO: Update this from rgb to VIZ frame + """ + # Publish as marker for interpretability + marker = Marker() + marker.header.stamp = rospy.Time().now() + marker.header.frame_id = rf.rf.QUEST3_CAMERA + marker.id = self._frame_number + + marker.type = Marker.SPHERE + marker.action = Marker.ADD + + marker.pose = data_frame._deviceWorld_T_camera_rgb + + marker.scale.x = marker_scale + marker.scale.y = marker_scale + marker.scale.z = marker_scale + + marker.color.a = 1.0 + marker.color.r = 0.45 + marker.color.g = 0.95 + marker.color.b = 0.2 + + self.pose_of_interest_marker_publisher.publish(marker) diff --git a/aria_data_loaders/aria_data_utils/quest3_data_streamer.py b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py new file mode 100644 index 00000000..d7d8c122 --- /dev/null +++ b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py @@ -0,0 +1,332 @@ +from aria_data_utils.human_sensor_data_streamer_interface import CameraParams, DataFrame, HumanSensorDataStreamerInterface +from aria_data_utils.aria_sdk_utils import update_iptables_quest3 +from perception_and_utils.utils.math_utils import get_running_avg_a_T_b +from quest3_streamer.unified_quest_camera import UnifiedQuestCamera # From internal repo + +import rospy +import numpy as np +import sophus as sp +import cv2 +from typing import Any, Dict, Optional, Tuple, List +import click +import logging +import time +from perception_and_utils.utils.conversions import ( + sophus_SE3_to_ros_PoseStamped, + 
sophus_SE3_to_ros_TransformStamped, + xyt_to_sophus_SE3, +) +from spot_rl.utils.utils import ros_frames as rf + +class Quest3DataStreamer(HumanSensorDataStreamerInterface): + def __init__(self, verbose: bool = False) -> None: + super().__init__(verbose) + + # Init Logging + self.logger = logging.getLogger("Quest3DataStreamer") + self.logger.setLevel(logging.INFO) + handler = logging.StreamHandler() + formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + handler.setFormatter(formatter) + self.logger.addHandler(handler) + + self.unified_quest3_camera = UnifiedQuestCamera() + self.rgb_cam_params : CameraParams = None + self.depth_cam_params : CameraParams = None + self._frame_number = -1 + self._setup_device() + + def connect(self): + """ + Connect to Device + """ + self._is_connected = True + self.logger("Quest3DataStreamer :: Connected") + + def disconnect(self): + """ + Disconnects from Device cleanly + """ + self._is_connected = False + self.logger("Quest3DataStreamer :: Disconnected") + + def is_connected(self) -> bool: + return self._is_connected # TODO: Return device connection status from Bruno's APIs + + def _setup_device(self) -> Any: + # Setup RGB camera params + while self.rgb_cam_params is None: + rgb_fl = self.unified_quest3_camera.get_rbg_focal_lengths() + rgb_pp = self.unified_quest3_camera.get_rgb_principal_point() + + if rgb_fl is not None and rgb_pp is not None: + rospy.logwarn("RGB Camera Params: focal lengths = {}, principal point = {}".format(rgb_fl, rgb_pp)) + self.rgb_cam_params = CameraParams(focal_lengths=rgb_fl, principal_point=rgb_pp) + else: + self.logger.info("Waiting for RGB camera params to be set...") + rospy.logwarn("Waiting for RGB camera params to be set...") + time.sleep(0.1) + rospy.loginfo("RGB Camera Params: focal lengths = {}, principal point = {}".format(self.rgb_cam_params._focal_lengths, self.rgb_cam_params._principal_point)) + + # Setup Depth camera params + while self.depth_cam_params is None: + depth_fl = self.unified_quest3_camera.get_depth_focal_lengths() + depth_pp = self.unified_quest3_camera.get_depth_principal_point() + + if depth_fl is not None and depth_pp is not None: + rospy.logwarn("Depth Camera Params: focal lengths = {}, principal point = {}".format(depth_fl, depth_pp)) + self.depth_cam_params = CameraParams(focal_lengths=depth_fl, principal_point=depth_pp) + else: + self.logger.info("Waiting for Depth camera params to be set...") + rospy.logwarn("Waiting for Depth camera params to be set...") + time.sleep(0.1) + print("Depth Camera Params: focal lengths = {}, principal point = {}".format(self.depth_cam_params._focal_lengths, self.depth_cam_params._principal_point)) + + def get_latest_data_frame(self) -> Optional[DataFrame]: + # Create DataFrame object + data_frame = DataFrame() + + # Populate rgb frame data iff all the required data is available; else return None + rgb_frame = self.unified_quest3_camera.get_rgb() + deviceWorld_T_rgbCam = self.unified_quest3_camera.get_deviceWorld_T_rgbCamera() + device_T_rgbCam = self.unified_quest3_camera.get_device_T_rgbCamera() + if rgb_frame is None or deviceWorld_T_rgbCam is None or device_T_rgbCam is None: + rospy.logwarn("Returning None, as rgb_frame or deviceWorld_T_rgbCam or device_T_rgbCam is None.") + return None + else: + data_frame._rgb_frame = rgb_frame + data_frame._deviceWorld_T_camera_rgb = deviceWorld_T_rgbCam + data_frame._device_T_camera_rgb = device_T_rgbCam + + # Populate depth frame data iff all the required data is available; else return None + 
depth_frame = self.unified_quest3_camera.get_depth() + deviceWorld_T_depthCam = self.unified_quest3_camera.get_deviceWorld_T_depthCamera() + device_T_depthCam = self.unified_quest3_camera.get_device_T_depthCamera() + if depth_frame is None or deviceWorld_T_depthCam is None: + rospy.logwarn("Returning None as depth_frame or deviceWorld_T_depthCam or device_T_depthCam is None.") + return None + else: + data_frame._depth_frame = depth_frame + data_frame._deviceWorld_T_camera_depth = deviceWorld_T_depthCam + data_frame._device_T_camera_depth = device_T_depthCam + + # Update frame number and timestamp of DataFrame + self._frame_number += 1 + data_frame._frame_number = self._frame_number + data_frame._timestamp_s = time.time() + + data_frame._avg_rgb_fps = self.unified_quest3_camera.get_avg_fps_rgb() + data_frame._avg_depth_fps = self.unified_quest3_camera.get_avg_fps_depth() + + # Align depth frame with rgb frame + data_frame._aligned_depth_frame = None # TODO: Add this logic + return data_frame + + def process_frame( + self, + data_frame: DataFrame, + outputs: dict, + detect_qr: bool = False, + detect_objects: bool = False, + detect_human_motion: bool = False, + object_label="milk bottle", + ) -> Tuple[np.ndarray, dict]: + # Initialize object_scores to empty dict for current image frame + # object_scores = {} # type: Dict[str, Any] + + # Get rgb image from frame + viz_img = data_frame._rgb_frame + if viz_img is None: + rospy.logwarn("No image found in frame") + + if detect_qr: + (viz_img, camera_T_marker) = self.april_tag_detector.process_frame(frame=data_frame) # type: ignore + + deviceWorld_T_camera = data_frame._deviceWorld_T_camera_rgb + # Broadcast camera frame wrt deviceWorld + self.static_tf_broadcaster.sendTransform( + sophus_SE3_to_ros_TransformStamped( + sp_se3=deviceWorld_T_camera, + parent_frame=rf.QUEST3_WORLD, + child_frame=rf.QUEST3_CAMERA, + ) + ) + + if camera_T_marker is not None: + # Frame: a = deviceWorld (quest3World for Quest3 & ariaWorld for Aria) + # Frame: intermediate = camera (quest3's rgb camera frame & cpf frame for Aria) + # Frame: b = marker / qr code + ( + self.marker_positions_list, + self.marker_quaternion_list, + self.avg_deviceWorld_T_marker, + ) = get_running_avg_a_T_b( + current_avg_a_T_b=self.avg_deviceWorld_T_marker, + a_T_b_position_list=self.marker_positions_list, + a_T_b_quaternion_list=self.marker_quaternion_list, + a_T_intermediate=deviceWorld_T_camera, + intermediate_T_b=camera_T_marker, + ) + + # Publish marker pose in ariaWorld frame + if self.avg_deviceWorld_T_marker is not None: + avg_marker_T_deviceWorld = self.avg_deviceWorld_T_marker.inverse() + self.static_tf_broadcaster.sendTransform( + sophus_SE3_to_ros_TransformStamped( + sp_se3=avg_marker_T_deviceWorld, + parent_frame=rf.MARKER, + child_frame=rf.QUEST3_WORLD, + ) + ) + + viz_img, outputs = self.april_tag_detector.get_outputs( + frame=data_frame, + outputs=outputs, + base_T_marker=camera_T_marker, + timestamp=data_frame._timestamp_s, + img_metadata=None, + viz_img=viz_img, + ) + + if detect_human_motion: + _, activity_str = self.human_motion_detector.process_frame(frame=data_frame) + viz_img, _ = self.human_motion_detector.get_outputs(viz_img, {"activity": activity_str}) + print(f"Activity: {activity_str}") + rospy.set_param("human_activity", activity_str) + + # if detect_objects: + # viz_img, object_scores = self.object_detector.process_frame(viz_img) + # if object_label in object_scores.keys(): + # if self.verbose: + # plt.imsave(f"frame_{self.frame_number}.jpg", viz_img) + # # 
self.publish_pose_of_interest(frame.get("ros_pose")) + + return viz_img, outputs + + def initialize_april_tag_detector(self, outputs: dict = {}): + """ + Initialize the april tag detector + + Args: + outputs (dict, optional): Dictionary of outputs from the april tag detector. Defaults to {}. + + Updates: + - self.april_tag_detector: AprilTagDetectorWrapper object + + Returns: + outputs (dict): Dictionary of outputs from the april tag detector with following keys: + - "tag_image_list" - List of np.ndarrays of images with detections + - "tag_image_metadata_list" - List of image metadata + - "tag_base_T_marker_list" - List of Sophus SE3 transforms from base frame to marker + where base is "device" frame for aria + """ + + focal_lengths = self.rgb_cam_params._focal_lengths # type: ignore + principal_point = self.rgb_cam_params._principal_point # type: ignore + + outputs.update( + self.april_tag_detector._init_april_tag_detector( + focal_lengths=focal_lengths, principal_point=principal_point + ) + ) + return outputs + + def initialize_object_detector( + self, outputs: dict = {}, object_labels: list = [], meta_objects: List[str] = [] + ): + """ + Initialize the object detector + + Args: + outputs (dict, optional): Dictionary of outputs from the object detector. Defaults to {}. + object_labels (list, optional): List of object labels to detect. Defaults to []. + meta_objects (List[str], optional): List of other objects to be detected in the image + apart from the object_labels; meta_objects are then + used internally to detect intersection of bbox with + objects of interest (eg: hand with bottle) + + Updates: + - self.object_detector: ObjectDetectorWrapper object + + Returns: + outputs (dict): Dictionary of outputs from the object detector with following keys: + - "object_image_list" - List of np.ndarrays of images with detections + - "object_image_metadata_list" - List of image metadata + - "object_image_segment" - List of Int signifying which segment the image + belongs to; smaller number means latter the segment time-wise + - "object_score_list" - List of Float signifying the detection score + """ + outputs.update( + self.object_detector._init_object_detector( + object_labels + meta_objects, + verbose=self.verbose, + version=2, + ) + ) + self.object_detector._core_objects = object_labels + self.object_detector._meta_objects = meta_objects + + return outputs + + def initialize_human_motion_detector(self): + self.human_motion_detector._init_human_motion_detector() + + +@click.command() +@click.option("--do-update-iptables", is_flag=True, type=bool, default=False) +@click.option("--debug", is_flag=True, default=False) +@click.option("--hz", type=int, default=100) +def main(do_update_iptables:bool, debug:bool, hz:int): + if debug: + _log_level = rospy.DEBUG + else: + _log_level = rospy.INFO + + rospy.init_node("quest3_data_streamer", log_level=_log_level) + rospy.loginfo("Starting quest3_data_streamer node") + + if do_update_iptables: + update_iptables_quest3() + else: + rospy.logwarn("Not updating iptables") + + rate = rospy.Rate(hz) + outputs: Dict[str, Any] = {} + data_streamer = None + cv2.namedWindow("Image", cv2.WINDOW_NORMAL) + try: + data_streamer = Quest3DataStreamer() + + outputs = data_streamer.initialize_april_tag_detector(outputs=outputs) + outputs = data_streamer.initialize_object_detector(outputs=outputs, object_labels=["milk bottle"]) + data_streamer.initialize_human_motion_detector() + # data_streamer.connect() + while not rospy.is_shutdown(): + data_frame = 
data_streamer.get_latest_data_frame() + if data_frame is not None: + if debug: + rospy.loginfo("Received data frame") + viz_img, outputs = data_streamer.process_frame( + data_frame=data_frame, + outputs=outputs, + detect_qr=True, + detect_objects=False, + detect_human_motion=True, + ) + # data_streamer.publish_human_pose(data_frame=data_frame) + cv2.imshow("Image", viz_img) + cv2.waitKey(1) + else: + print("No data frame received.") + except Exception as e: + print("Ending script.") + rospy.logwarn(f"Exception: {e}") + if data_streamer is not None: + data_streamer.disconnect() + + cv2.destroyAllWindows() + rospy.logwarn("Disconnecting and shutting down the node") + + +if __name__ == "__main__": + main() diff --git a/aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py b/aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py new file mode 100644 index 00000000..a6108b00 --- /dev/null +++ b/aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py @@ -0,0 +1,59 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Optional, Tuple +import sophus as sp +import numpy as np + +""" +This file defines the interface to for Quest3 streaming receiver API calls + +The implementation for this Interface is available for Meta employees only at the moment + +TODO: Define the steps to get implementation file +""" + + +class UnifiedQuest3CameraInterface: + def __init__(self): + pass + + def get_rgb(self) -> Optional[np.ndarray]: + raise NotImplementedError + + def get_depth(self) -> Optional[np.ndarray]: + raise NotImplementedError + + def get_rbg_focal_lengths(self) -> Optional[Tuple[float, float]]: + raise NotImplementedError + + def get_rgb_principal_point(self) -> Optional[Tuple[float, float]]: + raise NotImplementedError + + def get_depth_focal_lengths(self) -> Optional[Tuple[float, float]]: + raise NotImplementedError + + def get_depth_principal_point(self) -> Optional[Tuple[float, float]]: + raise NotImplementedError + + def get_deviceWorld_T_rgbCamera(self) -> Optional[sp.SE3]: + raise NotImplementedError + + def get_device_T_rgbCamera(self) -> Optional[sp.SE3]: + raise NotImplementedError + + def get_deviceWorld_T_depthCamera(self) -> Optional[sp.SE3]: + raise NotImplementedError + + def get_device_T_depthCamera(self) -> Optional[sp.SE3]: + raise NotImplementedError + + def get_avg_fps_rgb(self): + raise NotImplementedError + + def get_avg_fps_depth(self): + raise NotImplementedError + + def is_connected(): + raise NotImplementedError diff --git a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/april_tag_detector.py b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/april_tag_detector.py index 74eaecdd..1c427dcc 100644 --- a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/april_tag_detector.py +++ b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/april_tag_detector.py @@ -21,6 +21,8 @@ GenericDetector, ) from perception_and_utils.utils.image_utils import decorate_img_with_text_for_qr +from perception_and_utils.utils.data_frame import DataFrame + DOCK_ID = int(os.environ.get("SPOT_DOCK_ID", 520)) @@ -103,7 +105,7 @@ def _init_april_tag_detector( def process_frame( self, - img_frame: np.ndarray, + frame: DataFrame, ) -> Tuple[np.ndarray, sp.SE3]: """ Process image frame to detect QR code and estimate 
marker pose wrt camera @@ -115,6 +117,8 @@ def process_frame( updated_img_frame (np.ndarray) : Image frame with detections and text for visualization camera_T_marker (sp.SE3) : SE3 matrix representing marker frame as detected in camera frame """ + img_frame = frame._rgb_frame + # Do nothing if detector is not enabled if self.is_enabled is False: self._logger.warning( @@ -131,11 +135,12 @@ def process_frame( def get_outputs( self, - img_frame: np.ndarray, + frame: DataFrame, outputs: Dict, base_T_marker: sp.SE3, timestamp: int, img_metadata: Any, + viz_img: np.ndarray, ) -> Tuple[np.ndarray, Dict]: """ Update the outputs dictionary with the processed image frame and pose data @@ -156,18 +161,20 @@ def get_outputs( - tag_image_metadata_list (List[Any]) : List of image metadata """ + img_frame = frame._rgb_frame + # Decorate image with text for visualization - img = decorate_img_with_text_for_qr( - img=img_frame, + viz_img = decorate_img_with_text_for_qr( + img=viz_img, frame_name_str="device", qr_position=base_T_marker.translation(), ) self._logger.debug(f"Time stamp with AprilTag Detections- {timestamp}") # Append data to lists for return - outputs["tag_image_list"].append(img) + outputs["tag_image_list"].append(viz_img) outputs["tag_base_T_marker_list"].append(base_T_marker) if img_metadata is not None: outputs["tag_image_metadata_list"].append(img_metadata) - return img, outputs + return viz_img, outputs diff --git a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/generic_detector_interface.py b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/generic_detector_interface.py index f17aa8ea..1ee2236e 100644 --- a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/generic_detector_interface.py +++ b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/generic_detector_interface.py @@ -6,7 +6,7 @@ from typing import Any, Dict, Tuple import numpy as np - +from perception_and_utils.utils.data_frame import DataFrame class GenericDetector: def __init__(self): @@ -18,5 +18,5 @@ def enable_detector(self): def disable_detector(self): self.is_enabled = False - def process_frame(self, img_frame: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]: + def process_frame(self, frame: DataFrame) -> Tuple[np.ndarray, Dict[str, Any]]: raise NotImplementedError diff --git a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py new file mode 100644 index 00000000..c39d0235 --- /dev/null +++ b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py @@ -0,0 +1,95 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
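Note on the two detector-interface hunks above: they switch process_frame from a raw np.ndarray to the shared DataFrame container, so every detector now pulls its image (and, when needed, pose and timestamp) from one object. A minimal sketch of that consuming pattern follows, assuming only the DataFrame and GenericDetector definitions from this series; the BrightnessDetector class and its logic are illustrative and not part of the codebase.

# Sketch only (not part of the patch): a toy detector built on the new DataFrame API.
from typing import Any, Dict, Tuple

import numpy as np

from perception_and_utils.perception.detector_wrappers.generic_detector_interface import (
    GenericDetector,
)
from perception_and_utils.utils.data_frame import DataFrame


class BrightnessDetector(GenericDetector):
    """Illustrative example; only DataFrame and GenericDetector come from this diff."""

    def __init__(self):
        super().__init__()
        self.enable_detector()  # sets self.is_enabled

    def process_frame(self, frame: DataFrame) -> Tuple[np.ndarray, Dict[str, Any]]:
        img = frame._rgb_frame
        if self.is_enabled is False or img is None:
            return img, {}
        # Pose and timing data ride along on the same DataFrame, so no extra
        # arguments are needed for transforms or timestamps.
        return img, {"mean_brightness": float(img.mean()), "t": frame._timestamp_s}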
+ + +import logging +import os +from typing import Any, Dict, Tuple + +import numpy as np +import sophus as sp +from perception_and_utils.perception.detector_wrappers.generic_detector_interface import ( + GenericDetector, +) +from perception_and_utils.utils.image_utils import label_img +from perception_and_utils.utils.data_frame import DataFrame + +class HumanMotionDetector(GenericDetector): + """ + """ + + def __init__(self): + super().__init__() + self._logger = logging.getLogger("HumanMotionDetector") + + def _init_human_motion_detector(self, velocity_threshold=0.75, time_horizon_ns=1000000000): + """ + Initialize the human motion detector + """ + self._logger.info("Initializing Human Motion Detector") + self.enable_detector() + self._velocity_threshold = velocity_threshold + + self._time_horizon_ns = time_horizon_ns # 1 second + self._previous_frames = [] + self._len_prev_frame_cache = 100 # Keep this value larger than fps + + def process_frame( + self, + frame: DataFrame, + ) -> Tuple[np.ndarray, str]: + # Do nothing if detector is not enabled + if self.is_enabled is False: + self._logger.warning( + "Human Motion detector is disabled... Skipping processing of current frame." + ) + return None, "Disabled" + + # Calculate the current position from the transformation matrix + current_position = frame._deviceWorld_T_camera_rgb.matrix()[:3, 3] + + # Add the current frame to the cache + self._previous_frames.append((current_position, frame._timestamp_s)) + + if len(self._previous_frames) < self._len_prev_frame_cache: + return None, "Standing" + + # If the cache is full, remove the oldest frame + if len(self._previous_frames) > self._len_prev_frame_cache: + self._previous_frames.pop(0) + + # Find an index in "_previous_positions" which is 1 sec before current value + index = len(self._previous_frames) - 60 + # If the index is out of range, return "Standing" + if index < 0: + return None, "Standing" + + # Calculate the Euclidean distance between now & then + print(f"Current position: {current_position} | Previous position: {self._previous_frames[index][0]}") + distance = np.linalg.norm(current_position - self._previous_frames[index][0]) + time = frame._timestamp_s - self._previous_frames[index][1] # as we are considering x frames per second so our window is 1 sec + print(f"Distance: {distance} m | Time: {time} sec") + avg_velocity = distance / time + + print(f"Avg velocity: {avg_velocity} m/s") + # Determine the activity based on the velocity threshold + if avg_velocity > self._velocity_threshold: + return None, "Walking" + + return None, "Standing" + + def get_outputs( + self, + img_frame: np.ndarray, + outputs: Dict, + ) -> Tuple[np.ndarray, Dict]: + """ + """ + # Decorate image with text for visualization + label_img( + img=img_frame, + text=f"Activity: {outputs['activity']}", + org=(50, 200), + ) + return img_frame, outputs diff --git a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py index 8d6db717..27540a81 100644 --- a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py +++ b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py @@ -14,6 +14,7 @@ centered_object_detection_heuristic, check_bbox_intersection, ) +from perception_and_utils.utils.data_frame import DataFrame from spot_rl.models.owlvit import OwlVit @@ -198,7 +199,7 @@ def _aria_fetch_demo_heuristics( 
return valid, score, stop - def process_frame(self, img_frame: np.ndarray) -> Tuple[np.ndarray, Dict[str, Any]]: + def process_frame(self, frame: DataFrame) -> Tuple[np.ndarray, Dict[str, Any]]: """ Process image frame to detect object instances and score them based on heuristics @@ -211,6 +212,8 @@ def process_frame(self, img_frame: np.ndarray) -> Tuple[np.ndarray, Dict[str, An updated_img_frame (np.ndarray) : Image frame with detections and text for visualization object_scores (Dict[str, float]) : Dictionary of scores for each object in the image frame """ + img_frame = frame._rgb_frame + # Do nothing if detector is not enabled if self.is_enabled is False: self._logger.warning( @@ -223,7 +226,7 @@ def process_frame(self, img_frame: np.ndarray) -> Tuple[np.ndarray, Dict[str, An ) return updated_img_frame, object_scores - def process_frame_offline(self, img_frame: np.ndarray): + def process_frame_offline(self, frame: DataFrame) -> Tuple[np.ndarray, Dict[str, Any]]: """ Process image frame to detect object instances and score them based on heuristics @@ -287,7 +290,7 @@ def process_frame_offline(self, img_frame: np.ndarray): def get_outputs( self, - img_frame: np.ndarray, + frame: DataFrame, outputs: Dict, object_scores: Dict, img_metadata: Any, @@ -310,6 +313,8 @@ def get_outputs( - object_image_segment_list (List[int]) : List of image segment ids - object_score_list (List[float]) : List of scores for each image frame """ + img_frame = frame._rgb_frame + if object_scores is not {}: for object_name in object_scores.keys(): outputs["object_image_list"][object_name].append(img_frame) diff --git a/perception_and_utils_root/perception_and_utils/utils/conversions.py b/perception_and_utils_root/perception_and_utils/utils/conversions.py index c720b616..530f00ec 100644 --- a/perception_and_utils_root/perception_and_utils/utils/conversions.py +++ b/perception_and_utils_root/perception_and_utils/utils/conversions.py @@ -205,6 +205,11 @@ def sophus_SE3_to_ros_TransformStamped( ros_trf_stamped.transform.translation.z = float(trans[2]) quat = Rotation.from_matrix(sp_se3.rotationMatrix()).as_quat() + quat = quat/np.linalg.norm(quat) + # Ensure quaternion's w is always positive for effective averaging as multiple quaternions can represent the same rotation + if quat[3] > 0: + quat = -1.0 * quat + ros_trf_stamped.transform.rotation.x = float(quat[0]) ros_trf_stamped.transform.rotation.y = float(quat[1]) ros_trf_stamped.transform.rotation.z = float(quat[2]) diff --git a/perception_and_utils_root/perception_and_utils/utils/data_frame.py b/perception_and_utils_root/perception_and_utils/utils/data_frame.py new file mode 100644 index 00000000..6cccf06c --- /dev/null +++ b/perception_and_utils_root/perception_and_utils/utils/data_frame.py @@ -0,0 +1,20 @@ +import numpy as np +import sophus as sp + +class DataFrame: + """ + DataFrame class to store instantaneous data from Human Sensor stream. 
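Note on the conversions.py hunk above: normalizing and sign-canonicalizing quaternions is what makes component-wise averaging meaningful, because q and -q encode the same rotation and a mixed-sign batch can cancel toward zero. The new comment says w should be positive, yet the `if quat[3] > 0` test negates exactly those samples, so the code actually standardizes on a non-positive w; either hemisphere works if applied consistently, but the mismatch is worth double-checking. A small illustrative helper (not in the patch) that enforces the w >= 0 convention:

# Sketch only: normalize, then flip the whole quaternion if w is negative so that
# every sample used for averaging lies in the same hemisphere.
import numpy as np


def canonicalize_quat_xyzw(quat: np.ndarray) -> np.ndarray:
    quat = quat / np.linalg.norm(quat)
    return -quat if quat[3] < 0.0 else quat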
+ It stores the image, timestamp and frame number of each received image + """ + def __init__(self): + self._frame_number = -1 + self._timestamp_s = -1 + self._avg_rgb_fps = None # type: Optional[float] + self._avg_depth_fps = None # type: Optional[float] + self._rgb_frame = None # type: Optional[np.ndarray] + self._depth_frame = None # type: Optional[np.ndarray] + self._aligned_depth_frame = None # type: Optional[np.ndarray] + self._deviceWorld_T_camera_rgb = None # type: Optional[sp.SE3] + self._deviceWorld_T_camera_depth = None # type: Optional[sp.SE3] + self._device_T_camera_rgb = None # type: Optional[sp.SE3] + self._device_T_camera_depth = None # type: Optional[sp.SE3] diff --git a/perception_and_utils_root/perception_and_utils/utils/math_utils.py b/perception_and_utils_root/perception_and_utils/utils/math_utils.py index 61be50c4..1b9f0e66 100644 --- a/perception_and_utils_root/perception_and_utils/utils/math_utils.py +++ b/perception_and_utils_root/perception_and_utils/utils/math_utils.py @@ -15,7 +15,7 @@ from scipy.spatial.transform import Rotation FILTER_DIST = 2.4 # in meters (distance for valid detection) -FIXED_LIST_LENGTH = 10 # Fixed length of the a_T_b_{position,orientation} lists used for average computation +FIXED_LIST_LENGTH = 1 # Fixed length of the a_T_b_{position,orientation} lists used for average computation def compute_avg_sophus_SE3_from_nplist( @@ -40,9 +40,11 @@ def compute_avg_sophus_SE3_from_nplist( a_T_b_position_np = np.array(a_T_b_position_list) avg_a_T_b_position = np.mean(a_T_b_position_np, axis=0) - a_T_b_quaternion_np = np.array(a_T_b_quaternion_list) + a_T_b_quaternion_np = np.array(a_T_b_quaternion_list).reshape(-1, 4) avg_a_T_b_quaternion = np.mean(a_T_b_quaternion_np, axis=0) - + # avg_a_T_b_quaternion = average_quaternion_numpy( + # a_T_b_quaternion_np, W=None + # ) avg_sp_se3 = sp.SE3( Rotation.from_quat(avg_a_T_b_quaternion).as_matrix(), avg_a_T_b_position ) @@ -102,7 +104,7 @@ def get_running_avg_a_T_b( # Ensure quaternion's w is always negative for effective averaging as multiple quaternions can represent the same rotation quat = Rotation.from_matrix(a_T_b.rotationMatrix()).as_quat() - if quat[3] > 0: + if quat[3] < 0.0: quat = -1.0 * quat a_T_b_quaternion_list.append(quat) @@ -119,3 +121,9 @@ def get_running_avg_a_T_b( ) return a_T_b_position_list, a_T_b_quaternion_list, avg_a_T_b + +def average_quaternion_numpy(Q, W=None): + if W is not None: + Q *= W[:, None] + eigvals, eigvecs = np.linalg.eig(Q.T@Q) + return eigvecs[:, eigvals.argmax()] diff --git a/perception_and_utils_root/setup.py b/perception_and_utils_root/setup.py index 25790bad..88700029 100644 --- a/perception_and_utils_root/setup.py +++ b/perception_and_utils_root/setup.py @@ -10,7 +10,7 @@ version="0.0", author="Kavit Shah", author_email="kavits98@gmail.com", - description="SIRo friendly perception and utils for Aria and Spot", + description="SIRo friendly perception and utils for Aria+Quest3 and Spot", packages=setuptools.find_packages(), install_requires=[ "click", diff --git a/spot_rl_experiments/configs/ros_frame_names.yaml b/spot_rl_experiments/configs/ros_frame_names.yaml index 7aebaec2..a6bdbf95 100644 --- a/spot_rl_experiments/configs/ros_frame_names.yaml +++ b/spot_rl_experiments/configs/ros_frame_names.yaml @@ -6,6 +6,7 @@ ARIA: "aria" # aria in cpf frame ARIA_DEVICE: "ariaDevice" # aria in device frame QUEST3_WORLD: "quest3World" QUEST3_CAMERA: "camera" +QUEST3_DEVICE: "device" MARKER: "marker" From e30c0504e9026706b3be844324d414ffb5af630e Mon Sep 17 00:00:00 2001 From: 
Kavit Shah Date: Wed, 28 Aug 2024 19:44:17 -0700 Subject: [PATCH 5/7] Human motion recognition with fps display --- .../human_sensor_data_streamer_interface.py | 38 ++++++----- .../aria_data_utils/quest3_data_streamer.py | 30 +++++++-- .../human_motion_detector.py | 66 +++++++++++++------ .../perception_and_utils/utils/data_frame.py | 1 + .../perception_and_utils/utils/image_utils.py | 30 +++++++++ 5 files changed, 122 insertions(+), 43 deletions(-) diff --git a/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py b/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py index d6642499..06c565d9 100644 --- a/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py +++ b/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py @@ -26,6 +26,7 @@ # ROS imports from geometry_msgs.msg import Pose, PoseStamped from nav_msgs.msg import Odometry +from std_msgs.msg import String from tf2_ros import StaticTransformBroadcaster from visualization_msgs.msg import Marker @@ -55,21 +56,8 @@ def __init__(self, verbose: bool = False) -> None: # ROS publishers & broadcaster self.static_tf_broadcaster = StaticTransformBroadcaster() - # self.pose_of_interest_publisher = rospy.Publisher( - # "/pose_of_interest", PoseStamped, queue_size=10 - # ) # pose of device when object was detected from rgb image - # self.pose_of_interest_marker_publisher = rospy.Publisher( - # "/pose_of_interest_marker", Marker, queue_size=10 - # ) - # self.odom_publisher = rospy.Publisher( - # "/odom", Odometry, queue_size=10 - # ) - # self.human_pose_publisher = rospy.Publisher( - # "/current_pose", PoseStamped, queue_size=10 - # ) - # self.human_handoff_pose_publisher = rospy.Publisher( - # "/human_handoff_pose", PoseStamped, queue_size=10 - # ) + self.human_activity_history_pub = rospy.Publisher( + "/human_activity_history", String, queue_size=10) # TODO: Define DEVICE FRAME as a visual frame of reference i.e. 
front facing frame of reference self.device_T_camera: Optional[sp.SE3] = None # TODO: Should be initialized by implementations of this class @@ -223,3 +211,23 @@ def publish_marker(self, data_frame: DataFrame, marker_scale: float=0.1): marker.color.b = 0.2 self.pose_of_interest_marker_publisher.publish(marker) + + def publish_human_activity_history(self, human_history: List[Tuple[float,str]]): + """ + Publishes human activity history as a string + + Args: + data_frame (DataFrame): DataFrame object containing data packet with rgb, depth + and all necessary transformations + + Publishes: + - /human_activity_history: ROS String message for human activity history + """ + + # Iterate over human history and create a string + human_history_str = "" + for timestamp, activity in human_history: + human_history_str += f"{timestamp},{activity}|" # str = "time1,Standing|time2,Walking" + + # Publish human activity history string + self.human_activity_history_pub.publish(human_history_str) diff --git a/aria_data_loaders/aria_data_utils/quest3_data_streamer.py b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py index d7d8c122..da03692c 100644 --- a/aria_data_loaders/aria_data_utils/quest3_data_streamer.py +++ b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py @@ -1,7 +1,12 @@ from aria_data_utils.human_sensor_data_streamer_interface import CameraParams, DataFrame, HumanSensorDataStreamerInterface from aria_data_utils.aria_sdk_utils import update_iptables_quest3 from perception_and_utils.utils.math_utils import get_running_avg_a_T_b -from quest3_streamer.unified_quest_camera import UnifiedQuestCamera # From internal repo +from perception_and_utils.utils.frame_rate_counter import FrameRateCounter # Local Frame rate counter +try: + from quest3_streamer.unified_quest_camera import UnifiedQuestCamera # From internal repo +except ImportError: + print("Could not import Quest3 camera wrapper") + import rospy import numpy as np @@ -30,6 +35,8 @@ def __init__(self, verbose: bool = False) -> None: handler.setFormatter(formatter) self.logger.addHandler(handler) + self.frame_rate_counter = FrameRateCounter() + self.unified_quest3_camera = UnifiedQuestCamera() self.rgb_cam_params : CameraParams = None self.depth_cam_params : CameraParams = None @@ -115,11 +122,16 @@ def get_latest_data_frame(self) -> Optional[DataFrame]: data_frame._frame_number = self._frame_number data_frame._timestamp_s = time.time() - data_frame._avg_rgb_fps = self.unified_quest3_camera.get_avg_fps_rgb() - data_frame._avg_depth_fps = self.unified_quest3_camera.get_avg_fps_depth() # Align depth frame with rgb frame data_frame._aligned_depth_frame = None # TODO: Add this logic + + # Update frame rate counter + self.frame_rate_counter.update() + data_frame._avg_rgb_fps = self.unified_quest3_camera.get_avg_fps_rgb() + data_frame._avg_depth_fps = self.unified_quest3_camera.get_avg_fps_depth() + data_frame._avg_data_frame_fps = self.frame_rate_counter.avg_value() + return data_frame def process_frame( @@ -189,10 +201,14 @@ def process_frame( ) if detect_human_motion: - _, activity_str = self.human_motion_detector.process_frame(frame=data_frame) - viz_img, _ = self.human_motion_detector.get_outputs(viz_img, {"activity": activity_str}) - print(f"Activity: {activity_str}") - rospy.set_param("human_activity", activity_str) + activity_str, avg_velocity = self.human_motion_detector.process_frame(frame=data_frame) + ops = { + "activity": activity_str, + "velocity": avg_velocity, + "data_frame_fps": data_frame._avg_data_frame_fps, + } + viz_img, 
_ = self.human_motion_detector.get_outputs(viz_img, ops) + self.publish_human_activity_history(self.human_motion_detector.get_human_motion_history()) # if detect_objects: # viz_img, object_scores = self.object_detector.process_frame(viz_img) diff --git a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py index c39d0235..3a83a9a9 100644 --- a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py +++ b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py @@ -12,7 +12,7 @@ from perception_and_utils.perception.detector_wrappers.generic_detector_interface import ( GenericDetector, ) -from perception_and_utils.utils.image_utils import label_img +from perception_and_utils.utils.image_utils import label_img, decorate_img_with_fps from perception_and_utils.utils.data_frame import DataFrame class HumanMotionDetector(GenericDetector): @@ -23,28 +23,39 @@ def __init__(self): super().__init__() self._logger = logging.getLogger("HumanMotionDetector") - def _init_human_motion_detector(self, velocity_threshold=0.75, time_horizon_ns=1000000000): + def _init_human_motion_detector( + self, + # velocity_threshold=1.2, + velocity_threshold=0.4, # hacking this + time_horizon_ns=1000000000): """ Initialize the human motion detector + + Args: + velocity_threshold (float, optional): Velocity threshold for determining walking vs standing. Defaults to 1.2 metres/sec + time_horizon_ns (int, optional): Time horizon for calculating velocity. Defaults to 1 second (in nanosec) """ self._logger.info("Initializing Human Motion Detector") self.enable_detector() self._velocity_threshold = velocity_threshold self._time_horizon_ns = time_horizon_ns # 1 second - self._previous_frames = [] + self._previous_frames = [] # type: List[Tuple[np.ndarray, float]] self._len_prev_frame_cache = 100 # Keep this value larger than fps + + self._human_motion_history = [] # type: List[Tuple[float, str]] + def process_frame( self, - frame: DataFrame, - ) -> Tuple[np.ndarray, str]: + frame: DataFrame, # new data frame + ) -> Tuple[str, float]: # Do nothing if detector is not enabled if self.is_enabled is False: self._logger.warning( "Human Motion detector is disabled... Skipping processing of current frame." 
) - return None, "Disabled" + return "Disabled", None # Calculate the current position from the transformation matrix current_position = frame._deviceWorld_T_camera_rgb.matrix()[:3, 3] @@ -52,32 +63,30 @@ def process_frame( # Add the current frame to the cache self._previous_frames.append((current_position, frame._timestamp_s)) - if len(self._previous_frames) < self._len_prev_frame_cache: - return None, "Standing" + # If the cache is just 1 element or less, return "Standing" + lookback_index=10 + if len(self._previous_frames) < lookback_index+1: + return "Standing", None # If the cache is full, remove the oldest frame if len(self._previous_frames) > self._len_prev_frame_cache: self._previous_frames.pop(0) - # Find an index in "_previous_positions" which is 1 sec before current value - index = len(self._previous_frames) - 60 - # If the index is out of range, return "Standing" - if index < 0: - return None, "Standing" - # Calculate the Euclidean distance between now & then - print(f"Current position: {current_position} | Previous position: {self._previous_frames[index][0]}") - distance = np.linalg.norm(current_position - self._previous_frames[index][0]) - time = frame._timestamp_s - self._previous_frames[index][1] # as we are considering x frames per second so our window is 1 sec + print(f"Current position: {current_position} | Previous position: {self._previous_frames[lookback_index][0]}") + distance = np.linalg.norm(current_position - self._previous_frames[lookback_index][0]) + time = frame._timestamp_s - self._previous_frames[lookback_index][1] # as we are considering x frames per second so our window is 1 sec print(f"Distance: {distance} m | Time: {time} sec") avg_velocity = distance / time print(f"Avg velocity: {avg_velocity} m/s") + state = "Standing" # Determine the activity based on the velocity threshold if avg_velocity > self._velocity_threshold: - return None, "Walking" + state = "Walking" - return None, "Standing" + self.update_human_motion_history(state, frame._timestamp_s) + return state, avg_velocity def get_outputs( self, @@ -86,10 +95,25 @@ def get_outputs( ) -> Tuple[np.ndarray, Dict]: """ """ + viz_img = img_frame.copy() # Decorate image with text for visualization label_img( - img=img_frame, + img=viz_img, text=f"Activity: {outputs['activity']}", org=(50, 200), ) - return img_frame, outputs + label_img( + img=viz_img, + text=f"Velocity: {outputs['velocity']}", + org=(50, 250), + ) + decorate_img_with_fps(viz_img, outputs["data_frame_fps"],pos=(50, 400)) + return viz_img, outputs + + def update_human_motion_history(self, state, timestamp): + # If current state of human is different from last state in history list, only then update history + if len(self._human_motion_history) == 0 or self._human_motion_history[-1][1] != state: + self._human_motion_history.append((timestamp, state)) + + def get_human_motion_history(self): + return self._human_motion_history diff --git a/perception_and_utils_root/perception_and_utils/utils/data_frame.py b/perception_and_utils_root/perception_and_utils/utils/data_frame.py index 6cccf06c..116bd16a 100644 --- a/perception_and_utils_root/perception_and_utils/utils/data_frame.py +++ b/perception_and_utils_root/perception_and_utils/utils/data_frame.py @@ -11,6 +11,7 @@ def __init__(self): self._timestamp_s = -1 self._avg_rgb_fps = None # type: Optional[float] self._avg_depth_fps = None # type: Optional[float] + self._avg_data_frame_fps = None # type: Optional[float] self._rgb_frame = None # type: Optional[np.ndarray] self._depth_frame = None # 
type: Optional[np.ndarray] self._aligned_depth_frame = None # type: Optional[np.ndarray] diff --git a/perception_and_utils_root/perception_and_utils/utils/image_utils.py b/perception_and_utils_root/perception_and_utils/utils/image_utils.py index cfd39819..a1037744 100644 --- a/perception_and_utils_root/perception_and_utils/utils/image_utils.py +++ b/perception_and_utils_root/perception_and_utils/utils/image_utils.py @@ -127,6 +127,36 @@ def decorate_img_with_text_for_qr( return img +def decorate_img_with_fps(frame: np.ndarray, fps_value: float, pos=(50, 50)): + text = f"{fps_value:.2f} FPS" + + font = cv2.FONT_HERSHEY_PLAIN + font_scale = 2 + text_color = (0, 0, 225) + text_color_bg = (0, 0, 0) + font_thickness = 2 + (text_w, text_h), baseline = cv2.getTextSize( + text, font, font_scale, font_thickness + ) + cv2.rectangle( + frame, + pos, + (pos[0] + text_w, pos[1] - text_h - font_scale - 1), + text_color_bg, + -1, + cv2.LINE_8, + ) + cv2.putText( + frame, + text, + pos, + font, + font_scale, + text_color, + font_thickness, + cv2.LINE_8, + ) + def label_img( img: np.ndarray, text: str, From 9bb8b3bdbe2b48e26c1c387ce2db2201fb922811 Mon Sep 17 00:00:00 2001 From: Kavit Shah Date: Thu, 29 Aug 2024 00:45:29 -0700 Subject: [PATCH 6/7] Uncomment object detector and remove episodic memory script for quest3 --- .../episodic_memory_robotic_fetch_quest3_c.py | 270 ------------------ .../aria_data_utils/quest3_data_streamer.py | 114 +++++--- 2 files changed, 80 insertions(+), 304 deletions(-) delete mode 100644 aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py diff --git a/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py b/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py deleted file mode 100644 index 4bb37c1e..00000000 --- a/aria_data_loaders/aria_data_utils/episodic_memory_robotic_fetch_quest3_c.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and its affiliates. -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
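For reference, the image_utils.py hunk above adds decorate_img_with_fps, which (like label_img) draws on the BGR frame in place. A short usage sketch, assuming a synthetic frame in place of a real camera image:

# Sketch only: a black 640x480 frame stands in for a real camera image.
import cv2
import numpy as np

from perception_and_utils.utils.image_utils import decorate_img_with_fps, label_img

viz_img = np.zeros((480, 640, 3), dtype=np.uint8)
label_img(img=viz_img, text="Activity: Walking", org=(50, 200))
decorate_img_with_fps(viz_img, 27.3, pos=(50, 400))  # renders "27.30 FPS" in place
cv2.imshow("Image", viz_img)
cv2.waitKey(0)
cv2.destroyAllWindows()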
- - -import logging -from typing import Any, Dict, List, Optional, Tuple - -import click -import numpy as np -import rospy -import sophus as sp -from geometry_msgs.msg import PoseStamped -from perception_and_utils.utils.conversions import ( - ros_PoseStamped_to_sophus_SE3, - ros_TransformStamped_to_sophus_SE3, - sophus_SE3_to_ros_PoseStamped, - sophus_SE3_to_ros_TransformStamped, - xyt_to_sophus_SE3, -) -from spot_rl.envs.skill_manager import SpotSkillManager -from spot_rl.utils.utils import ros_frames as rf -from spot_wrapper.spot import Spot, SpotCamIds -from spot_wrapper.spot_qr_detector import SpotQRDetector -from std_msgs.msg import Bool - -# from spot_rl.envs.skill_manager import SpotSkillManager -# from spot_rl.utils.utils import ros_frames as rf -# from spot_wrapper.spot import Spot, SpotCamIds -# from spot_wrapper.spot_qr_detector import SpotQRDetector -# from std_msgs.msg import Bool -from tf2_ros import ( - ConnectivityException, - ExtrapolationException, - LookupException, - StaticTransformBroadcaster, -) -from tf2_ros.buffer import Buffer -from tf2_ros.transform_listener import TransformListener - -# For information on useful reference frame, please check https://github.com/facebookresearch/spot-sim2real?tab=readme-ov-file#eyeglasses-run-spot-aria-project-code - -# Waypoints on left of the dock to observe the QR code clearly -SPOT_DOCK_OBSERVER_WAYPOINT_LEFT = [ - 0.3840912007597151, - -0.5816728569741766, - 149.58030524832756, -] -# Waypoints on right of the dock to observe the QR code clearly -SPOT_DOCK_OBSERVER_WAYPOINT_RIGHT = [ - 0.5419185005639034, - 0.5319243891865243, - -154.1506754378722, -] - - -class EpisodicMemoryRoboticFetchQuest3: - """ - This class handles the core logic of the Episodic Memory Robotic Fetch - It begines with Spot looking at the marker and detecting it which results - in a static tf of marker w.r.t spotWorld frame being published. - - Then it waits for Aria to detect marker and publish ariaWorld w.r.t marker - as a static tf. This is done by AriaLiveReader node. This step ensures both - Spot & Aria worlds are now mutually transformable which is necessary for the - rest of the logic. - - It constantly listens to Aria's current pose w.r.t ariaWorld frame. - It also waits for Aria to detect the object and publish that pose of interest - (i.e. aria's cpf frame when it saw the object) w.r.t ariaWorld frame. - - Once both the poses are received, it waits for the spot_fetch trigger to be True - which will be set by the user via `rostopic pub` on cli. - - Once the trigger is True, it will run the following steps: - 1. Nav to Pose of interest (Aria's cpf frame when it saw the object) - 2. Pick the object (object is hardcoded to be bottle) - 3. Nav to Pose of interest (Aria wearer's last location before triggering spot_fetch) - 4. Place the object (object is hardcoded to be bottle) - - Args: - spot: Spot object - verbose: bool indicating whether to print debug logs - - Warning: Do not create objects of this class directly. It is a standalone node - """ - - def __init__(self): - # # @FIXME: This is initializing a ros-node silently (at core of inheritence in - # # SpotRobotSubscriberMixin). 
Make it explicit - # self.skill_manager = SpotSkillManager( - # spot=spot, - # verbose=verbose, - # use_policies=use_policies, - # ) - - # Navigate Spot to SPOT_DOCK_OBSERVER_WAYPOINT_LEFT so that it can see the marker - # status, msg = self.skill_manager.nav( - # SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[0], - # SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[1], - # np.deg2rad(SPOT_DOCK_OBSERVER_WAYPOINT_LEFT[2]), - # ) - # if not status: - # rospy.logerr( - # f"Failed to navigate to spot dock observer waypoint. Error: {msg}. Exiting..." - # ) - # return - - # # Sit Spot down - # self.skill_manager.sit() - - # # Instantiate static transform broadcaster for publishing marker w.r.t spotWorld transforms - # self.static_tf_broadcaster = StaticTransformBroadcaster() - - # self._spot_fetch_trigger_sub = rospy.Subscriber( - # "/spot_fetch_trigger", Bool, self.spot_fetch_trigger_callback - # ) - # self._spot_fetch_flag = False - - # Detect the marker and get the average pose of marker w.r.t spotWorld frame - # cam_id = SpotCamIds.HAND_COLOR - # spot_qr = SpotQRDetector(spot=spot, cam_ids=[cam_id]) - # avg_spotWorld_T_marker = spot_qr.get_avg_spotWorld_T_marker(cam_id=cam_id) - - # # Publish marker w.r.t spotWorld transforms for 5 seconds so it can be seen in rviz - # start_time = rospy.Time.now() - # while rospy.Time.now() - start_time < rospy.Duration(2.0): - # self.static_tf_broadcaster.sendTransform( - # sophus_SE3_to_ros_TransformStamped( - # sp_se3=avg_spotWorld_T_marker, - # parent_frame=rf.SPOT_WORLD, - # child_frame=rf.MARKER, - # ) - # ) - # rospy.sleep(0.5) - - # Instantiate publisher for publishing go-to pose for handoff - self._nav_PoseStamped_pub_for_place = rospy.Publisher( - "/nav_pose_for_place_viz", PoseStamped, queue_size=10 - ) - self.nav_xyt_for_handoff = None - - # Initialize static transform subscriber for listening to ariaWorld w.r.t marker transform - self._tf_buffer = Buffer() - self._tf_listener = TransformListener(self._tf_buffer) - - # Wait for transform from camera to spotWorld to be available - while not rospy.is_shutdown(): - while not rospy.is_shutdown() and not self._tf_buffer.can_transform( - target_frame=rf.SPOT_WORLD, source_frame="camera", time=rospy.Time() - ): - rospy.logwarn_throttle( - 5.0, "Waiting for transform from camera to spotWorld" - ) - rospy.sleep(0.5) - try: - transform_stamped_spotWorld_T_camera = self._tf_buffer.lookup_transform( - target_frame=rf.SPOT_WORLD, - source_frame="camera", - time=rospy.Time(0), - ) - except (LookupException, ConnectivityException, ExtrapolationException): - raise RuntimeError( - "Unable to lookup transform from ariaWorld to spotWorld" - ) - spotWorld_T_camera = ros_TransformStamped_to_sophus_SE3( - ros_trf_stamped=transform_stamped_spotWorld_T_camera - ) - - self.nav_xyt_for_handoff = self.get_nav_xyt_to_wearer(spotWorld_T_camera) - # Publish place pose in spotWorld frame so it can be seen in rviz until spot_fetch is triggered - self._nav_PoseStamped_pub_for_place.publish( - sophus_SE3_to_ros_PoseStamped( - sp_se3=xyt_to_sophus_SE3( - np.asarray(self.nav_xyt_for_handoff, dtype=np.float32) - ), - parent_frame=rf.SPOT_WORLD, - ) - ) - - # rospy.logwarn_throttle( - # 5.0, "Waiting for demo trigger on /spot_fetch_trigger topic" - # ) - rospy.sleep(1.0) - - # Set run status to true in the beginning - # run_status = True - - # if run_status: - # # Nav to x,y,theta of interest (Aria's cpf frame when it saw the object) - # nav_xyt_for_handoff = self.nav_xyt_for_handoff - # rospy.loginfo(f"Nav 2D location for pick: {nav_xyt_for_handoff}") - - # 
self.skill_manager.nav( - # nav_xyt_for_handoff[0], nav_xyt_for_handoff[1], nav_xyt_for_handoff[2] - # ) - - # self.skill_manager.sit() - - def spot_fetch_trigger_callback(self, msg): - """ - Save _spot_fetch_flag as True when spot fetch is to be triggered - - Args: - msg (Bool): Message received on /spot_fetch_trigger topic - - Updates: - self._spot_fetch_flag (bool): True when spot fetch is to be triggered - """ - self._spot_fetch_flag = True - - def get_nav_xyt_to_wearer( - self, aria_pose: sp.SE3, shift_offset: float = 0.6 - ) -> Tuple[float, float, float]: - """ - Converts Sophus SE3 aria current pose to Tuple of x, y, theta, then flips it by 180 degrees and shifts it by a given offset. - At the end, the final x, y, theta is such that the robot will face the wearer. - - Args: - aria_pose (sp.SE3): Sophus SE3 object for aria's current pose (as cpf frame) w.r.t spotWorld frame - - Returns: - Tuple[float, float, float]: Tuple of x,y,theta as floats representing nav target for robot - """ - # ARROW STARTS FROM ORIGIN - - # get position and rotation as x, y, theta - position = aria_pose.translation() - # Find the angle made by CPF's z axis with spotWorld's x axis - # as robot should orient to the CPF's z axis. First 3 elements of - # column 3 from spotWorld_T_cpf represents cpf's z axis in spotWorld frame - cpf_z_axis_in_spotWorld = aria_pose.matrix()[:3, 2] - x_component_of_z_axis = cpf_z_axis_in_spotWorld[0] - y_component_of_z_axis = cpf_z_axis_in_spotWorld[1] - rotation = float( - np.arctan2( - y_component_of_z_axis, - x_component_of_z_axis, - ) - ) # tan^-1(y/x) - x, y, theta = position[0], position[1], rotation - - # push fwd this point along theta - x += shift_offset * np.cos(theta) - y += shift_offset * np.sin(theta) - # rotate theta by pi - theta += np.pi - - return (x, y, theta) - - -@click.command() -@click.option("--verbose", type=bool, default=True) -@click.option("--use-policies", type=bool, default=False) -def main(verbose: bool, use_policies: bool): - rospy.init_node("episode") - rospy.logwarn("Starting up ROS node") - - # spot = Spot("EpisodicMemoryRoboticFetchQuest3Node") - # with spot.get_lease(hijack=True) as lease: - # if lease is None: - # raise RuntimeError("Could not get lease") - # else: - # rospy.loginfo("Acquired lease") - - _ = EpisodicMemoryRoboticFetchQuest3() - - -if __name__ == "__main__": - main() diff --git a/aria_data_loaders/aria_data_utils/quest3_data_streamer.py b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py index da03692c..37b90722 100644 --- a/aria_data_loaders/aria_data_utils/quest3_data_streamer.py +++ b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py @@ -1,21 +1,32 @@ -from aria_data_utils.human_sensor_data_streamer_interface import CameraParams, DataFrame, HumanSensorDataStreamerInterface from aria_data_utils.aria_sdk_utils import update_iptables_quest3 +from aria_data_utils.human_sensor_data_streamer_interface import ( + CameraParams, + DataFrame, + HumanSensorDataStreamerInterface, +) +from perception_and_utils.utils.frame_rate_counter import ( # Local Frame rate counter + FrameRateCounter, +) from perception_and_utils.utils.math_utils import get_running_avg_a_T_b -from perception_and_utils.utils.frame_rate_counter import FrameRateCounter # Local Frame rate counter + try: - from quest3_streamer.unified_quest_camera import UnifiedQuestCamera # From internal repo + from quest3_streamer.unified_quest_camera import ( # From internal repo + UnifiedQuestCamera, + ) except ImportError: print("Could not import Quest3 camera 
wrapper") -import rospy -import numpy as np -import sophus as sp -import cv2 -from typing import Any, Dict, Optional, Tuple, List -import click import logging import time +from typing import Any, Dict, List, Optional, Tuple + +import click +import cv2 +import matplotlib.pyplot as plt +import numpy as np +import rospy +import sophus as sp from perception_and_utils.utils.conversions import ( sophus_SE3_to_ros_PoseStamped, sophus_SE3_to_ros_TransformStamped, @@ -23,6 +34,7 @@ ) from spot_rl.utils.utils import ros_frames as rf + class Quest3DataStreamer(HumanSensorDataStreamerInterface): def __init__(self, verbose: bool = False) -> None: super().__init__(verbose) @@ -38,8 +50,8 @@ def __init__(self, verbose: bool = False) -> None: self.frame_rate_counter = FrameRateCounter() self.unified_quest3_camera = UnifiedQuestCamera() - self.rgb_cam_params : CameraParams = None - self.depth_cam_params : CameraParams = None + self.rgb_cam_params: CameraParams = None + self.depth_cam_params: CameraParams = None self._frame_number = -1 self._setup_device() @@ -58,7 +70,9 @@ def disconnect(self): self.logger("Quest3DataStreamer :: Disconnected") def is_connected(self) -> bool: - return self._is_connected # TODO: Return device connection status from Bruno's APIs + return ( + self._is_connected + ) # TODO: Return device connection status from Bruno's APIs def _setup_device(self) -> Any: # Setup RGB camera params @@ -67,13 +81,23 @@ def _setup_device(self) -> Any: rgb_pp = self.unified_quest3_camera.get_rgb_principal_point() if rgb_fl is not None and rgb_pp is not None: - rospy.logwarn("RGB Camera Params: focal lengths = {}, principal point = {}".format(rgb_fl, rgb_pp)) - self.rgb_cam_params = CameraParams(focal_lengths=rgb_fl, principal_point=rgb_pp) + rospy.logwarn( + "RGB Camera Params: focal lengths = {}, principal point = {}".format( + rgb_fl, rgb_pp + ) + ) + self.rgb_cam_params = CameraParams( + focal_lengths=rgb_fl, principal_point=rgb_pp + ) else: self.logger.info("Waiting for RGB camera params to be set...") rospy.logwarn("Waiting for RGB camera params to be set...") time.sleep(0.1) - rospy.loginfo("RGB Camera Params: focal lengths = {}, principal point = {}".format(self.rgb_cam_params._focal_lengths, self.rgb_cam_params._principal_point)) + rospy.loginfo( + "RGB Camera Params: focal lengths = {}, principal point = {}".format( + self.rgb_cam_params._focal_lengths, self.rgb_cam_params._principal_point + ) + ) # Setup Depth camera params while self.depth_cam_params is None: @@ -81,13 +105,24 @@ def _setup_device(self) -> Any: depth_pp = self.unified_quest3_camera.get_depth_principal_point() if depth_fl is not None and depth_pp is not None: - rospy.logwarn("Depth Camera Params: focal lengths = {}, principal point = {}".format(depth_fl, depth_pp)) - self.depth_cam_params = CameraParams(focal_lengths=depth_fl, principal_point=depth_pp) + rospy.logwarn( + "Depth Camera Params: focal lengths = {}, principal point = {}".format( + depth_fl, depth_pp + ) + ) + self.depth_cam_params = CameraParams( + focal_lengths=depth_fl, principal_point=depth_pp + ) else: self.logger.info("Waiting for Depth camera params to be set...") rospy.logwarn("Waiting for Depth camera params to be set...") time.sleep(0.1) - print("Depth Camera Params: focal lengths = {}, principal point = {}".format(self.depth_cam_params._focal_lengths, self.depth_cam_params._principal_point)) + print( + "Depth Camera Params: focal lengths = {}, principal point = {}".format( + self.depth_cam_params._focal_lengths, + 
self.depth_cam_params._principal_point, + ) + ) def get_latest_data_frame(self) -> Optional[DataFrame]: # Create DataFrame object @@ -98,7 +133,9 @@ def get_latest_data_frame(self) -> Optional[DataFrame]: deviceWorld_T_rgbCam = self.unified_quest3_camera.get_deviceWorld_T_rgbCamera() device_T_rgbCam = self.unified_quest3_camera.get_device_T_rgbCamera() if rgb_frame is None or deviceWorld_T_rgbCam is None or device_T_rgbCam is None: - rospy.logwarn("Returning None, as rgb_frame or deviceWorld_T_rgbCam or device_T_rgbCam is None.") + rospy.logwarn( + "Returning None, as rgb_frame or deviceWorld_T_rgbCam or device_T_rgbCam is None." + ) return None else: data_frame._rgb_frame = rgb_frame @@ -107,10 +144,14 @@ def get_latest_data_frame(self) -> Optional[DataFrame]: # Populate depth frame data iff all the required data is available; else return None depth_frame = self.unified_quest3_camera.get_depth() - deviceWorld_T_depthCam = self.unified_quest3_camera.get_deviceWorld_T_depthCamera() + deviceWorld_T_depthCam = ( + self.unified_quest3_camera.get_deviceWorld_T_depthCamera() + ) device_T_depthCam = self.unified_quest3_camera.get_device_T_depthCamera() if depth_frame is None or deviceWorld_T_depthCam is None: - rospy.logwarn("Returning None as depth_frame or deviceWorld_T_depthCam or device_T_depthCam is None.") + rospy.logwarn( + "Returning None as depth_frame or deviceWorld_T_depthCam or device_T_depthCam is None." + ) return None else: data_frame._depth_frame = depth_frame @@ -122,9 +163,8 @@ def get_latest_data_frame(self) -> Optional[DataFrame]: data_frame._frame_number = self._frame_number data_frame._timestamp_s = time.time() - # Align depth frame with rgb frame - data_frame._aligned_depth_frame = None # TODO: Add this logic + data_frame._aligned_depth_frame = None # TODO: Add this logic # Update frame rate counter self.frame_rate_counter.update() @@ -201,21 +241,25 @@ def process_frame( ) if detect_human_motion: - activity_str, avg_velocity = self.human_motion_detector.process_frame(frame=data_frame) + activity_str, avg_velocity = self.human_motion_detector.process_frame( + frame=data_frame + ) ops = { "activity": activity_str, "velocity": avg_velocity, "data_frame_fps": data_frame._avg_data_frame_fps, } viz_img, _ = self.human_motion_detector.get_outputs(viz_img, ops) - self.publish_human_activity_history(self.human_motion_detector.get_human_motion_history()) + self.publish_human_activity_history( + self.human_motion_detector.get_human_motion_history() + ) - # if detect_objects: - # viz_img, object_scores = self.object_detector.process_frame(viz_img) - # if object_label in object_scores.keys(): - # if self.verbose: - # plt.imsave(f"frame_{self.frame_number}.jpg", viz_img) - # # self.publish_pose_of_interest(frame.get("ros_pose")) + if detect_objects: + viz_img, object_scores = self.object_detector.process_frame(viz_img) + if object_label in object_scores.keys(): + if self.verbose: + plt.imsave(f"frame_{self.frame_number}.jpg", viz_img) + # self.publish_pose_of_interest(frame.get("ros_pose")) return viz_img, outputs @@ -292,7 +336,7 @@ def initialize_human_motion_detector(self): @click.option("--do-update-iptables", is_flag=True, type=bool, default=False) @click.option("--debug", is_flag=True, default=False) @click.option("--hz", type=int, default=100) -def main(do_update_iptables:bool, debug:bool, hz:int): +def main(do_update_iptables: bool, debug: bool, hz: int): if debug: _log_level = rospy.DEBUG else: @@ -306,7 +350,7 @@ def main(do_update_iptables:bool, debug:bool, hz:int): else: 
rospy.logwarn("Not updating iptables") - rate = rospy.Rate(hz) + # rate = rospy.Rate(hz) outputs: Dict[str, Any] = {} data_streamer = None cv2.namedWindow("Image", cv2.WINDOW_NORMAL) @@ -314,7 +358,9 @@ def main(do_update_iptables:bool, debug:bool, hz:int): data_streamer = Quest3DataStreamer() outputs = data_streamer.initialize_april_tag_detector(outputs=outputs) - outputs = data_streamer.initialize_object_detector(outputs=outputs, object_labels=["milk bottle"]) + outputs = data_streamer.initialize_object_detector( + outputs=outputs, object_labels=["milk bottle"] + ) data_streamer.initialize_human_motion_detector() # data_streamer.connect() while not rospy.is_shutdown(): From e4688860fc12f6c022b535b33f0646d05daa485f Mon Sep 17 00:00:00 2001 From: Kavit Shah Date: Wed, 23 Oct 2024 18:00:11 -0700 Subject: [PATCH 7/7] Add Profiling code --- .../human_sensor_data_streamer_interface.py | 168 ++++++++++-------- .../aria_data_utils/quest3_data_streamer.py | 80 +++++++-- .../unified_quest_camera_interface.py | 11 +- .../human_motion_detector.py | 58 +++--- .../detector_wrappers/object_detector.py | 8 +- .../perception_and_utils/utils/data_frame.py | 31 ++-- 6 files changed, 229 insertions(+), 127 deletions(-) diff --git a/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py b/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py index 06c565d9..63e30577 100644 --- a/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py +++ b/aria_data_loaders/aria_data_utils/human_sensor_data_streamer_interface.py @@ -10,42 +10,50 @@ import cv2 import numpy as np import rospy -import sophus as sp + +try: + import sophuspy as sp +except Exception as e: + print(f"Cannot import sophuspy due to {e}. Import sophus instead") + import sophus as sp + +# ROS imports +from geometry_msgs.msg import Pose, PoseStamped +from nav_msgs.msg import Odometry from perception_and_utils.perception.detector_wrappers.april_tag_detector import ( AprilTagDetectorWrapper, ) -from perception_and_utils.perception.detector_wrappers.object_detector import ( - ObjectDetectorWrapper, -) from perception_and_utils.perception.detector_wrappers.human_motion_detector import ( HumanMotionDetector, ) - +from perception_and_utils.perception.detector_wrappers.object_detector import ( + ObjectDetectorWrapper, +) +from perception_and_utils.utils.data_frame import DataFrame from spot_rl.utils.utils import ros_frames as rf - -# ROS imports -from geometry_msgs.msg import Pose, PoseStamped -from nav_msgs.msg import Odometry from std_msgs.msg import String from tf2_ros import StaticTransformBroadcaster from visualization_msgs.msg import Marker -from perception_and_utils.utils.data_frame import DataFrame class CameraParams: """ CameraParams class to store camera parameters like focal length, principal point etc. 
It stores the image, timestamp and frame number of each received image """ - def __init__(self, focal_lengths: Tuple[float, float], principal_point: Tuple[float, float]): + + def __init__( + self, focal_lengths: Tuple[float, float], principal_point: Tuple[float, float] + ): self._focal_lengths = focal_lengths # type: Optional[Tuple[float, float]] self._principal_point = principal_point # type: Optional[Tuple[float, float]] + class HumanSensorDataStreamerInterface: def __init__(self, verbose: bool = False) -> None: self.verbose = verbose - self._is_connected = False # TODO : Maybe not needed + self._is_connected = False # TODO : Maybe not needed # TODO: Update this later with PerceptionPipeline # Create all detectors @@ -57,19 +65,22 @@ def __init__(self, verbose: bool = False) -> None: self.static_tf_broadcaster = StaticTransformBroadcaster() self.human_activity_history_pub = rospy.Publisher( - "/human_activity_history", String, queue_size=10) + "/human_activity_history", String, queue_size=10 + ) # TODO: Define DEVICE FRAME as a visual frame of reference i.e. front facing frame of reference - self.device_T_camera: Optional[sp.SE3] = None # TODO: Should be initialized by implementations of this class + self.device_T_camera: Optional[ + sp.SE3 + ] = None # TODO: Should be initialized by implementations of this class # Maintain a list of all poses where qr code is detected (w.r.t deviceWorld) - self.marker_positions_list = ( - [] - ) # type: List[np.ndarray] # List of position as np.ndarray (x, y, z) - self.marker_quaternion_list = ( - [] - ) # type: List[np.ndarray] # List of quaternions as np.ndarray (x, y, z, w) - self.avg_deviceWorld_T_marker = None # type: Optional[sp.SE3] + self.marker_positions_list: List[ + np.ndarray + ] = [] # List of position as np.ndarray (x, y, z) + self.marker_quaternion_list: List[ + np.ndarray + ] = [] # List of quaternions as np.ndarray (x, y, z, w) + self.avg_deviceWorld_T_marker: Optional[sp.SE3] = None def connect(self): """ @@ -155,64 +166,65 @@ def initialize_object_detector( raise NotImplementedError def initialize_human_motion_detector(self, outputs: dict = {}): - """ - """ + """ """ raise NotImplementedError ##### ROS Publishers ##### - def publish_human_pose(self, data_frame: DataFrame): - """ - Publishes current pose of Device as a pose of interest for Spot - - Args: - data_frame (DataFrame): DataFrame object containing data packet with rgb, depth - and all necessary transformations - - Publishes: - - /human_pose_publisher: ROS PoseStamped message for quest3World_T_rgb # TODO: Update this from rgb to VIZ frame - """ - # Publish as pose for detail - pose_stamped = PoseStamped() - pose_stamped.header.stamp = rospy.Time().now() - pose_stamped.header.seq = data_frame._frame_number - pose_stamped.header.frame_id = rf.QUEST3_CAMERA - pose_stamped.pose = data_frame._deviceWorld_T_camera_rgb # TODO: Use a VIZ frame which is forward facing - self.human_pose_publisher.publish(pose_stamped) - - def publish_marker(self, data_frame: DataFrame, marker_scale: float=0.1): - """ - Publishes marker at pose of interest - - Args: - data_frame (DataFrame): DataFrame object containing data packet with rgb, depth - and all necessary transformations - - Publishes: - - /pose_of_interest_marker_publisher: ROS PoseStamped message for quest3World_T_rgb # TODO: Update this from rgb to VIZ frame - """ - # Publish as marker for interpretability - marker = Marker() - marker.header.stamp = rospy.Time().now() - marker.header.frame_id = rf.rf.QUEST3_CAMERA - marker.id = 
self._frame_number - - marker.type = Marker.SPHERE - marker.action = Marker.ADD - - marker.pose = data_frame._deviceWorld_T_camera_rgb - - marker.scale.x = marker_scale - marker.scale.y = marker_scale - marker.scale.z = marker_scale - - marker.color.a = 1.0 - marker.color.r = 0.45 - marker.color.g = 0.95 - marker.color.b = 0.2 - - self.pose_of_interest_marker_publisher.publish(marker) - - def publish_human_activity_history(self, human_history: List[Tuple[float,str]]): + # def publish_human_pose(self, data_frame: DataFrame): + # """ + # Publishes current pose of Device as a pose of interest for Spot + + # Args: + # data_frame (DataFrame): DataFrame object containing data packet with rgb, depth + # and all necessary transformations + + # Publishes: + # - /human_pose_publisher: ROS PoseStamped message for quest3World_T_rgb # TODO: Update this from rgb to VIZ frame + # """ + # # Publish as pose for detail + # pose_stamped = PoseStamped() + # pose_stamped.header.stamp = rospy.Time().now() + # pose_stamped.header.seq = data_frame._frame_number + # pose_stamped.header.frame_id = rf.QUEST3_CAMERA + # pose_stamped.pose = ( + # data_frame._deviceWorld_T_camera_rgb + # ) # TODO: Use a VIZ frame which is forward facing + # self.human_pose_publisher.publish(pose_stamped) + + # def publish_marker(self, data_frame: DataFrame, marker_scale: float = 0.1): + # """ + # Publishes marker at pose of interest + + # Args: + # data_frame (DataFrame): DataFrame object containing data packet with rgb, depth + # and all necessary transformations + + # Publishes: + # - /pose_of_interest_marker_publisher: ROS PoseStamped message for quest3World_T_rgb # TODO: Update this from rgb to VIZ frame + # """ + # # Publish as marker for interpretability + # marker = Marker() + # marker.header.stamp = rospy.Time().now() + # marker.header.frame_id = rf.rf.QUEST3_CAMERA + # marker.id = self._frame_number + + # marker.type = Marker.SPHERE + # marker.action = Marker.ADD + + # marker.pose = data_frame._deviceWorld_T_camera_rgb + + # marker.scale.x = marker_scale + # marker.scale.y = marker_scale + # marker.scale.z = marker_scale + + # marker.color.a = 1.0 + # marker.color.r = 0.45 + # marker.color.g = 0.95 + # marker.color.b = 0.2 + + # self.pose_of_interest_marker_publisher.publish(marker) + + def publish_human_activity_history(self, human_history: List[Tuple[float, str]]): """ Publishes human activity history as a string @@ -227,7 +239,9 @@ def publish_human_activity_history(self, human_history: List[Tuple[float,str]]): # Iterate over human history and create a string human_history_str = "" for timestamp, activity in human_history: - human_history_str += f"{timestamp},{activity}|" # str = "time1,Standing|time2,Walking" + human_history_str += ( + f"{timestamp},{activity}|" # str = "time1,Standing|time2,Walking" + ) # Publish human activity history string self.human_activity_history_pub.publish(human_history_str) diff --git a/aria_data_loaders/aria_data_utils/quest3_data_streamer.py b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py index 37b90722..ae7d1a0b 100644 --- a/aria_data_loaders/aria_data_utils/quest3_data_streamer.py +++ b/aria_data_loaders/aria_data_utils/quest3_data_streamer.py @@ -19,6 +19,7 @@ import logging import time +import traceback from typing import Any, Dict, List, Optional, Tuple import click @@ -26,7 +27,12 @@ import matplotlib.pyplot as plt import numpy as np import rospy -import sophus as sp + +try: + import sophuspy as sp +except Exception as e: + print(f"Cannot import sophuspy due to {e}. 
Import sophus instead") + import sophus as sp from perception_and_utils.utils.conversions import ( sophus_SE3_to_ros_PoseStamped, sophus_SE3_to_ros_TransformStamped, @@ -47,7 +53,10 @@ def __init__(self, verbose: bool = False) -> None: handler.setFormatter(formatter) self.logger.addHandler(handler) - self.frame_rate_counter = FrameRateCounter() + self.frc_all = FrameRateCounter() + self.frc_qrd = FrameRateCounter() + self.frc_hmd = FrameRateCounter() + self.frc_od = FrameRateCounter() self.unified_quest3_camera = UnifiedQuestCamera() self.rgb_cam_params: CameraParams = None @@ -60,14 +69,14 @@ def connect(self): Connect to Device """ self._is_connected = True - self.logger("Quest3DataStreamer :: Connected") + self.logger.info("Quest3DataStreamer :: Connected") def disconnect(self): """ Disconnects from Device cleanly """ self._is_connected = False - self.logger("Quest3DataStreamer :: Disconnected") + self.logger.info("Quest3DataStreamer :: Disconnected") def is_connected(self) -> bool: return ( @@ -167,10 +176,11 @@ def get_latest_data_frame(self) -> Optional[DataFrame]: data_frame._aligned_depth_frame = None # TODO: Add this logic # Update frame rate counter - self.frame_rate_counter.update() + # self.frame_rate_counter.update() data_frame._avg_rgb_fps = self.unified_quest3_camera.get_avg_fps_rgb() data_frame._avg_depth_fps = self.unified_quest3_camera.get_avg_fps_depth() - data_frame._avg_data_frame_fps = self.frame_rate_counter.avg_value() + # data_frame._avg_data_frame_fps = self.frame_rate_counter.avg_value() + data_frame._avg_data_frame_fps = 0.0 return data_frame @@ -183,15 +193,32 @@ def process_frame( detect_human_motion: bool = False, object_label="milk bottle", ) -> Tuple[np.ndarray, dict]: + rate_all = 0.0 + rate_qrd = 0.0 + rate_hmd = 0.0 + rate_od = 0.0 + + self.frc_all.start() + # Initialize object_scores to empty dict for current image frame # object_scores = {} # type: Dict[str, Any] # Get rgb image from frame viz_img = data_frame._rgb_frame if viz_img is None: - rospy.logwarn("No image found in frame") + rospy.logwarn("No image found in frame") # This gets over-written.. WASTE! 
+ + if detect_objects: + self.frc_od.start() + viz_img, object_scores = self.object_detector.process_frame(data_frame) + # if object_label in object_scores.keys(): + # if self.verbose: + # plt.imsave(f"frame_{self.frame_number}.jpg", viz_img) + # self.publish_pose_of_interest(frame.get("ros_pose")) + rate_od = self.frc_od.stop() if detect_qr: + self.frc_qrd.start() (viz_img, camera_T_marker) = self.april_tag_detector.process_frame(frame=data_frame) # type: ignore deviceWorld_T_camera = data_frame._deviceWorld_T_camera_rgb @@ -213,9 +240,9 @@ def process_frame( self.marker_quaternion_list, self.avg_deviceWorld_T_marker, ) = get_running_avg_a_T_b( - current_avg_a_T_b=self.avg_deviceWorld_T_marker, - a_T_b_position_list=self.marker_positions_list, - a_T_b_quaternion_list=self.marker_quaternion_list, + current_avg_a_T_b=self.avg_deviceWorld_T_marker, # type: ignore + a_T_b_position_list=self.marker_positions_list, # type: ignore + a_T_b_quaternion_list=self.marker_quaternion_list, # type: ignore a_T_intermediate=deviceWorld_T_camera, intermediate_T_b=camera_T_marker, ) @@ -240,7 +267,10 @@ def process_frame( viz_img=viz_img, ) + rate_qrd = self.frc_qrd.stop() + if detect_human_motion: + self.frc_hmd.start() activity_str, avg_velocity = self.human_motion_detector.process_frame( frame=data_frame ) @@ -253,14 +283,24 @@ def process_frame( self.publish_human_activity_history( self.human_motion_detector.get_human_motion_history() ) + rate_hmd = self.frc_hmd.stop() if detect_objects: + self.frc_od.start() viz_img, object_scores = self.object_detector.process_frame(viz_img) if object_label in object_scores.keys(): if self.verbose: plt.imsave(f"frame_{self.frame_number}.jpg", viz_img) # self.publish_pose_of_interest(frame.get("ros_pose")) + rate_od = self.frc_od.stop() + + rate_all = self.frc_all.stop() + + outputs["process_all"] = rate_all + outputs["process_qr"] = rate_qrd + outputs["process_od"] = rate_od + outputs["process_hmd"] = rate_hmd return viz_img, outputs def initialize_april_tag_detector(self, outputs: dict = {}): @@ -345,11 +385,20 @@ def main(do_update_iptables: bool, debug: bool, hz: int): rospy.init_node("quest3_data_streamer", log_level=_log_level) rospy.loginfo("Starting quest3_data_streamer node") + # Logger for profiling + loggerp = logging.getLogger("Profiler-Quest3ProcessFrame") + loggerp.setLevel(logging.INFO) + csv_handlerp = logging.FileHandler("quest3_time_profiles_sec.txt") + formatterp = logging.Formatter("%(asctime)s, %(message)s") + csv_handlerp.setFormatter(formatterp) + loggerp.addHandler(csv_handlerp) + if do_update_iptables: update_iptables_quest3() else: rospy.logwarn("Not updating iptables") + outer_frc = FrameRateCounter() # rate = rospy.Rate(hz) outputs: Dict[str, Any] = {} data_streamer = None @@ -364,6 +413,7 @@ def main(do_update_iptables: bool, debug: bool, hz: int): data_streamer.initialize_human_motion_detector() # data_streamer.connect() while not rospy.is_shutdown(): + outer_frc.start() data_frame = data_streamer.get_latest_data_frame() if data_frame is not None: if debug: @@ -380,9 +430,15 @@ def main(do_update_iptables: bool, debug: bool, hz: int): cv2.waitKey(1) else: print("No data frame received.") - except Exception as e: + + outer_rate = outer_frc.stop() + msg_str = f"Fetch+AllDetection:{outer_rate},All_detectors={outputs.get('process_all',0.0)},QR={outputs.get('process_qr',0.0)},OD={outputs.get('process_od',0.0)},HMD={outputs.get('process_hmd',0.0)}" + loggerp.info(msg_str) + print(msg_str) + + except Exception: print("Ending script.") - 
rospy.logwarn(f"Exception: {e}") + rospy.logwarn(f"Exception: {traceback.format_exc()}") if data_streamer is not None: data_streamer.disconnect() diff --git a/aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py b/aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py index a6108b00..503f41a1 100644 --- a/aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py +++ b/aria_data_loaders/aria_data_utils/unified_quest_camera_interface.py @@ -3,7 +3,12 @@ # LICENSE file in the root directory of this source tree. from typing import Optional, Tuple -import sophus as sp + +try: + import sophuspy as sp +except Exception as e: + print(f"Cannot import sophuspy due to {e}. Import sophus instead") + import sophus as sp import numpy as np """ @@ -51,9 +56,9 @@ def get_device_T_depthCamera(self) -> Optional[sp.SE3]: def get_avg_fps_rgb(self): raise NotImplementedError - + def get_avg_fps_depth(self): raise NotImplementedError - def is_connected(): + def is_connected(self): raise NotImplementedError diff --git a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py index 3a83a9a9..6a46803c 100644 --- a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py +++ b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/human_motion_detector.py @@ -5,19 +5,25 @@ import logging import os -from typing import Any, Dict, Tuple +from typing import Any, Dict, List, Tuple import numpy as np -import sophus as sp + +try: + import sophuspy as sp +except Exception as e: + print(f"Cannot import sophuspy due to {e}. Import sophus instead") + import sophus as sp + from perception_and_utils.perception.detector_wrappers.generic_detector_interface import ( GenericDetector, ) -from perception_and_utils.utils.image_utils import label_img, decorate_img_with_fps from perception_and_utils.utils.data_frame import DataFrame +from perception_and_utils.utils.image_utils import decorate_img_with_fps, label_img + class HumanMotionDetector(GenericDetector): - """ - """ + """ """ def __init__(self): super().__init__() @@ -26,8 +32,9 @@ def __init__(self): def _init_human_motion_detector( self, # velocity_threshold=1.2, - velocity_threshold=0.4, # hacking this - time_horizon_ns=1000000000): + velocity_threshold=0.4, # hacking this + time_horizon_ns=1000000000, + ): """ Initialize the human motion detector @@ -39,16 +46,15 @@ def _init_human_motion_detector( self.enable_detector() self._velocity_threshold = velocity_threshold - self._time_horizon_ns = time_horizon_ns # 1 second - self._previous_frames = [] # type: List[Tuple[np.ndarray, float]] - self._len_prev_frame_cache = 100 # Keep this value larger than fps - + self._time_horizon_ns = time_horizon_ns # 1 second + self._previous_frames = [] # type: List[Tuple[np.ndarray, float]] + self._len_prev_frame_cache = 100 # Keep this value larger than fps - self._human_motion_history = [] # type: List[Tuple[float, str]] + self._human_motion_history = [] # type: List[Tuple[float, str]] def process_frame( self, - frame: DataFrame, # new data frame + frame: DataFrame, # new data frame ) -> Tuple[str, float]: # Do nothing if detector is not enabled if self.is_enabled is False: @@ -64,8 +70,8 @@ def process_frame( self._previous_frames.append((current_position, frame._timestamp_s)) # If the cache is just 1 element or less, return "Standing" - 
lookback_index=10 - if len(self._previous_frames) < lookback_index+1: + lookback_index = 10 + if len(self._previous_frames) < lookback_index + 1: return "Standing", None # If the cache is full, remove the oldest frame @@ -73,9 +79,15 @@ def process_frame( self._previous_frames.pop(0) # Calculate the Euclidean distance between now & then - print(f"Current position: {current_position} | Previous position: {self._previous_frames[lookback_index][0]}") - distance = np.linalg.norm(current_position - self._previous_frames[lookback_index][0]) - time = frame._timestamp_s - self._previous_frames[lookback_index][1] # as we are considering x frames per second so our window is 1 sec + print( + f"Current position: {current_position} | Previous position: {self._previous_frames[lookback_index][0]}" + ) + distance = np.linalg.norm( + current_position - self._previous_frames[lookback_index][0] + ) + time = ( + frame._timestamp_s - self._previous_frames[lookback_index][1] + ) # as we are considering x frames per second so our window is 1 sec print(f"Distance: {distance} m | Time: {time} sec") avg_velocity = distance / time @@ -93,8 +105,7 @@ def get_outputs( img_frame: np.ndarray, outputs: Dict, ) -> Tuple[np.ndarray, Dict]: - """ - """ + """ """ viz_img = img_frame.copy() # Decorate image with text for visualization label_img( @@ -107,12 +118,15 @@ def get_outputs( text=f"Velocity: {outputs['velocity']}", org=(50, 250), ) - decorate_img_with_fps(viz_img, outputs["data_frame_fps"],pos=(50, 400)) + decorate_img_with_fps(viz_img, outputs["data_frame_fps"], pos=(50, 400)) return viz_img, outputs def update_human_motion_history(self, state, timestamp): # If current state of human is different from last state in history list, only then update history - if len(self._human_motion_history) == 0 or self._human_motion_history[-1][1] != state: + if ( + len(self._human_motion_history) == 0 + or self._human_motion_history[-1][1] != state + ): self._human_motion_history.append((timestamp, state)) def get_human_motion_history(self): diff --git a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py index 27540a81..b5ebf0e0 100644 --- a/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py +++ b/perception_and_utils_root/perception_and_utils/perception/detector_wrappers/object_detector.py @@ -10,11 +10,11 @@ from perception_and_utils.perception.detector_wrappers.generic_detector_interface import ( GenericDetector, ) +from perception_and_utils.utils.data_frame import DataFrame from perception_and_utils.utils.image_utils import ( centered_object_detection_heuristic, check_bbox_intersection, ) -from perception_and_utils.utils.data_frame import DataFrame from spot_rl.models.owlvit import OwlVit @@ -226,7 +226,9 @@ def process_frame(self, frame: DataFrame) -> Tuple[np.ndarray, Dict[str, Any]]: ) return updated_img_frame, object_scores - def process_frame_offline(self, frame: DataFrame) -> Tuple[np.ndarray, Dict[str, Any]]: + def process_frame_offline( + self, frame: DataFrame + ) -> Tuple[np.ndarray, Dict[str, Any]]: """ Process image frame to detect object instances and score them based on heuristics @@ -239,6 +241,8 @@ def process_frame_offline(self, frame: DataFrame) -> Tuple[np.ndarray, Dict[str, updated_img_frame (np.ndarray) : Image frame with detections and text for visualization object_scores (Dict[str, float]) : Dictionary of scores for each 
object in the image frame """ + img_frame = frame._rgb_frame + # Do nothing if detector is not enabled if self.is_enabled is False: self._logger.warning( diff --git a/perception_and_utils_root/perception_and_utils/utils/data_frame.py b/perception_and_utils_root/perception_and_utils/utils/data_frame.py index 116bd16a..54b06dea 100644 --- a/perception_and_utils_root/perception_and_utils/utils/data_frame.py +++ b/perception_and_utils_root/perception_and_utils/utils/data_frame.py @@ -1,21 +1,30 @@ +from typing import Optional + import numpy as np -import sophus as sp + +try: + import sophuspy as sp +except Exception as e: + print(f"Cannot import sophuspy due to {e}. Import sophus instead") + import sophus as sp + class DataFrame: """ DataFrame class to store instantaneous data from Human Sensor stream. It stores the image, timestamp and frame number of each received image """ + def __init__(self): self._frame_number = -1 self._timestamp_s = -1 - self._avg_rgb_fps = None # type: Optional[float] - self._avg_depth_fps = None # type: Optional[float] - self._avg_data_frame_fps = None # type: Optional[float] - self._rgb_frame = None # type: Optional[np.ndarray] - self._depth_frame = None # type: Optional[np.ndarray] - self._aligned_depth_frame = None # type: Optional[np.ndarray] - self._deviceWorld_T_camera_rgb = None # type: Optional[sp.SE3] - self._deviceWorld_T_camera_depth = None # type: Optional[sp.SE3] - self._device_T_camera_rgb = None # type: Optional[sp.SE3] - self._device_T_camera_depth = None # type: Optional[sp.SE3] + self._avg_rgb_fps = None # type: Optional[float] + self._avg_depth_fps = None # type: Optional[float] + self._avg_data_frame_fps = None # type: Optional[float] + self._rgb_frame = None # type: Optional[np.ndarray] + self._depth_frame = None # type: Optional[np.ndarray] + self._aligned_depth_frame = None # type: Optional[np.ndarray] + self._deviceWorld_T_camera_rgb = None # type: Optional[sp.SE3] + self._deviceWorld_T_camera_depth = None # type: Optional[sp.SE3] + self._device_T_camera_rgb = None # type: Optional[sp.SE3] + self._device_T_camera_depth = None # type: Optional[sp.SE3]
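Usage note for the DataFrame container defined in this last hunk: it is a plain data holder that the detector wrappers read from (for example, process_frame_offline() above only needs _rgb_frame, while the QR path in the streamer also reads _deviceWorld_T_camera_rgb). The sketch below fills in those fields by hand; the zero image, the identity poses, and the commented-out detector call are placeholders, not part of this patch.

import time

import numpy as np

try:
    import sophuspy as sp
except Exception as e:  # same fallback used throughout this patch
    print(f"Cannot import sophuspy due to {e}. Import sophus instead")
    import sophus as sp

from perception_and_utils.utils.data_frame import DataFrame

frame = DataFrame()
frame._frame_number = 0
frame._timestamp_s = time.time()
frame._rgb_frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in RGB image
frame._deviceWorld_T_camera_rgb = sp.SE3()  # identity pose as a placeholder
frame._device_T_camera_rgb = sp.SE3()

# A detector wrapper constructed and enabled as in Quest3DataStreamer could then
# consume it, e.g.:
# viz_img, object_scores = object_detector.process_frame_offline(frame)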