From 80cde171dd15ced0a84b5ee8f175a67498448b4d Mon Sep 17 00:00:00 2001
From: Eetu
Date: Wed, 3 May 2023 15:51:42 +0300
Subject: [PATCH] Add option for local obs for MAPPO with CTDE

---
 .../cfg/task/MobileFrankaMARL.yaml           |   2 +
 .../cfg/task/MobileFrankaMARL_cv.yaml        | 133 ++++++++++++++++++
 .../cfg/train/MobileFrankaMARL_cvPPO.yaml    |  88 ++++++++++++
 omniisaacgymenvs/tasks/mobile_franka_marl.py |  62 ++++++--
 omniisaacgymenvs/utils/task_util.py          |   3 +-
 5 files changed, 275 insertions(+), 13 deletions(-)
 create mode 100644 omniisaacgymenvs/cfg/task/MobileFrankaMARL_cv.yaml
 create mode 100644 omniisaacgymenvs/cfg/train/MobileFrankaMARL_cvPPO.yaml

diff --git a/omniisaacgymenvs/cfg/task/MobileFrankaMARL.yaml b/omniisaacgymenvs/cfg/task/MobileFrankaMARL.yaml
index 7d5c0927..6d0ff0cd 100644
--- a/omniisaacgymenvs/cfg/task/MobileFrankaMARL.yaml
+++ b/omniisaacgymenvs/cfg/task/MobileFrankaMARL.yaml
@@ -31,6 +31,8 @@ env:
   actionPenaltyScale: 0.01
   fingerCloseRewardScale: 10.0
 
+  useLocalObs: False
+
 sim:
   dt: 0.0083 # 1/120 s
   use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
diff --git a/omniisaacgymenvs/cfg/task/MobileFrankaMARL_cv.yaml b/omniisaacgymenvs/cfg/task/MobileFrankaMARL_cv.yaml
new file mode 100644
index 00000000..62756436
--- /dev/null
+++ b/omniisaacgymenvs/cfg/task/MobileFrankaMARL_cv.yaml
@@ -0,0 +1,133 @@
+# used to create the object
+name: MobileFrankaMARL
+
+physics_engine: ${..physics_engine}
+
+# if given, will override the device setting in gym.
+env:
+  numEnvs: ${resolve_default:512,${...num_envs}}
+  envSpacing: 3.0
+  episodeLength: 500 # may need tuning
+  enableDebugVis: False
+
+  clipObservations: 7.0
+  clipActions: 1.0
+
+  controlFrequencyInv: 2 # 2 -> 60 Hz control at dt = 1/120 s; may need tuning
+
+  startPositionNoise: 0.0
+  startRotationNoise: 0.0
+
+  numProps: 4
+  aggregateMode: 3
+
+  actionScale: 7.5
+  dofVelocityScale: 0.1
+  distRewardScale: 2.0
+  rotRewardScale: 0.5
+  aroundHandleRewardScale: 10.0
+  openRewardScale: 7.5
+  fingerDistRewardScale: 100.0
+  actionPenaltyScale: 0.01
+  fingerCloseRewardScale: 10.0
+
+  useLocalObs: True
+
+sim:
+  dt: 0.0083 # 1/120 s
+  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
+  gravity: [0.0, 0.0, -9.81]
+  add_ground_plane: True
+  use_flatcache: True
+  enable_scene_query_support: False
+  disable_contact_processing: False
+
+  # set to True if you use camera sensors in the environment
+  enable_cameras: False
+
+  default_physics_material:
+    static_friction: 1.0
+    dynamic_friction: 1.0
+    restitution: 0.0
+
+  physx:
+    worker_thread_count: ${....num_threads}
+    solver_type: 0 # use PGS, otherwise target velocities are not stable (0: pgs, 1: tgs; default: ${....solver_type})
+    use_gpu: ${eq:${....sim_device},"gpu"} # set to False to run on CPU
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 6
+    contact_offset: 0.005
+    rest_offset: 0.0
+    bounce_threshold_velocity: 0.2
+    friction_offset_threshold: 0.04
+    friction_correlation_distance: 0.025
+    enable_sleeping: True
+    enable_stabilization: True
+    max_depenetration_velocity: 1000.0
+
+    # GPU buffers
+    gpu_max_rigid_contact_count: 524288
+    gpu_max_rigid_patch_count: 33554432
+    gpu_found_lost_pairs_capacity: 524288
+    gpu_found_lost_aggregate_pairs_capacity: 262144
+    gpu_total_aggregate_pairs_capacity: 1048576
+    gpu_max_soft_body_contacts: 1048576
+    gpu_max_particle_contacts: 1048576
+    gpu_heap_capacity: 33554432
+    gpu_temp_buffer_capacity: 16777216
+    gpu_max_num_partitions: 8
+
+  mobile_franka:
+    # -1 to use default values
+    override_usd_defaults: False
+    fixed_base: False
+    enable_self_collisions: True
+    enable_gyroscopic_forces: True
+    # also in stage params
+    # per-actor
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 1
+    sleep_threshold: 0.005
+    stabilization_threshold: 0.001
+    # per-body
+    density: -1
+    max_depenetration_velocity: 1000.0
+    # per-shape
+    contact_offset: 0.005
+    rest_offset: 0.0
+  cabinet:
+    # -1 to use default values
+    override_usd_defaults: False
+    fixed_base: False
+    enable_self_collisions: False
+    enable_gyroscopic_forces: True
+    # also in stage params
+    # per-actor
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 1
+    sleep_threshold: 0.0
+    stabilization_threshold: 0.001
+    # per-body
+    density: -1
+    max_depenetration_velocity: 1000.0
+    # per-shape
+    contact_offset: 0.005
+    rest_offset: 0.0
+  prop:
+    # -1 to use default values
+    override_usd_defaults: False
+    fixed_base: False
+    enable_self_collisions: False
+    enable_gyroscopic_forces: True
+    # also in stage params
+    # per-actor
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 1
+    sleep_threshold: 0.005
+    stabilization_threshold: 0.001
+    # per-body
+    density: 100
+    max_depenetration_velocity: 1000.0
+    # per-shape
+    contact_offset: 0.005
+    rest_offset: 0.0
diff --git a/omniisaacgymenvs/cfg/train/MobileFrankaMARL_cvPPO.yaml b/omniisaacgymenvs/cfg/train/MobileFrankaMARL_cvPPO.yaml
new file mode 100644
index 00000000..25664d4c
--- /dev/null
+++ b/omniisaacgymenvs/cfg/train/MobileFrankaMARL_cvPPO.yaml
@@ -0,0 +1,88 @@
+params:
+  seed: ${...seed}
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [512, 256, 128] #[256, 128, 64]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+
+  load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
+  load_path: ${...checkpoint} # path to the checkpoint to load
+
+  config:
+    name: ${resolve_default:MobileFrankaMARL,${....experiment}}
+    full_experiment_name: ${.name}
+    env_name: rlgpu
+    device: ${....rl_device}
+    device_name: ${....rl_device}
+    ppo: True
+    mixed_precision: False
+    normalize_input: True
+    normalize_value: True
+    num_actors: ${....task.env.numEnvs}
+    reward_shaper:
+      scale_value: 0.01
+    normalize_advantage: True
+    gamma: 0.95
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    score_to_win: 100000000
+    max_epochs: ${resolve_default:1500,${....max_iterations}}
+    save_best_after: 200
+    save_frequency: 100
+    print_stats: True
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    horizon_length: 16
+    minibatch_size: 4096 #128 #1024
+    mini_epochs: 8
+    critic_coef: 4
+    clip_value: True
+    seq_len: 4
+    bounds_loss_coef: 0.0001
+
+    central_value_config:
+      minibatch_size: 2048
+      mini_epochs: 4
+      learning_rate: 3e-4
+      clip_value: False
+      normalize_input: True
+      network:
+        name: actor_critic
+        central_value: True
+        mlp:
+          units: [512, 256, 128]
+          activation: elu
+          initializer:
+            name: default
+            scale: 2
+          regularizer:
+            name: None
diff --git a/omniisaacgymenvs/tasks/mobile_franka_marl.py b/omniisaacgymenvs/tasks/mobile_franka_marl.py
index c0e2d1d5..5dead676 100644
--- a/omniisaacgymenvs/tasks/mobile_franka_marl.py
+++ b/omniisaacgymenvs/tasks/mobile_franka_marl.py
@@ -65,6 +65,8 @@ def __init__(
         self.action_penalty_scale = self._task_cfg["env"]["actionPenaltyScale"]
         self.finger_close_reward_scale = self._task_cfg["env"]["fingerCloseRewardScale"]
 
+        self.use_local_obs = self._task_cfg["env"]["useLocalObs"]
+
         self.distX_offset = 0.04
         #self.dt = 1/60.
 
         # these values depend on the task and how we interface with the real robot
@@ -75,6 +77,10 @@ def __init__(
         self._num_actions = 9
         self._num_agents = 2
 
+        if self.use_local_obs:
+            self._num_observations = 26
+            self._num_states = 27 + 3 # the old 27-dim obs plus base_vel_xy and base_angvel_z
+
         self.initial_target_pos = np.array([2.0, 0.0, 0.5])
 
         # set the ranges for the target randomization
@@ -243,24 +249,56 @@ def get_observations(self) -> dict:
 
         self.to_target = self.target_positions - self.franka_lfinger_pos
 
-        obs = torch.hstack((
-            base_pos_xy,
-            base_yaw,
-            arm_dof_pos_scaled,
-            #base_vel_xy,
-            #base_angvel_z,
-            franka_dof_vel[:, 3:] * self.dof_vel_scale,
-            self.franka_lfinger_pos,
-            self.target_positions
-        )).to(dtype=torch.float32)
+        if self.use_local_obs:
+            # pad base_obs with zeros so it matches the width of arm_obs
+            base_obs = torch.hstack((
+                base_pos_xy,
+                base_yaw,
+                self.franka_lfinger_pos,
+                self.target_positions,
+                torch.zeros((self.num_envs, 15), device=self._device)
+            )).to(dtype=torch.float32)
+
+            arm_obs = torch.hstack((
+                arm_dof_pos_scaled,
+                franka_dof_vel[:, 3:] * self.dof_vel_scale,
+                self.franka_lfinger_pos,
+                self.target_positions
+            )).to(dtype=torch.float32)
+
+            self.states_buf = torch.hstack((
+                base_pos_xy,
+                base_yaw,
+                base_vel_xy,
+                base_angvel_z,
+                arm_dof_pos_scaled,
+                franka_dof_vel[:, 3:] * self.dof_vel_scale,
+                self.franka_lfinger_pos,
+                self.target_positions
+            )).to(dtype=torch.float32)
+
+        else:
+            obs = torch.hstack((
+                base_pos_xy,
+                base_yaw,
+                arm_dof_pos_scaled,
+                #base_vel_xy,
+                #base_angvel_z,
+                franka_dof_vel[:, 3:] * self.dof_vel_scale,
+                self.franka_lfinger_pos,
+                self.target_positions
+            )).to(dtype=torch.float32)
+
+            base_obs = obs
+            arm_obs = obs
 
         #print("obs", obs)
         #input()
 
         base_id = torch.tensor([1.0, 0.0], device=self._device)
         arm_id = torch.tensor([0.0, 1.0], device=self._device)
 
-        base_obs = torch.hstack((obs, base_id.repeat(self.num_envs, 1)))
-        arm_obs = torch.hstack((obs, arm_id.repeat(self.num_envs, 1)))
+        base_obs = torch.hstack((base_obs, base_id.repeat(self.num_envs, 1)))
+        arm_obs = torch.hstack((arm_obs, arm_id.repeat(self.num_envs, 1)))
 
         self.obs_buf = torch.vstack((base_obs, arm_obs))
diff --git a/omniisaacgymenvs/utils/task_util.py b/omniisaacgymenvs/utils/task_util.py
index 78598c77..5f9f1234 100644
--- a/omniisaacgymenvs/utils/task_util.py
+++ b/omniisaacgymenvs/utils/task_util.py
@@ -65,7 +65,8 @@ def initialize_task(config, env, init_sim=True):
         "Jetbot_CNN": JetbotTask,
         "FrankaExample": FrankaExampleTask,
         "MobileFranka": MobileFrankaTask,
-        "MobileFrankaMARL": MobileFrankaMARLTask
+        "MobileFrankaMARL": MobileFrankaMARLTask,
+        "MobileFrankaMARL_cv": MobileFrankaMARLTask
     }
 
     from .config_utils.sim_config import SimConfig
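
Reviewer notes (not part of the patch):

With useLocalObs enabled, each agent's policy sees only a 26-dim local observation (its own features, zero-padded to a shared 24-dim width, plus a 2-dim one-hot agent id), while the critic configured under central_value_config trains on the full 30-dim state in states_buf. Below is a minimal, shape-level sketch of that layout; num_envs and the torch.randn placeholders are made up for illustration, and only the sizes mirror get_observations() above.

    import torch

    num_envs = 4                  # placeholder; the configs use 512 envs
    obs_dim, state_dim = 26, 30   # per-agent obs and global state sizes from this patch

    # base agent: base_pos_xy (2) + base_yaw (1) + lfinger_pos (3) + target_pos (3) = 9,
    # zero-padded by 15 so both agents share one 24-dim feature width
    base_local = torch.hstack((torch.randn(num_envs, 9), torch.zeros(num_envs, 15)))
    # arm agent: 9 scaled joint positions + 9 scaled joint velocities + lfinger_pos + target_pos = 24
    arm_local = torch.randn(num_envs, 24)

    # one-hot ids let the two agents share a single policy network
    base_id = torch.tensor([1.0, 0.0]).repeat(num_envs, 1)
    arm_id = torch.tensor([0.0, 1.0]).repeat(num_envs, 1)

    # actors: agents stacked along the batch dimension, local views only
    obs_buf = torch.vstack((torch.hstack((base_local, base_id)),
                            torch.hstack((arm_local, arm_id))))
    # centralized critic: one unpadded global state per env (the 27 old obs dims + 3 base velocity terms)
    states_buf = torch.randn(num_envs, state_dim)

    assert obs_buf.shape == (2 * num_envs, obs_dim)
    assert states_buf.shape == (num_envs, state_dim)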
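
On the training side, rl_games treats central_value_config as a separate value network trained on the states the env reports (asymmetric actor-critic), so combined with the shared-parameter PPO actor this gives MAPPO-style centralized training with decentralized execution. Assuming this fork keeps the stock OmniIsaacGymEnvs convention of resolving the train config as ${task}PPO, running with task=MobileFrankaMARL_cv should pick up MobileFrankaMARL_cvPPO automatically; otherwise pass train=MobileFrankaMARL_cvPPO explicitly. Since task_util.py maps MobileFrankaMARL_cv to the same MobileFrankaMARLTask class, useLocalObs is the only behavioral switch between the two tasks.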