Add option for local obs for MAPPO with CTDE
ranzuh committed May 3, 2023
1 parent 90d938f commit 80cde17
Showing 5 changed files with 275 additions and 13 deletions.
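In brief: this commit adds a useLocalObs switch to the two-agent MobileFranka task (mobile base + arm). When the switch is on, each agent observes only its own 26-dimensional local features, while a 30-dimensional global state is routed to a centralized value function — the MAPPO setup of centralized training with decentralized execution (CTDE). The per-agent stacking pattern the task uses is summarized below; shapes and variable names in this sketch are illustrative, not repository code:

import torch

num_envs, obs_dim = 512, 24
base_obs = torch.randn(num_envs, obs_dim)  # local features of the base agent
arm_obs = torch.randn(num_envs, obs_dim)   # local features of the arm agent

# One-hot agent IDs let the two agents share a single policy network.
base_id = torch.tensor([1.0, 0.0]).repeat(num_envs, 1)
arm_id = torch.tensor([0.0, 1.0]).repeat(num_envs, 1)

# Agents are stacked along the batch axis, so the trainer sees 2 * num_envs actors.
obs_buf = torch.vstack((
    torch.hstack((base_obs, base_id)),
    torch.hstack((arm_obs, arm_id)),
))  # shape: (2 * num_envs, obs_dim + 2) = (1024, 26)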
2 changes: 2 additions & 0 deletions omniisaacgymenvs/cfg/task/MobileFrankaMARL.yaml
@@ -31,6 +31,8 @@ env:
   actionPenaltyScale: 0.01
   fingerCloseRewardScale: 10.0
 
+  useLocalObs: False
+
 sim:
   dt: 0.0083 # 1/120 s
   use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
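useLocalObs defaults to False here, so existing MobileFrankaMARL runs keep the shared global observation. Assuming the standard OmniIsaacGymEnvs Hydra overrides also apply to this task (the launch scripts are not part of this diff), the flag could be flipped from the command line:

PYTHON_PATH scripts/rlgames_train.py task=MobileFrankaMARL task.env.useLocalObs=True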
133 changes: 133 additions & 0 deletions omniisaacgymenvs/cfg/task/MobileFrankaMARL_cv.yaml
@@ -0,0 +1,133 @@
+# used to create the object
+name: MobileFrankaMARL
+
+physics_engine: ${..physics_engine}
+
+# if given, will override the device setting in gym.
+env:
+  numEnvs: ${resolve_default:512,${...num_envs}}
+  envSpacing: 3.0
+  episodeLength: 500 # maybe need to play with this
+  enableDebugVis: False
+
+  clipObservations: 7.0
+  clipActions: 1.0
+
+  controlFrequencyInv: 2 # 30 Hz 2 for 60 hz, maybe need to play with this too
+
+  startPositionNoise: 0.0
+  startRotationNoise: 0.0
+
+  numProps: 4
+  aggregateMode: 3
+
+  actionScale: 7.5
+  dofVelocityScale: 0.1
+  distRewardScale: 2.0
+  rotRewardScale: 0.5
+  aroundHandleRewardScale: 10.0
+  openRewardScale: 7.5
+  fingerDistRewardScale: 100.0
+  actionPenaltyScale: 0.01
+  fingerCloseRewardScale: 10.0
+
+  useLocalObs: True
+
+sim:
+  dt: 0.0083 # 1/120 s
+  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
+  gravity: [0.0, 0.0, -9.81]
+  add_ground_plane: True
+  use_flatcache: True
+  enable_scene_query_support: False
+  disable_contact_processing: False
+
+  # set to True if you use camera sensors in the environment
+  enable_cameras: False
+
+  default_physics_material:
+    static_friction: 1.0
+    dynamic_friction: 1.0
+    restitution: 0.0
+
+  physx:
+    worker_thread_count: ${....num_threads}
+    solver_type: 0 # use pgs because otherwise target velocities is not stable 0: pgs, 1: tgs default: ${....solver_type}
+    use_gpu: ${eq:${....sim_device},"gpu"} # set to False to run on CPU
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 6
+    contact_offset: 0.005
+    rest_offset: 0.0
+    bounce_threshold_velocity: 0.2
+    friction_offset_threshold: 0.04
+    friction_correlation_distance: 0.025
+    enable_sleeping: True
+    enable_stabilization: True
+    max_depenetration_velocity: 1000.0
+
+    # GPU buffers
+    gpu_max_rigid_contact_count: 524288
+    gpu_max_rigid_patch_count: 33554432
+    gpu_found_lost_pairs_capacity: 524288
+    gpu_found_lost_aggregate_pairs_capacity: 262144
+    gpu_total_aggregate_pairs_capacity: 1048576
+    gpu_max_soft_body_contacts: 1048576
+    gpu_max_particle_contacts: 1048576
+    gpu_heap_capacity: 33554432
+    gpu_temp_buffer_capacity: 16777216
+    gpu_max_num_partitions: 8
+
+  mobile_franka:
+    # -1 to use default values
+    override_usd_defaults: False
+    fixed_base: False
+    enable_self_collisions: True
+    enable_gyroscopic_forces: True
+    # also in stage params
+    # per-actor
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 1
+    sleep_threshold: 0.005
+    stabilization_threshold: 0.001
+    # per-body
+    density: -1
+    max_depenetration_velocity: 1000.0
+    # per-shape
+    contact_offset: 0.005
+    rest_offset: 0.0
+  cabinet:
+    # -1 to use default values
+    override_usd_defaults: False
+    fixed_base: False
+    enable_self_collisions: False
+    enable_gyroscopic_forces: True
+    # also in stage params
+    # per-actor
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 1
+    sleep_threshold: 0.0
+    stabilization_threshold: 0.001
+    # per-body
+    density: -1
+    max_depenetration_velocity: 1000.0
+    # per-shape
+    contact_offset: 0.005
+    rest_offset: 0.0
+  prop:
+    # -1 to use default values
+    override_usd_defaults: False
+    fixed_base: False
+    enable_self_collisions: False
+    enable_gyroscopic_forces: True
+    # also in stage params
+    # per-actor
+    solver_position_iteration_count: 12
+    solver_velocity_iteration_count: 1
+    sleep_threshold: 0.005
+    stabilization_threshold: 0.001
+    # per-body
+    density: 100
+    max_depenetration_velocity: 1000.0
+    # per-shape
+    contact_offset: 0.005
+    rest_offset: 0.0
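This new config is a copy of MobileFrankaMARL.yaml with useLocalObs: True; its name: field still reads MobileFrankaMARL, so the same task class is constructed (see the task_util.py change below). Assuming the usual OmniIsaacGymEnvs convention that a task named X defaults to the train config XPPO, the variant would be selected with:

PYTHON_PATH scripts/rlgames_train.py task=MobileFrankaMARL_cv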
88 changes: 88 additions & 0 deletions omniisaacgymenvs/cfg/train/MobileFrankaMARL_cvPPO.yaml
@@ -0,0 +1,88 @@
+params:
+  seed: ${...seed}
+  algo:
+    name: a2c_continuous
+
+  model:
+    name: continuous_a2c_logstd
+
+  network:
+    name: actor_critic
+    separate: False
+
+    space:
+      continuous:
+        mu_activation: None
+        sigma_activation: None
+        mu_init:
+          name: default
+        sigma_init:
+          name: const_initializer
+          val: 0
+        fixed_sigma: True
+    mlp:
+      units: [512, 256, 128] #[256, 128, 64]
+      activation: elu
+      d2rl: False
+
+      initializer:
+        name: default
+      regularizer:
+        name: None
+
+  load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
+  load_path: ${...checkpoint} # path to the checkpoint to load
+
+  config:
+    name: ${resolve_default:MobileFrankaMARL,${....experiment}}
+    full_experiment_name: ${.name}
+    env_name: rlgpu
+    device: ${....rl_device}
+    device_name: ${....rl_device}
+    ppo: True
+    mixed_precision: False
+    normalize_input: True
+    normalize_value: True
+    num_actors: ${....task.env.numEnvs}
+    reward_shaper:
+      scale_value: 0.01
+    normalize_advantage: True
+    gamma: 0.95
+    tau: 0.95
+    learning_rate: 3e-4
+    lr_schedule: adaptive
+    kl_threshold: 0.008
+    score_to_win: 100000000
+    max_epochs: ${resolve_default:1500,${....max_iterations}}
+    save_best_after: 200
+    save_frequency: 100
+    print_stats: True
+    grad_norm: 1.0
+    entropy_coef: 0.0
+    truncate_grads: True
+    e_clip: 0.2
+    horizon_length: 16
+    minibatch_size: 4096 #128 #1024
+    mini_epochs: 8
+    critic_coef: 4
+    clip_value: True
+    seq_len: 4
+    bounds_loss_coef: 0.0001
+
+    central_value_config:
+      minibatch_size: 2048
+      mini_epochs: 4
+      learning_rate: 3e-4
+      clip_value: False
+      normalize_input: True
+      network:
+        name: actor_critic
+        central_value: True
+        mlp:
+          units: [512, 256, 128]
+          activation: elu
+          initializer:
+            name: default
+            scale: 2
+          regularizer:
+            name: None
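The central_value_config block is what makes this CTDE: rl_games trains a separate 512-256-128 ELU value network on the privileged global state, while the actor MLP above runs on each agent's local observation. A minimal sketch of that asymmetric actor-critic shape, with the 26/30 input sizes taken from the task code below (illustrative PyTorch, not rl_games internals):

import torch
import torch.nn as nn

def mlp(in_dim, units=(512, 256, 128), out_dim=1):
    # ELU MLP matching the units/activation in the config above
    layers, d = [], in_dim
    for u in units:
        layers += [nn.Linear(d, u), nn.ELU()]
        d = u
    layers.append(nn.Linear(d, out_dim))
    return nn.Sequential(*layers)

actor = mlp(26, out_dim=9)           # local obs -> 9 action means per agent
central_critic = mlp(30, out_dim=1)  # global state -> value estimate

mu = actor(torch.randn(1024, 26))             # 2 agents x 512 envs
value = central_critic(torch.randn(512, 30))  # one state per env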
62 changes: 50 additions & 12 deletions omniisaacgymenvs/tasks/mobile_franka_marl.py
@@ -65,6 +65,8 @@ def __init__(
         self.action_penalty_scale = self._task_cfg["env"]["actionPenaltyScale"]
         self.finger_close_reward_scale = self._task_cfg["env"]["fingerCloseRewardScale"]
 
+        self.use_local_obs = self._task_cfg["env"]["useLocalObs"]
+
         self.distX_offset = 0.04
         #self.dt = 1/60.
         # these values depend on the task and how we interface with the real robot
@@ -75,6 +77,10 @@
         self._num_actions = 9
         self._num_agents = 2
 
+        if self.use_local_obs:
+            self._num_observations = 26
+            self._num_states = 27 + 3 #27
+
         self.initial_target_pos = np.array([2.0, 0.0, 0.5])
 
         # set the ranges for the target randomization
@@ -243,24 +249,56 @@ def get_observations(self) -> dict:
 
         self.to_target = self.target_positions - self.franka_lfinger_pos
 
-        obs = torch.hstack((
-            base_pos_xy,
-            base_yaw,
-            arm_dof_pos_scaled,
-            #base_vel_xy,
-            #base_angvel_z,
-            franka_dof_vel[:, 3:] * self.dof_vel_scale,
-            self.franka_lfinger_pos,
-            self.target_positions
-        )).to(dtype=torch.float32)
+        if self.use_local_obs:
+            # pad base_obs with zeros to match the arm_obs
+            base_obs = torch.hstack((
+                base_pos_xy,
+                base_yaw,
+                self.franka_lfinger_pos,
+                self.target_positions,
+                torch.zeros((self.num_envs, 15), device=self._device)
+            )).to(dtype=torch.float32)
+
+            arm_obs = torch.hstack((
+                arm_dof_pos_scaled,
+                franka_dof_vel[:, 3:] * self.dof_vel_scale,
+                self.franka_lfinger_pos,
+                self.target_positions
+            )).to(dtype=torch.float32)
+
+            self.states_buf = torch.hstack((
+                base_pos_xy,
+                base_yaw,
+                base_vel_xy,
+                base_angvel_z,
+                arm_dof_pos_scaled,
+                franka_dof_vel[:, 3:] * self.dof_vel_scale,
+                self.franka_lfinger_pos,
+                self.target_positions
+            )).to(dtype=torch.float32)
+
+        else:
+            obs = torch.hstack((
+                base_pos_xy,
+                base_yaw,
+                arm_dof_pos_scaled,
+                #base_vel_xy,
+                #base_angvel_z,
+                franka_dof_vel[:, 3:] * self.dof_vel_scale,
+                self.franka_lfinger_pos,
+                self.target_positions
+            )).to(dtype=torch.float32)
+
+            base_obs = obs
+            arm_obs = obs
 
         #print("obs", obs)
         #input()
 
         base_id = torch.tensor([1.0, 0.0], device=self._device)
         arm_id = torch.tensor([0.0, 1.0], device=self._device)
-        base_obs = torch.hstack((obs, base_id.repeat(self.num_envs, 1)))
-        arm_obs = torch.hstack((obs, arm_id.repeat(self.num_envs, 1)))
+        base_obs = torch.hstack((base_obs, base_id.repeat(self.num_envs, 1)))
+        arm_obs = torch.hstack((arm_obs, arm_id.repeat(self.num_envs, 1)))
 
         self.obs_buf = torch.vstack((base_obs, arm_obs))
 
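The dimension bookkeeping above works out as follows (the 9-dof arm split is inferred from the [:, 3:] slicing): base_obs is 2 (base_pos_xy) + 1 (base_yaw) + 3 (finger position) + 3 (target) + 15 (zero padding) = 24 features, arm_obs is 9 (arm joint positions) + 9 (arm joint velocities) + 3 + 3 = 24, and each gains a 2-dim agent ID, giving the _num_observations = 26 set in __init__. The global states_buf is 2 + 1 + 2 + 1 + 9 + 9 + 3 + 3 = 30, matching _num_states = 27 + 3. A quick standalone check of that arithmetic (illustrative only):

import torch

num_envs = 2
base_obs = torch.hstack([torch.zeros(num_envs, d) for d in (2, 1, 3, 3, 15)])
arm_obs = torch.hstack([torch.zeros(num_envs, d) for d in (9, 9, 3, 3)])
state = torch.hstack([torch.zeros(num_envs, d) for d in (2, 1, 2, 1, 9, 9, 3, 3)])
assert base_obs.shape[-1] == arm_obs.shape[-1] == 24  # + 2-dim agent id -> 26
assert state.shape[-1] == 30                          # 27 + 3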
3 changes: 2 additions & 1 deletion omniisaacgymenvs/utils/task_util.py
@@ -65,7 +65,8 @@ def initialize_task(config, env, init_sim=True):
         "Jetbot_CNN": JetbotTask,
         "FrankaExample": FrankaExampleTask,
         "MobileFranka": MobileFrankaTask,
-        "MobileFrankaMARL": MobileFrankaMARLTask
+        "MobileFrankaMARL": MobileFrankaMARLTask,
+        "MobileFrankaMARL_cv": MobileFrankaMARLTask
     }
 
     from .config_utils.sim_config import SimConfig
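Both registry keys map to the same MobileFrankaMARLTask class, so the _cv variant changes only configuration (local observations plus the central-value training setup), not the task logic itself.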
