From 9407c5d11e57ee1aa5556f155d0f5c51b3a42a3b Mon Sep 17 00:00:00 2001 From: Mihir Kulkarni Date: Tue, 17 Dec 2024 16:59:07 +0100 Subject: [PATCH] slight tuning of training params and reward for morphy Signed-off-by: Mihir Kulkarni --- aerial_gym/rl_training/rl_games/ppo_aerial_quad.yaml | 6 +++--- .../position_setpoint_task_morphy.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aerial_gym/rl_training/rl_games/ppo_aerial_quad.yaml b/aerial_gym/rl_training/rl_games/ppo_aerial_quad.yaml index c7f1467..5fcc61b 100644 --- a/aerial_gym/rl_training/rl_games/ppo_aerial_quad.yaml +++ b/aerial_gym/rl_training/rl_games/ppo_aerial_quad.yaml @@ -52,9 +52,9 @@ params: truncate_grads: True e_clip: 0.2 clip_value: False - num_actors: 8192 - horizon_length: 16 - minibatch_size: 16384 + num_actors: 4096 + horizon_length: 32 + minibatch_size: 8192 mini_epochs: 4 critic_coef: 2 normalize_input: False diff --git a/aerial_gym/task/position_setpoint_task_morphy/position_setpoint_task_morphy.py b/aerial_gym/task/position_setpoint_task_morphy/position_setpoint_task_morphy.py index 404506d..5dec707 100644 --- a/aerial_gym/task/position_setpoint_task_morphy/position_setpoint_task_morphy.py +++ b/aerial_gym/task/position_setpoint_task_morphy/position_setpoint_task_morphy.py @@ -178,8 +178,8 @@ def compute_reward( action_difference = prev_actions - current_action - absolute_action_reward = -0.15 * torch.sum((current_action[:, :4] - 0.711225) ** 2, dim=1) - action_difference_reward = torch.sum(exp_penalty_func(action_difference, 0.3, 10.0), dim=1) + absolute_action_reward = -0.05 * torch.sum((current_action[:, :4] - 0.711225) ** 2, dim=1) + action_difference_reward = torch.sum(exp_penalty_func(action_difference, 0.2, 5.0), dim=1) joint_vel_reward = torch.sum(exp_penalty_func(joint_vels, 0.30, 30.0), dim=1)