finish
ChengEeee committed Jun 18, 2024
1 parent 25028ed commit 6e53e3e
Showing 38 changed files with 858,117 additions and 135 deletions.
7 changes: 3 additions & 4 deletions README.md
@@ -11,8 +11,8 @@ cd Locomotion_Baseline
# Download the Isaac Gym binaries from https://developer.nvidia.com/isaac-gym
# Originally trained with Preview3, but haven't seen bugs using Preview4.
cd isaacgym/python && pip install -e .
cd ~/extreme-parkour/rsl_rl && pip install -e .
cd ~/extreme-parkour/legged_gym && pip install -e .
cd /rsl_rl && pip install -e .
cd /legged_gym_train && pip install -e .
pip install "numpy<1.24" pydelatin swanlab tqdm opencv-python ipdb pyfqmr flask
```

@@ -58,12 +58,11 @@ Can be used in both IsaacGym and web viewer.
### Arguments
- --exptid: string, can be `xxx-xx-WHATEVER`, `xxx-xx` is typically numbers only. `WHATEVER` is the description of the run.
- --device: can be `cuda:0`, `cpu`, etc.
- --delay: whether to add an action delay.
- --checkpoint: the specific checkpoint to load. If not specified, the latest one is loaded.
- --resume: resume from another checkpoint, used together with `--resumeid`.
- --seed: random seed.
- --no_swanlab: disable swanlab logging.
- --use_camera: use the camera instead of scandots.
- --debug: debug mode.
- --web: used for playing on headless machines. It forwards a port so you can visualize seamlessly in VS Code using an idle GPU or CPU; the [Live Preview](https://marketplace.visualstudio.com/items?itemName=ms-vscode.live-server) VS Code extension is needed to view it inside VS Code, otherwise you can open it in any browser.
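A hypothetical invocation combining these flags (the script path and experiment id below are illustrative assumptions, not taken from this commit):

```bash
# Example only: launch a named run with a fixed seed and swanlab logging disabled;
# adjust the path to wherever train.py lives in this repository.
python legged_gym/scripts/train.py --exptid 001-00-baseline \
    --device cuda:0 --seed 1 --no_swanlab
```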

### todo
2 changes: 0 additions & 2 deletions legged_deploy/scripts/diff_policy.py
@@ -105,8 +105,6 @@ def policy(obs,if_distill = True):
actions = distill_actor(obs.to('cpu'), hist_encoding=True, scandots_latent = latent)
# print(actions)

# else:
# actions = ac.act_inference(obs.to('cpu'), hist_encoding=True)
return actions

elif if_md_ac:
15 changes: 1 addition & 14 deletions legged_train/legged_gym/envs/__init__.py
@@ -29,28 +29,15 @@
# Copyright (c) 2021 ETH Zurich, Nikita Rudin

from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
from legged_gym.envs.a1.a1_config import A1RoughCfg, A1RoughCfgPPO
from .base.legged_robot import LeggedRobot
from .anymal_c.anymal import Anymal
from .anymal_c.mixed_terrains.anymal_c_rough_config import AnymalCRoughCfg, AnymalCRoughCfgPPO
from .anymal_c.flat.anymal_c_flat_config import AnymalCFlatCfg, AnymalCFlatCfgPPO
from .anymal_b.anymal_b_config import AnymalBRoughCfg, AnymalBRoughCfgPPO
from .cassie.cassie import Cassie
from .cassie.cassie_config import CassieRoughCfg, CassieRoughCfgPPO
from .a1.a1_config import A1RoughCfg, A1RoughCfgPPO

from .a1.a1_parkour_config import A1ParkourCfg, A1ParkourCfgPPO
from .go1.go1_config import Go1RoughCfg, Go1RoughCfgPPO
from .wow.wow_config import wowRoughCfg, wowCfgPPO
from .wow.wow import Wow

import os

from legged_gym.utils.task_registry import task_registry

# task_registry.register( "anymal_c_rough", Anymal, AnymalCRoughCfg(), AnymalCRoughCfgPPO() )
# task_registry.register( "anymal_c_flat", Anymal, AnymalCFlatCfg(), AnymalCFlatCfgPPO() )
# task_registry.register( "anymal_b", Anymal, AnymalBRoughCfg(), AnymalBRoughCfgPPO() )
# task_registry.register( "cassie", Cassie, CassieRoughCfg(), CassieRoughCfgPPO() )
task_registry.register( "a1", LeggedRobot, A1ParkourCfg(), A1ParkourCfgPPO() )
task_registry.register( "go1", LeggedRobot, Go1RoughCfg(), Go1RoughCfgPPO() )
task_registry.register( "wow", Wow, wowRoughCfg(), wowCfgPPO() )
50 changes: 7 additions & 43 deletions legged_train/legged_gym/envs/base/legged_robot.py
@@ -140,7 +140,7 @@ def step(self, actions):
self.render()
for _ in range(self.cfg.control.decimation):
self.torques = self._compute_torques(self.delayed_actions[:, _]).view(self.torques.shape)
# print(self.torques[0,:])
# print('torque',self.torques[0,:])
# self.torques[0,1]=30
self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques))
self.gym.simulate(self.sim)
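The loop above reads `self.delayed_actions[:, _]` once per decimation substep. A minimal sketch of how such a buffer could be assembled (an assumption for illustration; the actual construction is not shown in this diff):

```python
import torch

def build_delayed_actions(actions, last_actions, decimation, delay_substeps):
    # actions / last_actions: (num_envs, num_actions) newest and previous policy outputs.
    # Returns (num_envs, decimation, num_actions): the action each sim substep applies.
    delayed = actions.unsqueeze(1).repeat(1, decimation, 1)
    # For the first `delay_substeps` substeps the previous action is still in effect,
    # which emulates actuation latency between the policy and the motors.
    delayed[:, :delay_substeps] = last_actions.unsqueeze(1)
    return delayed
```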
@@ -179,26 +179,25 @@ def post_physics_step(self):

self.roll, self.pitch, self.yaw = euler_from_quaternion(self.base_quat)

self.foot_velocities = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 7:10]
self.foot_positions = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 0:3]

contact = torch.norm(self.contact_forces, dim=-1) > 1.
self.contact_filt = torch.logical_or(contact, self.last_contacts)
self.last_contacts = contact

# ---feet observation--
feet_contact = self.contact_forces[:, self.feet_indices, 2] > 1.
# print('li',self.contact_forces[0, self.feet_indices, 2])
self.feet_contact_filt = torch.logical_or(feet_contact, self.last_feet_contacts)
# print(self.feet_contact_filt[0])
self.last_feet_contacts = feet_contact
self.feet_history_contact_flag = torch.cat([self.feet_history_contact_flag[:,1:].clone(), self.feet_contact_filt[:,None, :].clone()], dim=1)

feet_quat = self.rigid_body_states[:, self.feet_indices, 3:7]
feet_quat = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 3:7]
left_feet_roll, left_feet_pitch, left_feet_yaw = euler_from_quaternion(feet_quat[:,0])
right_feet_roll, right_feet_pitch, right_feet_yaw = euler_from_quaternion(feet_quat[:,1])
self.feet_rpy[:,0,:] = torch.stack((left_feet_roll, left_feet_pitch, left_feet_yaw), dim=1)
self.feet_rpy[:,1,:] = torch.stack((right_feet_roll, right_feet_pitch, right_feet_yaw), dim=1)

self.foot_velocities = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 7:10]
self.foot_positions = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 0:3]
self.foot_positions_inbody = self.foot_positions - self.root_states[:, None, 0:3]
self._post_physics_step_callback()

# compute observations, rewards, resets, ...
@@ -358,7 +357,6 @@ def compute_observations(self):
self.dof_vel * self.obs_scales.dof_vel,
self.actions,
),dim=-1)

self.obs_history_buf = torch.where(
(self.episode_length_buf <= 1)[:, None, None],
torch.stack([obs_buf] * self.cfg.env.history_len, dim=1),
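The `torch.where` above (its final argument is cut off by the diff view) matches a common rolling-history pattern; a sketch under that assumption:

```python
import torch

def update_obs_history(obs_history_buf, obs_buf, episode_length_buf, history_len):
    # obs_history_buf: (num_envs, history_len, obs_dim); obs_buf: (num_envs, obs_dim).
    just_reset = (episode_length_buf <= 1)[:, None, None]
    return torch.where(
        just_reset,
        # On the first steps of an episode, fill the history with copies of the current obs.
        torch.stack([obs_buf] * history_len, dim=1),
        # Otherwise drop the oldest frame and append the newest one.
        torch.cat([obs_history_buf[:, 1:], obs_buf.unsqueeze(1)], dim=1),
    )
```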
@@ -586,8 +584,6 @@ def _resample_commands(self, env_ids):
line_vel_ids = env_ids[envs_ids_parts==2]

self.commands[yaw_ids, 2] = torch_rand_float(self.command_ranges["ang_vel_yaw"][0], self.command_ranges["ang_vel_yaw"][1], (len(yaw_ids), 1), device=self.device).squeeze(1)
# self.commands[yaw_ids, 2] *= torch.abs(self.commands[yaw_ids, 2]) > self.cfg.commands.ang_vel_clip


self.commands[line_vel_ids, 0] = torch_rand_float(self.command_ranges["lin_vel_x"][0], self.command_ranges["lin_vel_x"][1], (len(line_vel_ids), 1), device=self.device).squeeze(1)
self.commands[line_vel_ids, 1] = torch_rand_float(self.command_ranges["lin_vel_y"][0], self.command_ranges["lin_vel_y"][1], (len(line_vel_ids), 1), device=self.device).squeeze(1)
@@ -596,37 +592,7 @@ def _resample_commands(self, env_ids):
else:
self.commands[line_vel_ids, 2] = torch_rand_float(self.command_ranges["ang_vel_yaw"][0], self.command_ranges["ang_vel_yaw"][1], (len(line_vel_ids), 1), device=self.device).squeeze(1)
# set stand command while vel command is too small
# self.commands[line_vel_ids, 0:2] *= torch.abs(self.commands[line_vel_ids, 0:1]) > self.cfg.commands.lin_vel_clip
# self.commands[line_vel_ids, 2] *= torch.abs(self.commands[line_vel_ids, 2]) > self.cfg.commands.ang_vel_clip


# # Split off some environments for dedicated single-command training: forward/backward, lateral movement, rotation
# num_of_single_command_env= math.ceil(len(env_ids) / 4)
# single_command_indices = torch.randperm(len(env_ids))[:num_of_single_command_env]

# # Dedicated rotation training
# env_ids_to_ang = env_ids[single_command_indices]
# self.commands[env_ids_to_ang, 0] = torch.zeros_like(self.commands[env_ids_to_ang, 0])
# self.commands[env_ids_to_ang, 1] = torch.zeros_like(self.commands[env_ids_to_ang, 1])

# # Take a subset of the rotation envs for dedicated forward/backward training
# num_of_single_command_env= math.ceil(len(env_ids_to_ang) / 2)
# single_command_indices = torch.randperm(len(env_ids_to_ang))[:num_of_single_command_env]

# env_ids_to_back_and_forward=env_ids[single_command_indices]
# self.commands[env_ids_to_back_and_forward, 0] = torch_rand_float(self.command_ranges["lin_vel_x"][0], self.command_ranges["lin_vel_x"][1], (len(env_ids_to_back_and_forward), 1), device=self.device).squeeze(1)
# self.commands[env_ids_to_back_and_forward, 1] = torch.zeros_like(self.commands[env_ids_to_back_and_forward, 1])
# self.commands[env_ids_to_back_and_forward, 3] = torch.zeros_like(self.commands[env_ids_to_back_and_forward, 3])

# # From the forward/backward envs, take a subset for dedicated lateral-movement training
# num_of_single_command_env= math.ceil(len(env_ids_to_back_and_forward) / 3)
# single_command_indices = torch.randperm(len(env_ids_to_back_and_forward))[:num_of_single_command_env]

# env_ids_to_sldle=env_ids[single_command_indices]
# self.commands[env_ids_to_sldle, 0] = torch.zeros_like(self.commands[env_ids_to_sldle, 0])
# self.commands[env_ids_to_sldle, 1] = torch_rand_float(self.command_ranges["lin_vel_y"][0], self.command_ranges["lin_vel_y"][1], (len(env_ids_to_sldle), 1), device=self.device).squeeze(1)
# self.commands[env_ids_to_sldle, 3] = torch.zeros_like(self.commands[env_ids_to_sldle, 3])



def _compute_torques(self, actions):
""" Compute torques from actions.
@@ -932,9 +898,7 @@ def _init_buffers(self):
for i in range(self.num_dofs):
name = self.dof_names[i]
angle = self.cfg.init_state.default_joint_angles[name]
target_angle = self.cfg.init_state.target_joint_angles[name]
self.default_dof_pos[i] = angle
self.target_dof_pos[i] = target_angle
found = False
for dof_name in self.cfg.control.stiffness.keys():
if dof_name in name:
6 changes: 3 additions & 3 deletions legged_train/legged_gym/envs/base/legged_robot_config.py
@@ -274,7 +274,7 @@ class control:
# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.5
# decimation: Number of control action updates @ sim DT per policy DT
decimation = 4
decimation = 20

class asset:
file = ""
@@ -344,7 +344,7 @@ class domain_rand:

##--------------------------

action_delay = True
action_delay = False
delay_inject_steps = 24 * 1


@@ -368,7 +368,7 @@ class viewer:
lookat = [11., 5, 3.] # [m]

class sim:
dt = 0.005
dt = 0.001
substeps = 1
gravity = [0., 0. ,-9.81] # [m/s^2]
up_axis = 1 # 0 is y, 1 is z
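Assuming the policy step is sim `dt` times `decimation` (as the comment in the `control` class above states), these two changes cancel out: 0.001 s × 20 = 0.02 s per policy step (50 Hz), the same as the previous 0.005 s × 4.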
9 changes: 4 additions & 5 deletions legged_train/legged_gym/envs/wow/wow.py
@@ -79,7 +79,6 @@ def compute_observations(self):
self.dof_vel * self.obs_scales.dof_vel,
self.actions,
),dim=-1)

self.obs_history_buf = torch.where(
(self.episode_length_buf <= 1)[:, None, None],
torch.stack([obs_buf] * self.cfg.env.history_len, dim=1),
@@ -130,12 +129,12 @@ def _reward_tracking_y_line_vel(self):

def _reward_tracking_ang_vel(self):
# Tracking of angular velocity commands (yaw)
ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
ang_vel_error[self.stand_envs_ids] = torch.abs(self.commands[self.stand_envs_ids, 2] - self.base_ang_vel[self.stand_envs_ids, 2]) #stand error
ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2])
# ang_vel_error[self.stand_envs_ids] = torch.abs(self.commands[self.stand_envs_ids, 2] - self.base_ang_vel[self.stand_envs_ids, 2]) #stand error
return torch.exp(-ang_vel_error/self.cfg.rewards.tracking_sigma)
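With the absolute error above and `tracking_sigma = 0.2` (set in `wow_config.py` later in this commit), perfect tracking yields 1.0 and a yaw-rate error of 0.2 rad/s yields exp(-1) ≈ 0.37; compared with the old squared error, the absolute error penalizes small errors (below 1 rad/s) more and large errors less.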

def _reward_roll_pitch_orient(self):
rew = torch.sum(torch.abs(self.projected_gravity[:, :2]), dim=1)
rew = 30 * torch.sum(torch.abs(self.imu_obs[:, :2]), dim=1)
return torch.exp(-rew/self.cfg.rewards.orient_tracking_sigma)

def _reward_base_height(self):
@@ -169,7 +168,7 @@ def _reward_feet_orientation(self):

def _reward_feet_position(self):
rew_pos_feet = torch.ones(self.num_envs,dtype=torch.float, device=self.device, requires_grad=False)
rew_pos_feet[self.stand_envs_ids] = torch.exp(-3*torch.sum(torch.abs(self.dof_pos[self.stand_envs_ids,:]-self.default_dof_pos_all[self.stand_envs_ids,:]),dim=1))
rew_pos_feet[self.stand_envs_ids] = torch.exp(-3*torch.sum(torch.abs(self.foot_positions_inbody[self.stand_envs_ids,:,0:2]-self.default_dof_pos_all[self.stand_envs_ids,:]),dim=1))
return rew_pos_feet

def _reward_base_acc(self):
75 changes: 16 additions & 59 deletions legged_train/legged_gym/envs/wow/wow_config.py
@@ -7,7 +7,7 @@ class env( LeggedRobotCfg.env):
scan_dim = 11*11
priv_dim = 3
priv_latent_dim = 15 + 1 + 10 +10
proprio_dim = 2+ 3+ 3+ 3+ 30
proprio_dim = 2+ 3+ 30 +6
history_len = 10

num_observations = proprio_dim
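For reference, the new `proprio_dim` evaluates to 2 + 3 + 30 + 6 = 41, the same total as the old 2 + 3 + 3 + 3 + 30, so `num_observations` stays at 41.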
@@ -22,27 +22,13 @@ class terrain(LeggedRobotCfg.terrain):
class init_state( LeggedRobotCfg.init_state ):
pos = [0.0, 0.0, 0.912] # x,y,z [m]
default_joint_angles = { # = target angles [rad] when action = 0.0
'left_roll_joint': 0.1, # [rad]
'left_roll_joint': 0., # [rad]
'left_yaw_joint': 0, # [rad]
'left_pitch_joint': -0.3, # [rad]
'left_knee_joint': 0.6, # [rad]
'left_foot_joint': -0.3,

'right_roll_joint': -0.1, # [rad]
'right_yaw_joint': 0, # [rad]
'right_pitch_joint': 0.3 , # [rad]
'right_knee_joint': -0.6, # [rad]
'right_foot_joint': 0.3,

}
target_joint_angles = { # = target angles [rad] when action = 0.0
'left_roll_joint': 0, # [rad]
'left_yaw_joint': 0, # [rad]
'left_pitch_joint': -0.3, # [rad]
'left_knee_joint': 0.6, # [rad]
'left_foot_joint': -0.3,

'right_roll_joint': 0, # [rad]
'right_roll_joint': -0., # [rad]
'right_yaw_joint': 0, # [rad]
'right_pitch_joint': 0.3 , # [rad]
'right_knee_joint': -0.6, # [rad]
@@ -63,7 +49,7 @@ class control( LeggedRobotCfg.control ):
# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.25
# decimation: Number of control action updates @ sim DT per policy DT
decimation = 4
decimation = 20

class asset( LeggedRobotCfg.asset ):
file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/wow/urdf/wow.urdf'
@@ -78,8 +64,10 @@ class rewards( LeggedRobotCfg.rewards ):
tracking_sigma = 0.2
orient_tracking_sigma = 2
soft_torque_limit = 0.9
soft_dof_pos_limit = 0.95
soft_dof_pos_limit = 0.90
base_height_target = 0.86
target_foot_pos_x = [0. , 0. ]
target_foot_pos_y = [0.9, -0.9]
only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
class scales:
# tracking_x_line_vel = 0.2
@@ -97,25 +85,6 @@ class scales:
# roll_yaw_position = 0.20
# dof_pos_limits = -0.2
#added


# # Standing reward
# tracking_ang_vel = 0.0
# tracking_lin_vel = 1.0
# lin_vel_z = -1
# lin_vel_x = -1
# lin_vel_y = -1
# ang_vel_xy = -0.00
# torques = -1.e-5
# dof_acc = -2.e-7
# action_rate = -0.01 #-0.01
# orientation = -5
# dof_pos_limits = -5.
# stand_still = 0.05
# hip_pos = -1
# base_height = 1.0


# termination = -10
# base_height = -10
# # feet_air = -1
@@ -132,32 +101,25 @@ class scales:
# arm_position_stand=0.16
roll_yaw_position = -0.5
base_acc=0.02
# action_difference=0.02
torques=0.5
tracking_x_line_vel = 4
tracking_y_line_vel = 3
tracking_ang_vel = 1.
dof_vel=-6e-5
dof_acc=-2e-7
lin_vel_z = -0.8
lin_vel_z = -1
ang_vel_xy = -0.05
dof_pos_limits = -10
action_rate = -0.015 #-0.01
action_rate = -0.02 #-0.01

# foot_height=-0.3
orientation = -2
feet_air_time=1.3
feet_contact = 1.2





feet_air_time = 4
feet_contact = 3
feet_orientation = 2

class wowCfgPPO( LeggedRobotCfgPPO ):
class policy:
init_noise_std = 1.0
adapt_hidden_dims=[128,64,19]
estimate_hidden_dims = [64,128,41],
actor_hidden_dims = [512, 256, 128]
critic_hidden_dims = [512, 256, 128]
@@ -167,7 +129,7 @@ class policy:
rnn_hidden_size = 64
rnn_num_layers = 2

class algorithm():
class algorithm:
# training params
value_loss_coef = 1.0
use_clipped_value_loss = True
@@ -182,14 +144,14 @@ class algorithm():
desired_kl = 0.01
max_grad_norm = 1.

class runner():
class runner:
policy_class_name = 'ActorCriticRecurrent' #ActorCriticRecurrent
algorithm_class_name = 'PPO'
num_steps_per_env = 24 # per iteration
max_iterations = 5000 # number of policy updates
max_iterations = 50000 # number of policy updates

# logging
save_interval = 500 # check for potential saves every this many iterations
save_interval = 100 # check for potential saves every this many iterations
experiment_name = 'test'
run_name = ''
# load and resume
@@ -198,11 +160,6 @@ class runner():
checkpoint = -1 # -1 = last saved model
resume_path = None # updated from load_run and chkpt

class estimator():
#train_with_estimated_states = True
learning_rate = 1.e-4
hidden_dims = [19, 128]

class runner( LeggedRobotCfgPPO.runner ):
run_name = ''
experiment_name = 'wow'