finish
ChengEeee committed Jun 18, 2024
1 parent 25028ed commit 6e53e3e
Showing 38 changed files with 858,117 additions and 135 deletions.
7 changes: 3 additions & 4 deletions README.md
@@ -11,8 +11,8 @@ cd Locomotion_Baseline
# Download the Isaac Gym binaries from https://developer.nvidia.com/isaac-gym
# Originally trained with Preview3, but haven't seen bugs using Preview4.
cd isaacgym/python && pip install -e .
cd ~/extreme-parkour/rsl_rl && pip install -e .
cd ~/extreme-parkour/legged_gym && pip install -e .
cd /rsl_rl && pip install -e .
cd /legged_gym_train && pip install -e .
pip install "numpy<1.24" pydelatin swanlab tqdm opencv-python ipdb pyfqmr flask
```

@@ -58,12 +58,11 @@ Can be used in both IsaacGym and web viewer.
### Arguments
- --exptid: string, can be `xxx-xx-WHATEVER`, `xxx-xx` is typically numbers only. `WHATEVER` is the description of the run.
- --device: can be `cuda:0`, `cpu`, etc.
- --delay: whether to add an action delay.
- --checkpoint: the specific checkpoint to load. If not specified, the latest one is loaded.
- --resume: resume from another checkpoint, used together with `--resumeid`.
- --seed: random seed.
- --no_swanlab: disable swanlab logging.
- --use_camera: use the camera instead of scandots.
- --debug: debug mode.
- --web: used for playing on headless machines. It forwards a port so you can visualize seamlessly in VS Code using an idle GPU or CPU; the [Live Preview](https://marketplace.visualstudio.com/items?itemName=ms-vscode.live-server) VS Code extension is needed to view it inside VS Code, otherwise you can open it in any browser.
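A hypothetical invocation combining these flags (the script path and experiment id below are illustrative assumptions, not taken from this commit):

```bash
# Example only: launch a named run with a fixed seed and swanlab logging disabled;
# adjust the path to wherever train.py lives in this repository.
python legged_gym/scripts/train.py --exptid 001-00-baseline \
    --device cuda:0 --seed 1 --no_swanlab
```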

### todo
2 changes: 0 additions & 2 deletions legged_deploy/scripts/diff_policy.py
@@ -105,8 +105,6 @@ def policy(obs,if_distill = True):
actions = distill_actor(obs.to('cpu'), hist_encoding=True, scandots_latent = latent)
# print(actions)

# else:
# actions = ac.act_inference(obs.to('cpu'), hist_encoding=True)
return actions

elif if_md_ac:
15 changes: 1 addition & 14 deletions legged_train/legged_gym/envs/__init__.py
@@ -29,28 +29,15 @@
# Copyright (c) 2021 ETH Zurich, Nikita Rudin

from legged_gym import LEGGED_GYM_ROOT_DIR, LEGGED_GYM_ENVS_DIR
from legged_gym.envs.a1.a1_config import A1RoughCfg, A1RoughCfgPPO
from .base.legged_robot import LeggedRobot
from .anymal_c.anymal import Anymal
from .anymal_c.mixed_terrains.anymal_c_rough_config import AnymalCRoughCfg, AnymalCRoughCfgPPO
from .anymal_c.flat.anymal_c_flat_config import AnymalCFlatCfg, AnymalCFlatCfgPPO
from .anymal_b.anymal_b_config import AnymalBRoughCfg, AnymalBRoughCfgPPO
from .cassie.cassie import Cassie
from .cassie.cassie_config import CassieRoughCfg, CassieRoughCfgPPO
from .a1.a1_config import A1RoughCfg, A1RoughCfgPPO

from .a1.a1_parkour_config import A1ParkourCfg, A1ParkourCfgPPO
from .go1.go1_config import Go1RoughCfg, Go1RoughCfgPPO
from .wow.wow_config import wowRoughCfg, wowCfgPPO
from .wow.wow import Wow

import os

from legged_gym.utils.task_registry import task_registry

# task_registry.register( "anymal_c_rough", Anymal, AnymalCRoughCfg(), AnymalCRoughCfgPPO() )
# task_registry.register( "anymal_c_flat", Anymal, AnymalCFlatCfg(), AnymalCFlatCfgPPO() )
# task_registry.register( "anymal_b", Anymal, AnymalBRoughCfg(), AnymalBRoughCfgPPO() )
# task_registry.register( "cassie", Cassie, CassieRoughCfg(), CassieRoughCfgPPO() )
task_registry.register( "a1", LeggedRobot, A1ParkourCfg(), A1ParkourCfgPPO() )
task_registry.register( "go1", LeggedRobot, Go1RoughCfg(), Go1RoughCfgPPO() )
task_registry.register( "wow", Wow, wowRoughCfg(), wowCfgPPO() )
50 changes: 7 additions & 43 deletions legged_train/legged_gym/envs/base/legged_robot.py
@@ -140,7 +140,7 @@ def step(self, actions):
self.render()
for _ in range(self.cfg.control.decimation):
self.torques = self._compute_torques(self.delayed_actions[:, _]).view(self.torques.shape)
# print(self.torques[0,:])
# print('torque',self.torques[0,:])
# self.torques[0,1]=30
self.gym.set_dof_actuation_force_tensor(self.sim, gymtorch.unwrap_tensor(self.torques))
self.gym.simulate(self.sim)
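The loop above reads `self.delayed_actions[:, _]` once per decimation substep. A minimal sketch of how such a buffer could be assembled (an assumption for illustration; the actual construction is not shown in this diff):

```python
import torch

def build_delayed_actions(actions, last_actions, decimation, delay_substeps):
    # actions / last_actions: (num_envs, num_actions) newest and previous policy outputs.
    # Returns (num_envs, decimation, num_actions): the action each sim substep applies.
    delayed = actions.unsqueeze(1).repeat(1, decimation, 1)
    # For the first `delay_substeps` substeps the previous action is still in effect,
    # which emulates actuation latency between the policy and the motors.
    delayed[:, :delay_substeps] = last_actions.unsqueeze(1)
    return delayed
```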
@@ -179,26 +179,25 @@ def post_physics_step(self):

self.roll, self.pitch, self.yaw = euler_from_quaternion(self.base_quat)

self.foot_velocities = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 7:10]
self.foot_positions = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 0:3]

contact = torch.norm(self.contact_forces, dim=-1) > 1.
self.contact_filt = torch.logical_or(contact, self.last_contacts)
self.last_contacts = contact

# ---feet observation--
feet_contact = self.contact_forces[:, self.feet_indices, 2] > 1.
# print('li',self.contact_forces[0, self.feet_indices, 2])
self.feet_contact_filt = torch.logical_or(feet_contact, self.last_feet_contacts)
# print(self.feet_contact_filt[0])
self.last_feet_contacts = feet_contact
self.feet_history_contact_flag = torch.cat([self.feet_history_contact_flag[:,1:].clone(), self.feet_contact_filt[:,None, :].clone()], dim=1)

feet_quat = self.rigid_body_states[:, self.feet_indices, 3:7]
feet_quat = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 3:7]
left_feet_roll, left_feet_pitch, left_feet_yaw = euler_from_quaternion(feet_quat[:,0])
right_feet_roll, right_feet_pitch, right_feet_yaw = euler_from_quaternion(feet_quat[:,1])
self.feet_rpy[:,0,:] = torch.stack((left_feet_roll, left_feet_pitch, left_feet_yaw), dim=1)
self.feet_rpy[:,1,:] = torch.stack((right_feet_roll, right_feet_pitch, right_feet_yaw), dim=1)

self.foot_velocities = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 7:10]
self.foot_positions = self.rigid_body_states.view(self.num_envs, self.num_bodies, 13)[:, self.feet_indices, 0:3]
self.foot_positions_inbody = self.foot_positions - self.root_states[:, None, 0:3]
self._post_physics_step_callback()

# compute observations, rewards, resets, ...
@@ -358,7 +357,6 @@ def compute_observations(self):
self.dof_vel * self.obs_scales.dof_vel,
self.actions,
),dim=-1)

self.obs_history_buf = torch.where(
(self.episode_length_buf <= 1)[:, None, None],
torch.stack([obs_buf] * self.cfg.env.history_len, dim=1),
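The `torch.where` above (its final argument is cut off by the diff view) matches a common rolling-history pattern; a sketch under that assumption:

```python
import torch

def update_obs_history(obs_history_buf, obs_buf, episode_length_buf, history_len):
    # obs_history_buf: (num_envs, history_len, obs_dim); obs_buf: (num_envs, obs_dim).
    just_reset = (episode_length_buf <= 1)[:, None, None]
    return torch.where(
        just_reset,
        # On the first steps of an episode, fill the history with copies of the current obs.
        torch.stack([obs_buf] * history_len, dim=1),
        # Otherwise drop the oldest frame and append the newest one.
        torch.cat([obs_history_buf[:, 1:], obs_buf.unsqueeze(1)], dim=1),
    )
```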
@@ -586,8 +584,6 @@ def _resample_commands(self, env_ids):
line_vel_ids = env_ids[envs_ids_parts==2]

self.commands[yaw_ids, 2] = torch_rand_float(self.command_ranges["ang_vel_yaw"][0], self.command_ranges["ang_vel_yaw"][1], (len(yaw_ids), 1), device=self.device).squeeze(1)
# self.commands[yaw_ids, 2] *= torch.abs(self.commands[yaw_ids, 2]) > self.cfg.commands.ang_vel_clip


self.commands[line_vel_ids, 0] = torch_rand_float(self.command_ranges["lin_vel_x"][0], self.command_ranges["lin_vel_x"][1], (len(line_vel_ids), 1), device=self.device).squeeze(1)
self.commands[line_vel_ids, 1] = torch_rand_float(self.command_ranges["lin_vel_y"][0], self.command_ranges["lin_vel_y"][1], (len(line_vel_ids), 1), device=self.device).squeeze(1)
@@ -596,37 +592,7 @@ def _resample_commands(self, env_ids):
else:
self.commands[line_vel_ids, 2] = torch_rand_float(self.command_ranges["ang_vel_yaw"][0], self.command_ranges["ang_vel_yaw"][1], (len(line_vel_ids), 1), device=self.device).squeeze(1)
# set stand command while vel command is too small
# self.commands[line_vel_ids, 0:2] *= torch.abs(self.commands[line_vel_ids, 0:1]) > self.cfg.commands.lin_vel_clip
# self.commands[line_vel_ids, 2] *= torch.abs(self.commands[line_vel_ids, 2]) > self.cfg.commands.ang_vel_clip


# # Split off some environments for dedicated single-command training: forward/backward, lateral movement, rotation
# num_of_single_command_env= math.ceil(len(env_ids) / 4)
# single_command_indices = torch.randperm(len(env_ids))[:num_of_single_command_env]

# # Dedicated rotation training
# env_ids_to_ang = env_ids[single_command_indices]
# self.commands[env_ids_to_ang, 0] = torch.zeros_like(self.commands[env_ids_to_ang, 0])
# self.commands[env_ids_to_ang, 1] = torch.zeros_like(self.commands[env_ids_to_ang, 1])

# # Take a subset of the rotation envs for dedicated forward/backward training
# num_of_single_command_env= math.ceil(len(env_ids_to_ang) / 2)
# single_command_indices = torch.randperm(len(env_ids_to_ang))[:num_of_single_command_env]

# env_ids_to_back_and_forward=env_ids[single_command_indices]
# self.commands[env_ids_to_back_and_forward, 0] = torch_rand_float(self.command_ranges["lin_vel_x"][0], self.command_ranges["lin_vel_x"][1], (len(env_ids_to_back_and_forward), 1), device=self.device).squeeze(1)
# self.commands[env_ids_to_back_and_forward, 1] = torch.zeros_like(self.commands[env_ids_to_back_and_forward, 1])
# self.commands[env_ids_to_back_and_forward, 3] = torch.zeros_like(self.commands[env_ids_to_back_and_forward, 3])

# # From the forward/backward envs, take a subset for dedicated lateral-movement training
# num_of_single_command_env= math.ceil(len(env_ids_to_back_and_forward) / 3)
# single_command_indices = torch.randperm(len(env_ids_to_back_and_forward))[:num_of_single_command_env]

# env_ids_to_sldle=env_ids[single_command_indices]
# self.commands[env_ids_to_sldle, 0] = torch.zeros_like(self.commands[env_ids_to_sldle, 0])
# self.commands[env_ids_to_sldle, 1] = torch_rand_float(self.command_ranges["lin_vel_y"][0], self.command_ranges["lin_vel_y"][1], (len(env_ids_to_sldle), 1), device=self.device).squeeze(1)
# self.commands[env_ids_to_sldle, 3] = torch.zeros_like(self.commands[env_ids_to_sldle, 3])



def _compute_torques(self, actions):
""" Compute torques from actions.
@@ -932,9 +898,7 @@ def _init_buffers(self):
for i in range(self.num_dofs):
name = self.dof_names[i]
angle = self.cfg.init_state.default_joint_angles[name]
target_angle = self.cfg.init_state.target_joint_angles[name]
self.default_dof_pos[i] = angle
self.target_dof_pos[i] = target_angle
found = False
for dof_name in self.cfg.control.stiffness.keys():
if dof_name in name:
6 changes: 3 additions & 3 deletions legged_train/legged_gym/envs/base/legged_robot_config.py
@@ -274,7 +274,7 @@ class control:
# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.5
# decimation: Number of control action updates @ sim DT per policy DT
decimation = 4
decimation = 20

class asset:
file = ""
@@ -344,7 +344,7 @@ class domain_rand:

##--------------------------

action_delay = True
action_delay = False
delay_inject_steps = 24 * 1


@@ -368,7 +368,7 @@ class viewer:
lookat = [11., 5, 3.] # [m]

class sim:
dt = 0.005
dt = 0.001
substeps = 1
gravity = [0., 0. ,-9.81] # [m/s^2]
up_axis = 1 # 0 is y, 1 is z
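Assuming the policy step is sim `dt` times `decimation` (as the comment in the `control` class above states), these two changes cancel out: 0.001 s × 20 = 0.02 s per policy step (50 Hz), the same as the previous 0.005 s × 4.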
9 changes: 4 additions & 5 deletions legged_train/legged_gym/envs/wow/wow.py
@@ -79,7 +79,6 @@ def compute_observations(self):
self.dof_vel * self.obs_scales.dof_vel,
self.actions,
),dim=-1)

self.obs_history_buf = torch.where(
(self.episode_length_buf <= 1)[:, None, None],
torch.stack([obs_buf] * self.cfg.env.history_len, dim=1),
@@ -130,12 +129,12 @@ def _reward_tracking_y_line_vel(self):

def _reward_tracking_ang_vel(self):
# Tracking of angular velocity commands (yaw)
ang_vel_error = torch.square(self.commands[:, 2] - self.base_ang_vel[:, 2])
ang_vel_error[self.stand_envs_ids] = torch.abs(self.commands[self.stand_envs_ids, 2] - self.base_ang_vel[self.stand_envs_ids, 2]) #stand error
ang_vel_error = torch.abs(self.commands[:, 2] - self.base_ang_vel[:, 2])
# ang_vel_error[self.stand_envs_ids] = torch.abs(self.commands[self.stand_envs_ids, 2] - self.base_ang_vel[self.stand_envs_ids, 2]) #stand error
return torch.exp(-ang_vel_error/self.cfg.rewards.tracking_sigma)
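With the absolute error above and `tracking_sigma = 0.2` (set in `wow_config.py` later in this commit), perfect tracking yields 1.0 and a yaw-rate error of 0.2 rad/s yields exp(-1) ≈ 0.37; compared with the old squared error, the absolute error penalizes small errors (below 1 rad/s) more and large errors less.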

def _reward_roll_pitch_orient(self):
rew = torch.sum(torch.abs(self.projected_gravity[:, :2]), dim=1)
rew = 30 * torch.sum(torch.abs(self.imu_obs[:, :2]), dim=1)
return torch.exp(-rew/self.cfg.rewards.orient_tracking_sigma)

def _reward_base_height(self):
@@ -169,7 +168,7 @@ def _reward_feet_orientation(self):

def _reward_feet_position(self):
rew_pos_feet = torch.ones(self.num_envs,dtype=torch.float, device=self.device, requires_grad=False)
rew_pos_feet[self.stand_envs_ids] = torch.exp(-3*torch.sum(torch.abs(self.dof_pos[self.stand_envs_ids,:]-self.default_dof_pos_all[self.stand_envs_ids,:]),dim=1))
rew_pos_feet[self.stand_envs_ids] = torch.exp(-3*torch.sum(torch.abs(self.foot_positions_inbody[self.stand_envs_ids,:,0:2]-self.default_dof_pos_all[self.stand_envs_ids,:]),dim=1))
return rew_pos_feet

def _reward_base_acc(self):
75 changes: 16 additions & 59 deletions legged_train/legged_gym/envs/wow/wow_config.py
@@ -7,7 +7,7 @@ class env( LeggedRobotCfg.env):
scan_dim = 11*11
priv_dim = 3
priv_latent_dim = 15 + 1 + 10 +10
proprio_dim = 2+ 3+ 3+ 3+ 30
proprio_dim = 2+ 3+ 30 +6
history_len = 10

num_observations = proprio_dim
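For reference, the new `proprio_dim` evaluates to 2 + 3 + 30 + 6 = 41, the same total as the old 2 + 3 + 3 + 3 + 30, so `num_observations` stays at 41.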
@@ -22,27 +22,13 @@ class terrain(LeggedRobotCfg.terrain):
class init_state( LeggedRobotCfg.init_state ):
pos = [0.0, 0.0, 0.912] # x,y,z [m]
default_joint_angles = { # = target angles [rad] when action = 0.0
'left_roll_joint': 0.1, # [rad]
'left_roll_joint': 0., # [rad]
'left_yaw_joint': 0, # [rad]
'left_pitch_joint': -0.3, # [rad]
'left_knee_joint': 0.6, # [rad]
'left_foot_joint': -0.3,

'right_roll_joint': -0.1, # [rad]
'right_yaw_joint': 0, # [rad]
'right_pitch_joint': 0.3 , # [rad]
'right_knee_joint': -0.6, # [rad]
'right_foot_joint': 0.3,

}
target_joint_angles = { # = target angles [rad] when action = 0.0
'left_roll_joint': 0, # [rad]
'left_yaw_joint': 0, # [rad]
'left_pitch_joint': -0.3, # [rad]
'left_knee_joint': 0.6, # [rad]
'left_foot_joint': -0.3,

'right_roll_joint': 0, # [rad]
'right_roll_joint': -0., # [rad]
'right_yaw_joint': 0, # [rad]
'right_pitch_joint': 0.3 , # [rad]
'right_knee_joint': -0.6, # [rad]
@@ -63,7 +49,7 @@ class control( LeggedRobotCfg.control ):
# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.25
# decimation: Number of control action updates @ sim DT per policy DT
decimation = 4
decimation = 20

class asset( LeggedRobotCfg.asset ):
file = '{LEGGED_GYM_ROOT_DIR}/resources/robots/wow/urdf/wow.urdf'
@@ -78,8 +64,10 @@ class rewards( LeggedRobotCfg.rewards ):
tracking_sigma = 0.2
orient_tracking_sigma = 2
soft_torque_limit = 0.9
soft_dof_pos_limit = 0.95
soft_dof_pos_limit = 0.90
base_height_target = 0.86
target_foot_pos_x = [0. , 0. ]
target_foot_pos_y = [0.9, -0.9]
only_positive_rewards = False # if true negative total rewards are clipped at zero (avoids early termination problems)
class scales:
# tracking_x_line_vel = 0.2
@@ -97,25 +85,6 @@ class scales:
# roll_yaw_position = 0.20
# dof_pos_limits = -0.2
#added


# # Standing reward
# tracking_ang_vel = 0.0
# tracking_lin_vel = 1.0
# lin_vel_z = -1
# lin_vel_x = -1
# lin_vel_y = -1
# ang_vel_xy = -0.00
# torques = -1.e-5
# dof_acc = -2.e-7
# action_rate = -0.01 #-0.01
# orientation = -5
# dof_pos_limits = -5.
# stand_still = 0.05
# hip_pos = -1
# base_height = 1.0


# termination = -10
# base_height = -10
# # feet_air = -1
@@ -132,32 +101,25 @@ class scales:
# arm_position_stand=0.16
roll_yaw_position = -0.5
base_acc=0.02
# action_difference=0.02
torques=0.5
tracking_x_line_vel = 4
tracking_y_line_vel = 3
tracking_ang_vel = 1.
dof_vel=-6e-5
dof_acc=-2e-7
lin_vel_z = -0.8
lin_vel_z = -1
ang_vel_xy = -0.05
dof_pos_limits = -10
action_rate = -0.015 #-0.01
action_rate = -0.02 #-0.01

# foot_height=-0.3
orientation = -2
feet_air_time=1.3
feet_contact = 1.2





feet_air_time = 4
feet_contact = 3
feet_orientation = 2

class wowCfgPPO( LeggedRobotCfgPPO ):
class policy:
init_noise_std = 1.0
adapt_hidden_dims=[128,64,19]
estimate_hidden_dims = [64,128,41],
actor_hidden_dims = [512, 256, 128]
critic_hidden_dims = [512, 256, 128]
@@ -167,7 +129,7 @@ class policy:
rnn_hidden_size = 64
rnn_num_layers = 2

class algorithm():
class algorithm:
# training params
value_loss_coef = 1.0
use_clipped_value_loss = True
@@ -182,14 +144,14 @@ class algorithm():
desired_kl = 0.01
max_grad_norm = 1.

class runner():
class runner:
policy_class_name = 'ActorCriticRecurrent' #ActorCriticRecurrent
algorithm_class_name = 'PPO'
num_steps_per_env = 24 # per iteration
max_iterations = 5000 # number of policy updates
max_iterations = 50000 # number of policy updates

# logging
save_interval = 500 # check for potential saves every this many iterations
save_interval = 100 # check for potential saves every this many iterations
experiment_name = 'test'
run_name = ''
# load and resume
@@ -198,11 +160,6 @@ class runner():
checkpoint = -1 # -1 = last saved model
resume_path = None # updated from load_run and chkpt

class estimator():
#train_with_estimated_states = True
learning_rate = 1.e-4
hidden_dims = [19, 128]

class runner( LeggedRobotCfgPPO.runner ):
run_name = ''
experiment_name = 'wow'