Commit 039fb02: first commit (0 parents)

202 files changed: +11152 -0 lines

.gitattributes

Whitespace-only changes.

.gitignore

+34
@@ -0,0 +1,34 @@
videos
recorded_frames
/DexHandEnv/logs
*train_dir*
*wandb*
*ige_logs*
*.egg-info
/.vs
/.vscode
/_package
/shaders
._tmptext.txt
__pycache__/
/DexHandEnv/tasks/__pycache__
/DexHandEnv/utils/__pycache__
/DexHandEnv/tasks/base/__pycache__
/tools/format/.lastrun
*.pyc
_doxygen
/rlDexHandEnvgpu/logs
/DexHandEnv/benchmarks/results
/DexHandEnv/simpletests/results
*.pxd2
/tests/logs
/DexHandEnv/balance_bot.xml
/DexHandEnv/quadcopter.xml
/DexHandEnv/ingenuity.xml
logs*
nn/
runs/
.idea
outputs/
*.hydra*
/DexHandEnv/wandb

.gitmodules

Whitespace-only changes.

DexHandEnv/__init__.py

+55
@@ -0,0 +1,55 @@
import hydra
from hydra import compose, initialize
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from DexHandEnv.utils.reformat import omegaconf_to_dict


OmegaConf.register_new_resolver('eq', lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver('contains', lambda x, y: x.lower() in y.lower())
OmegaConf.register_new_resolver('if', lambda pred, a, b: a if pred else b)
OmegaConf.register_new_resolver('resolve_default', lambda default, arg: default if arg == '' else arg)


def make(
    seed: int,
    task: str,
    num_envs: int,
    sim_device: str,
    rl_device: str,
    graphics_device_id: int = -1,
    headless: bool = False,
    multi_gpu: bool = False,
    virtual_screen_capture: bool = False,
    force_render: bool = True,
    cfg: DictConfig = None
):
    from DexHandEnv.utils.rlgames_utils import get_rlgames_env_creator
    # create a hydra config if none was passed in
    if cfg is None:
        # reset the current hydra config if it was already parsed (but not passed in here)
        if HydraConfig.initialized():
            task = HydraConfig.get().runtime.choices['task']
            hydra.core.global_hydra.GlobalHydra.instance().clear()

        with initialize(config_path="./cfg"):
            cfg = compose(config_name="config", overrides=[f"task={task}"])
            cfg_dict = omegaconf_to_dict(cfg.task)
            cfg_dict['env']['numEnvs'] = num_envs
    # reuse the existing config
    else:
        cfg_dict = omegaconf_to_dict(cfg.task)

    create_rlgpu_env = get_rlgames_env_creator(
        seed=seed,
        task_config=cfg_dict,
        task_name=cfg_dict["name"],
        sim_device=sim_device,
        rl_device=rl_device,
        graphics_device_id=graphics_device_id,
        headless=headless,
        multi_gpu=multi_gpu,
        virtual_screen_capture=virtual_screen_capture,
        force_render=force_render,
    )
    return create_rlgpu_env()
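
Note: a minimal usage sketch of the `make()` entry point above, assuming the package is installed alongside a working Isaac Gym setup; the argument values here are illustrative only:

import DexHandEnv

# Create a vectorized environment for the DexCube task defined in
# cfg/task/DexCube.yaml; device strings follow the config.yaml defaults.
envs = DexHandEnv.make(
    seed=42,
    task="DexCube",
    num_envs=16,
    sim_device="cuda:0",
    rl_device="cuda:0",
    graphics_device_id=0,
    headless=True,
)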

DexHandEnv/cfg/config.yaml

+73
@@ -0,0 +1,73 @@

# Task name - used to pick the class to load
task_name: ${task.name}
# experiment name. defaults to the name of the training config
experiment: ''

# if set to a positive integer, overrides the default number of environments
num_envs: ''

# seed - set to -1 to choose a random seed
seed: 42
# set to True for deterministic performance
torch_deterministic: False

# maximum number of learning iterations to train for. overrides the default per-environment setting
max_iterations: ''

## Device config
# 'physx' or 'flex'
physics_engine: 'physx'
# whether to use the cpu or gpu pipeline
pipeline: 'gpu'
# device for running the physics simulation
sim_device: 'cuda:0'
# device to run RL on
rl_device: 'cuda:0'
graphics_device_id: 0

## PhysX arguments
num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
solver_type: 1 # 0: pgs, 1: tgs
num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread

# RLGames Arguments
# test - if set, run the policy in inference mode (requires setting a checkpoint to load)
test: False
# used to set the checkpoint path
checkpoint: ''
# set sigma when restoring the network
sigma: ''
# set to True to use multi-gpu training
multi_gpu: False

wandb_activate: False
wandb_group: ''
wandb_name: ${train.params.config.name}
wandb_entity: ''
wandb_project: 'DexHandEnv'
wandb_tags: []
wandb_logcode_dir: ''

capture_video: False
capture_video_freq: 1464
capture_video_len: 100
force_render: True

# disables rendering
headless: False

# set the default task and the default training config based on the task
defaults:
  - task: Ant
  - train: ${task}PPO
  - pbt: no_pbt
  - override hydra/job_logging: disabled
  - _self_

# set the directory where the output files get saved
hydra:
  output_subdir: null
  run:
    dir: .
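
Note: `make()` in DexHandEnv/__init__.py composes this file programmatically; a minimal sketch of the same composition with illustrative overrides (assumes it is run relative to the DexHandEnv package directory, as the package code does):

from hydra import compose, initialize
from omegaconf import OmegaConf

# Compose cfg/config.yaml the same way make() does, overriding the task
# choice and the number of environments from code instead of the CLI.
with initialize(config_path="./cfg"):
    cfg = compose(config_name="config", overrides=["task=Ant", "num_envs=512"])
print(OmegaConf.to_yaml(cfg.task))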

DexHandEnv/cfg/pbt/no_pbt.yaml

+1
@@ -0,0 +1 @@
enabled: False

DexHandEnv/cfg/pbt/pbt_default.yaml

+40
@@ -0,0 +1,40 @@
defaults:
  - mutation: default_mutation

enabled: True

policy_idx: 0 # policy index in the population: should always be specified explicitly! Each run in a population should have a unique idx from [0..N-1]
num_policies: 8 # total number of policies in the population, i.e. the total number of learners. Override through the CLI!
workspace: "pbt_workspace" # suffix of the workspace dir name inside train_dir, used to distinguish different PBT runs with the same experiment name. Recommended to specify a unique name

# special mode that enables PBT features for debugging even if only one policy is present. Never enable in actual experiments
dbg_mode: False

# PBT hyperparams
interval_steps: 10000000 # interval in env steps between PBT iterations (checkpointing, mutation, etc.)
start_after: 10000000 # start PBT after this many env frames are collected; this applies to all experiment restarts, i.e. when we resume training after the weights are mutated
initial_delay: 20000000 # a separate delay for when we're just starting the training session. It makes sense to give policies a bit more time to develop different behaviors

# Fraction of the underperforming policies whose weights are to be replaced by better performing policies
# This is rounded up, i.e. for 8 policies and fraction 0.3 we replace ceil(0.3*8)=3 worst policies
replace_fraction_worst: 0.125

# Fraction of agents used to sample weights from when we replace an underperforming agent
# This is also rounded up
replace_fraction_best: 0.3

# Replace an underperforming policy only if its reward is lower by at least this fraction of the standard deviation
# within the population.
replace_threshold_frac_std: 0.5

# Replace an underperforming policy only if its reward is lower by at least this fraction of the absolute value
# of the objective of a better policy
replace_threshold_frac_absolute: 0.05

# Probability to mutate a given parameter
mutation_rate: 0.15

# min and max factors for mutating a parameter
# A mutation multiplies or divides (chosen at random) the parameter value by a factor sampled from [change_min, change_max]
change_min: 1.1
change_max: 1.5
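
Note: a short sketch of the mutation rule the comments above describe; this illustrates the documented behavior and is not the project's actual PBT code:

import random

def mutate_param(value, mutation_rate=0.15, change_min=1.1, change_max=1.5):
    # Each hyperparameter mutates with probability `mutation_rate`.
    if random.random() > mutation_rate:
        return value
    # Multiply or divide (at random) by a factor from [change_min, change_max].
    factor = random.uniform(change_min, change_max)
    return value * factor if random.random() < 0.5 else value / factor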

DexHandEnv/cfg/task/Ant.yaml

+101
@@ -0,0 +1,101 @@
# used to create the object
name: Ant

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 5
  episodeLength: 1000
  enableDebugVis: False

  clipActions: 1.0

  powerScale: 1.0
  controlFrequencyInv: 1 # 60 Hz

  # reward parameters
  headingWeight: 0.5
  upWeight: 0.1

  # cost parameters
  actionsCost: 0.005
  energyCost: 0.05
  dofVelocityScale: 0.2
  contactForceScale: 0.1
  jointsAtLimitCost: 0.1
  deathCost: -2.0
  terminationHeight: 0.31

  plane:
    staticFriction: 1.0
    dynamicFriction: 1.0
    restitution: 0.0

  asset:
    assetFileName: "mjcf/nv_ant.xml"

  # set to True if you use camera sensors in the environment
  enableCameraSensors: False

sim:
  dt: 0.0166 # 1/60 s
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
    num_position_iterations: 4
    num_velocity_iterations: 0
    contact_offset: 0.02
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 10.0
    default_buffer_size_multiplier: 5.0
    max_gpu_contact_pairs: 8388608 # 8*1024*1024
    num_subscenes: ${....num_subscenes}
    contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect contacts only on the last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)

task:
  randomize: False
  randomization_params:
    # specify which attributes to randomize for each actor type and property
    frequency: 600 # how many environment steps between generating new randomizations
    observations:
      range: [0, .002] # range for the white noise
      operation: "additive"
      distribution: "gaussian"
    actions:
      range: [0., .02]
      operation: "additive"
      distribution: "gaussian"
    actor_params:
      ant:
        color: True
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            setup_only: True # property will only be randomized once, before the simulation is started. See the Domain Randomization documentation for more info.
        dof_properties:
          damping:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
          stiffness:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
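
Note: the `${eq:...}` and `${contains:...}` interpolations above rely on the custom resolvers registered in DexHandEnv/__init__.py; a self-contained sketch of how they resolve (the config snippet here is illustrative, not this file):

from omegaconf import OmegaConf

# Same resolver definitions as in DexHandEnv/__init__.py.
OmegaConf.register_new_resolver('eq', lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver('contains', lambda x, y: x.lower() in y.lower())

cfg = OmegaConf.create({
    'pipeline': 'gpu',
    'sim_device': 'cuda:0',
    'use_gpu_pipeline': '${eq:${pipeline},"gpu"}',
    'use_gpu': '${contains:"cuda",${sim_device}}',
})
print(cfg.use_gpu_pipeline, cfg.use_gpu)  # True True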

DexHandEnv/cfg/task/DexCube.yaml

+68
@@ -0,0 +1,68 @@
# used to create the object
name: DexCube

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 1.5
  episodeLength: 100 # *1/(100/5)
  enableDebugVis: True

  clipObservations: 5.0
  clipActions: 1.0

  startPositionNoise: 0.25
  startRotationNoise: 0.785
  dexPositionNoise: 0.0
  dexRotationNoise: 0.0
  dexDofNoise: 0.25

  aggregateMode: 3

  actionScale: 0.1

  # reward weights
  heightRewardScale: 10.0
  distanceRewardScale: 2.0
  successRewardScale: 10.0
  qdotPenaltyScale: 0.1
  actionPenaltyScale: 1.0
  pregraspPenaltyScale: 0.1
  terminationPenaltyScale: 10.0

  controlType: joint_pos # options are {osc, joint_tor, joint_pos}

  pregraspDatasetPath: "../../dataset/dataset.csv"

  asset:
    assetRoot: "../../assets"
    assetFileNamedex: "dexhand/mjcf/dexhand021_right.xml"

  # set to True if you use camera sensors in the environment
  enableCameraSensors: False

sim:
  dt: 0.01
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
    num_position_iterations: 8
    num_velocity_iterations: 1
    contact_offset: 0.005
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 1000.0
    default_buffer_size_multiplier: 5.0
    max_gpu_contact_pairs: 4194304 # 4*1024*1024
    num_subscenes: ${....num_subscenes}
    contact_collection: 2 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect contacts only on the last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)

task:
  randomize: False
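
Note: the reward weights above suggest a shaped reward of roughly the following form. This is a hypothetical reading of the config; the actual term definitions live in the DexCube task code, which is not part of this commit excerpt, and every input name below is assumed:

def dex_cube_reward(height_rew, distance_rew, success, qdot_norm,
                    action_norm, pregrasp_dev, terminated, cfg):
    # Hypothetical weighted sum over the scales defined in this file; each
    # input term is assumed to be computed by the (unshown) task code.
    return (cfg["heightRewardScale"] * height_rew
            + cfg["distanceRewardScale"] * distance_rew
            + cfg["successRewardScale"] * success
            - cfg["qdotPenaltyScale"] * qdot_norm
            - cfg["actionPenaltyScale"] * action_norm
            - cfg["pregraspPenaltyScale"] * pregrasp_dev
            - cfg["terminationPenaltyScale"] * terminated)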
