Commit 039fb02: first commit (0 parents)

202 files changed: +11152 -0 lines

.gitattributes

Whitespace-only changes.

.gitignore

+34
@@ -0,0 +1,34 @@
videos
recorded_frames
/DexHandEnv/logs
*train_dir*
*wandb*
*ige_logs*
*.egg-info
/.vs
/.vscode
/_package
/shaders
._tmptext.txt
__pycache__/
/DexHandEnv/tasks/__pycache__
/DexHandEnv/utils/__pycache__
/DexHandEnv/tasks/base/__pycache__
/tools/format/.lastrun
*.pyc
_doxygen
/rlDexHandEnvgpu/logs
/DexHandEnv/benchmarks/results
/DexHandEnv/simpletests/results
*.pxd2
/tests/logs
/DexHandEnv/balance_bot.xml
/DexHandEnv/quadcopter.xml
/DexHandEnv/ingenuity.xml
logs*
nn/
runs/
.idea
outputs/
*.hydra*
/DexHandEnv/wandb

.gitmodules

Whitespace-only changes.

DexHandEnv/__init__.py

+55
@@ -0,0 +1,55 @@
import hydra
from hydra import compose, initialize
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf
from DexHandEnv.utils.reformat import omegaconf_to_dict


OmegaConf.register_new_resolver('eq', lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver('contains', lambda x, y: x.lower() in y.lower())
OmegaConf.register_new_resolver('if', lambda pred, a, b: a if pred else b)
OmegaConf.register_new_resolver('resolve_default', lambda default, arg: default if arg == '' else arg)


def make(
    seed: int,
    task: str,
    num_envs: int,
    sim_device: str,
    rl_device: str,
    graphics_device_id: int = -1,
    headless: bool = False,
    multi_gpu: bool = False,
    virtual_screen_capture: bool = False,
    force_render: bool = True,
    cfg: DictConfig = None
):
    from DexHandEnv.utils.rlgames_utils import get_rlgames_env_creator
    # create a hydra config if none was passed in
    if cfg is None:
        # reset the current hydra config if it was already parsed (but not passed in here)
        if HydraConfig.initialized():
            task = HydraConfig.get().runtime.choices['task']
            hydra.core.global_hydra.GlobalHydra.instance().clear()

        with initialize(config_path="./cfg"):
            cfg = compose(config_name="config", overrides=[f"task={task}"])
            cfg_dict = omegaconf_to_dict(cfg.task)
            cfg_dict['env']['numEnvs'] = num_envs
    # reuse the existing config
    else:
        cfg_dict = omegaconf_to_dict(cfg.task)

    create_rlgpu_env = get_rlgames_env_creator(
        seed=seed,
        task_config=cfg_dict,
        task_name=cfg_dict["name"],
        sim_device=sim_device,
        rl_device=rl_device,
        graphics_device_id=graphics_device_id,
        headless=headless,
        multi_gpu=multi_gpu,
        virtual_screen_capture=virtual_screen_capture,
        force_render=force_render,
    )
    return create_rlgpu_env()
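
Note: a minimal usage sketch of the `make()` entry point above, assuming the package is installed alongside a working Isaac Gym setup; the argument values here are illustrative only:

import DexHandEnv

# Create a vectorized environment for the DexCube task defined in
# cfg/task/DexCube.yaml; device strings follow the config.yaml defaults.
envs = DexHandEnv.make(
    seed=42,
    task="DexCube",
    num_envs=16,
    sim_device="cuda:0",
    rl_device="cuda:0",
    graphics_device_id=0,
    headless=True,
)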

DexHandEnv/cfg/config.yaml

+73
@@ -0,0 +1,73 @@

# Task name - used to pick the class to load
task_name: ${task.name}
# experiment name. defaults to the name of the training config
experiment: ''

# if set to a positive integer, overrides the default number of environments
num_envs: ''

# seed - set to -1 to choose a random seed
seed: 42
# set to True for deterministic performance
torch_deterministic: False

# maximum number of learning iterations to train for. overrides the default per-environment setting
max_iterations: ''

## Device config
# 'physx' or 'flex'
physics_engine: 'physx'
# whether to use the cpu or gpu pipeline
pipeline: 'gpu'
# device for running the physics simulation
sim_device: 'cuda:0'
# device to run RL on
rl_device: 'cuda:0'
graphics_device_id: 0

## PhysX arguments
num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
solver_type: 1 # 0: pgs, 1: tgs
num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread

# RLGames Arguments
# test - if set, run the policy in inference mode (requires setting a checkpoint to load)
test: False
# used to set the checkpoint path
checkpoint: ''
# set sigma when restoring the network
sigma: ''
# set to True to use multi-gpu training
multi_gpu: False

wandb_activate: False
wandb_group: ''
wandb_name: ${train.params.config.name}
wandb_entity: ''
wandb_project: 'DexHandEnv'
wandb_tags: []
wandb_logcode_dir: ''

capture_video: False
capture_video_freq: 1464
capture_video_len: 100
force_render: True

# disables rendering
headless: False

# set the default task and the default training config based on the task
defaults:
  - task: Ant
  - train: ${task}PPO
  - pbt: no_pbt
  - override hydra/job_logging: disabled
  - _self_

# set the directory where the output files get saved
hydra:
  output_subdir: null
  run:
    dir: .
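
Note: `make()` in DexHandEnv/__init__.py composes this file programmatically; a minimal sketch of the same composition with illustrative overrides (assumes it is run relative to the DexHandEnv package directory, as the package code does):

from hydra import compose, initialize
from omegaconf import OmegaConf

# Compose cfg/config.yaml the same way make() does, overriding the task
# choice and the number of environments from code instead of the CLI.
with initialize(config_path="./cfg"):
    cfg = compose(config_name="config", overrides=["task=Ant", "num_envs=512"])
print(OmegaConf.to_yaml(cfg.task))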

DexHandEnv/cfg/pbt/no_pbt.yaml

+1
@@ -0,0 +1 @@
enabled: False

DexHandEnv/cfg/pbt/pbt_default.yaml

+40
@@ -0,0 +1,40 @@
defaults:
  - mutation: default_mutation

enabled: True

policy_idx: 0 # policy index in the population: should always be specified explicitly! Each run in a population should have a unique idx from [0..N-1]
num_policies: 8 # total number of policies in the population, i.e. the total number of learners. Override through the CLI!
workspace: "pbt_workspace" # suffix of the workspace dir name inside train_dir, used to distinguish different PBT runs with the same experiment name. Recommended to specify a unique name

# special mode that enables PBT features for debugging even if only one policy is present. Never enable in actual experiments
dbg_mode: False

# PBT hyperparams
interval_steps: 10000000 # interval in env steps between PBT iterations (checkpointing, mutation, etc.)
start_after: 10000000 # start PBT after this many env frames are collected; this applies to all experiment restarts, i.e. when we resume training after the weights are mutated
initial_delay: 20000000 # a separate delay for when we're just starting the training session. It makes sense to give policies a bit more time to develop different behaviors

# Fraction of the underperforming policies whose weights are to be replaced by better performing policies
# This is rounded up, i.e. for 8 policies and fraction 0.3 we replace ceil(0.3*8)=3 worst policies
replace_fraction_worst: 0.125

# Fraction of agents used to sample weights from when we replace an underperforming agent
# This is also rounded up
replace_fraction_best: 0.3

# Replace an underperforming policy only if its reward is lower by at least this fraction of the standard deviation
# within the population.
replace_threshold_frac_std: 0.5

# Replace an underperforming policy only if its reward is lower by at least this fraction of the absolute value
# of the objective of a better policy
replace_threshold_frac_absolute: 0.05

# Probability to mutate a given parameter
mutation_rate: 0.15

# min and max factors for mutating a parameter
# A mutation multiplies or divides (chosen at random) the parameter value by a factor sampled from [change_min, change_max]
change_min: 1.1
change_max: 1.5
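
Note: a short sketch of the mutation rule the comments above describe; this illustrates the documented behavior and is not the project's actual PBT code:

import random

def mutate_param(value, mutation_rate=0.15, change_min=1.1, change_max=1.5):
    # Each hyperparameter mutates with probability `mutation_rate`.
    if random.random() > mutation_rate:
        return value
    # Multiply or divide (at random) by a factor from [change_min, change_max].
    factor = random.uniform(change_min, change_max)
    return value * factor if random.random() < 0.5 else value / factor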

DexHandEnv/cfg/task/Ant.yaml

+101
@@ -0,0 +1,101 @@
# used to create the object
name: Ant

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 5
  episodeLength: 1000
  enableDebugVis: False

  clipActions: 1.0

  powerScale: 1.0
  controlFrequencyInv: 1 # 60 Hz

  # reward parameters
  headingWeight: 0.5
  upWeight: 0.1

  # cost parameters
  actionsCost: 0.005
  energyCost: 0.05
  dofVelocityScale: 0.2
  contactForceScale: 0.1
  jointsAtLimitCost: 0.1
  deathCost: -2.0
  terminationHeight: 0.31

  plane:
    staticFriction: 1.0
    dynamicFriction: 1.0
    restitution: 0.0

  asset:
    assetFileName: "mjcf/nv_ant.xml"

  # set to True if you use camera sensors in the environment
  enableCameraSensors: False

sim:
  dt: 0.0166 # 1/60 s
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
    num_position_iterations: 4
    num_velocity_iterations: 0
    contact_offset: 0.02
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 10.0
    default_buffer_size_multiplier: 5.0
    max_gpu_contact_pairs: 8388608 # 8*1024*1024
    num_subscenes: ${....num_subscenes}
    contact_collection: 0 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect contacts only on the last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)

task:
  randomize: False
  randomization_params:
    # specify which attributes to randomize for each actor type and property
    frequency: 600 # how many environment steps between generating new randomizations
    observations:
      range: [0, .002] # range for the white noise
      operation: "additive"
      distribution: "gaussian"
    actions:
      range: [0., .02]
      operation: "additive"
      distribution: "gaussian"
    actor_params:
      ant:
        color: True
        rigid_body_properties:
          mass:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
            setup_only: True # property will only be randomized once, before the simulation is started. See the Domain Randomization documentation for more info.
        dof_properties:
          damping:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
          stiffness:
            range: [0.5, 1.5]
            operation: "scaling"
            distribution: "uniform"
          lower:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
          upper:
            range: [0, 0.01]
            operation: "additive"
            distribution: "gaussian"
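
Note: the `${eq:...}` and `${contains:...}` interpolations above rely on the custom resolvers registered in DexHandEnv/__init__.py; a self-contained sketch of how they resolve (the config snippet here is illustrative, not this file):

from omegaconf import OmegaConf

# Same resolver definitions as in DexHandEnv/__init__.py.
OmegaConf.register_new_resolver('eq', lambda x, y: x.lower() == y.lower())
OmegaConf.register_new_resolver('contains', lambda x, y: x.lower() in y.lower())

cfg = OmegaConf.create({
    'pipeline': 'gpu',
    'sim_device': 'cuda:0',
    'use_gpu_pipeline': '${eq:${pipeline},"gpu"}',
    'use_gpu': '${contains:"cuda",${sim_device}}',
})
print(cfg.use_gpu_pipeline, cfg.use_gpu)  # True True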

DexHandEnv/cfg/task/DexCube.yaml

+68
@@ -0,0 +1,68 @@
# used to create the object
name: DexCube

physics_engine: ${..physics_engine}

# if given, will override the device setting in gym.
env:
  numEnvs: ${resolve_default:4096,${...num_envs}}
  envSpacing: 1.5
  episodeLength: 100 # *1/(100/5)
  enableDebugVis: True

  clipObservations: 5.0
  clipActions: 1.0

  startPositionNoise: 0.25
  startRotationNoise: 0.785
  dexPositionNoise: 0.0
  dexRotationNoise: 0.0
  dexDofNoise: 0.25

  aggregateMode: 3

  actionScale: 0.1

  # reward weights
  heightRewardScale: 10.0
  distanceRewardScale: 2.0
  successRewardScale: 10.0
  qdotPenaltyScale: 0.1
  actionPenaltyScale: 1.0
  pregraspPenaltyScale: 0.1
  terminationPenaltyScale: 10.0

  controlType: joint_pos # options are {osc, joint_tor, joint_pos}

  pregraspDatasetPath: "../../dataset/dataset.csv"

  asset:
    assetRoot: "../../assets"
    assetFileNamedex: "dexhand/mjcf/dexhand021_right.xml"

  # set to True if you use camera sensors in the environment
  enableCameraSensors: False

sim:
  dt: 0.01
  substeps: 2
  up_axis: "z"
  use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
  gravity: [0.0, 0.0, -9.81]
  physx:
    num_threads: ${....num_threads}
    solver_type: ${....solver_type}
    use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
    num_position_iterations: 8
    num_velocity_iterations: 1
    contact_offset: 0.005
    rest_offset: 0.0
    bounce_threshold_velocity: 0.2
    max_depenetration_velocity: 1000.0
    default_buffer_size_multiplier: 5.0
    max_gpu_contact_pairs: 4194304 # 4*1024*1024
    num_subscenes: ${....num_subscenes}
    contact_collection: 2 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect contacts only on the last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)

task:
  randomize: False
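
Note: the reward weights above suggest a shaped reward of roughly the following form. This is a hypothetical reading of the config; the actual term definitions live in the DexCube task code, which is not part of this commit excerpt, and every input name below is assumed:

def dex_cube_reward(height_rew, distance_rew, success, qdot_norm,
                    action_norm, pregrasp_dev, terminated, cfg):
    # Hypothetical weighted sum over the scales defined in this file; each
    # input term is assumed to be computed by the (unshown) task code.
    return (cfg["heightRewardScale"] * height_rew
            + cfg["distanceRewardScale"] * distance_rew
            + cfg["successRewardScale"] * success
            - cfg["qdotPenaltyScale"] * qdot_norm
            - cfg["actionPenaltyScale"] * action_norm
            - cfg["pregraspPenaltyScale"] * pregrasp_dev
            - cfg["terminationPenaltyScale"] * terminated)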
