Commit 20e6cac

Remove dense rewards for larger experiments.

1 parent 2a7efbb

5 files changed: +9 −8 lines

configs/env_config.yaml

Lines changed: 3 additions & 3 deletions
@@ -8,7 +8,7 @@ env: my_custom_multi_env_v1 # name of the env, hardcoded for now
 episode_length: 80
 warmup_period: 10 # In the RL setting we use a warmup of 10 steps
 # How many files of the total dataset to use. -1 indicates to use all of them
-num_files: 1
+num_files: 20
 fix_file_order: true # If true, always select the SAME files (when creating the environment), if false, pick files at random
 sample_file_method: "random" # ALTERNATIVES: "no_replacement" (SUPPORTED) / "score-based" (TODO: @Daphne)
 dt: 0.1
@@ -62,7 +62,7 @@ rew_cfg:
   goal_tolerance: 0.5
   reward_scaling: 10.0 # rescale all the rewards by this value. This can help w/ some learning algorithms
   collision_penalty: 0
-  shaped_goal_distance_scaling: 0.2
+  shaped_goal_distance_scaling: 0.0 # Default is 0.2, setting to 0.0 removes all dense rewards
   shaped_goal_distance: true
   goal_distance_penalty: false # if shaped_goal_distance is true, then when this is True the goal distance
   # is a penalty for being far from
@@ -109,4 +109,4 @@ subscriber:
   n_frames_stacked: 1 # Agent memory

 # Path to folder with traffic scene(s) from which to create an environment
-data_path: ./data_10/train
+data_path: ./data_full/train
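
Note: the substantive change here is shaped_goal_distance_scaling going from 0.2 to 0.0. Because the dense shaping term is multiplied by this coefficient, zeroing it removes every dense reward while leaving shaped_goal_distance: true in place. A minimal sketch of that structure (assumed names and shape, not Nocturne's actual reward code):

```python
# Illustrative sketch only -- the field names mirror rew_cfg, but the
# reward structure is an assumption, not the repo's implementation.
def step_reward(dist_to_goal: float, cfg: dict) -> float:
    reward = 0.0
    if dist_to_goal < cfg["goal_tolerance"]:
        reward += 1.0  # sparse success signal
    if cfg["shaped_goal_distance"]:
        # Dense term, scaled by shaped_goal_distance_scaling: with the new
        # value of 0.0 this contributes nothing, so only sparse rewards remain.
        reward -= cfg["shaped_goal_distance_scaling"] * dist_to_goal
    return reward * cfg["reward_scaling"]
```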

configs/exp_config.yaml

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 project: scaling_ppo
+group: playground
 env_id: Nocturne
 seed: 42
 track_wandb: true
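
The new group key is consumed by run_hr_ppo.py in the next diff, replacing the previously hardcoded 'hr_ppo' group. For context, wandb.init uses group to cluster related runs in the W&B UI; roughly (assuming a plain YAML load, which may differ from the repo's config handling):

```python
# Sketch of how the new `group` key flows into wandb.init; the repo's
# actual config loading may differ from this plain YAML load.
import yaml
import wandb

with open("configs/exp_config.yaml") as f:
    exp_config = yaml.safe_load(f)

run = wandb.init(
    project=exp_config["project"],  # "scaling_ppo"
    group=exp_config["group"],      # "playground": groups related runs in the UI
)
```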

experiments/hr_rl/run_hr_ppo.py

Lines changed: 2 additions & 2 deletions
@@ -50,8 +50,8 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
     with wandb.init(
         project=exp_config.project,
         name=run_id,
+        group=exp_config.group,
         config={**exp_config, **env_config},
-        group='hr_ppo',
         id=run_id,
         **exp_config.wandb,
     ) if exp_config.track_wandb else nullcontext() as run:
@@ -134,7 +134,7 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
         {
             "arch_ego_state": [8],
             "arch_road_objects": [64],
-            "arch_road_graph": [126, 64],
+            "arch_road_graph": [128, 64],
             "arch_shared_net": [],
             "act_func": "tanh",
             "dropout": 0.0,

experiments/hr_rl/run_hr_ppo_cli.py

Lines changed: 2 additions & 2 deletions
@@ -49,12 +49,12 @@ def run_hr_ppo(
     ent_coef: float = 0.0,
     vf_coef: float = 0.5,
     seed: int = 42,
-    arch_road_objects: str = "small",
+    arch_road_objects: str = "tiny",
     arch_road_graph: str = "small",
     arch_shared_net: str = "small",
     activation_fn: str = "tanh",
     dropout: float = 0.0,
-    total_timesteps: int = 50_000,
+    total_timesteps: int = 1_000_000,
     num_files: int = 10,
     reg_weight: float = 0.0,
 ) -> None:
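
Both defaults move in the "larger experiments" direction: the training budget grows from 50k to 1M steps, and the road-objects encoder drops to a "tiny" preset. The preset strings presumably resolve to layer-size lists elsewhere in the repo; a hypothetical mapping, for illustration only:

```python
# Hypothetical preset table -- the real mapping (and its sizes) lives
# elsewhere in the repo and may differ.
ARCH_PRESETS = {
    "tiny": [32],
    "small": [64],
    "medium": [128, 64],
}

arch_road_objects = ARCH_PRESETS["tiny"]   # new default
arch_road_graph = ARCH_PRESETS["small"]    # unchanged
```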

networks/mlp_late_fusion.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ def __init__(
     env_config: Box,
     arch_ego_state: List[int] = [8],
     arch_road_objects: List[int] = [64],
-    arch_road_graph: List[int] = [126, 64],
+    arch_road_graph: List[int] = [128, 64],
     arch_shared_net: List[int] = [],
     act_func: str = "tanh",
     dropout: float = 0.0,
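
This brings the network definition's default in line with run_hr_ppo.py above and rounds the first hidden width from 126 to the more conventional power-of-two 128. As a rough sketch of how a layer-size list like [128, 64] is typically consumed (assumed helper, not the repo's implementation):

```python
# Rough sketch (assumed helper, not the repo's code) of expanding a
# layer-size list such as [128, 64] into an MLP.
from typing import List

import torch.nn as nn


def build_mlp(in_dim: int, sizes: List[int], act=nn.Tanh) -> nn.Sequential:
    layers = []
    for out_dim in sizes:
        layers += [nn.Linear(in_dim, out_dim), act()]  # one Linear + activation per entry
        in_dim = out_dim
    return nn.Sequential(*layers)


# Example: a road-graph encoder; the input width 6400 is made up for illustration.
road_graph_net = build_mlp(6400, [128, 64])
```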
