Commit 20e6cac

Remove dense rewards for larger experiments.

1 parent 2a7efbb

5 files changed: +9 −8 lines

configs/env_config.yaml

Lines changed: 3 additions & 3 deletions
@@ -8,7 +8,7 @@ env: my_custom_multi_env_v1 # name of the env, hardcoded for now
 episode_length: 80
 warmup_period: 10 # In the RL setting we use a warmup of 10 steps
 # How many files of the total dataset to use. -1 indicates to use all of them
-num_files: 1
+num_files: 20
 fix_file_order: true # If true, always select the SAME files (when creating the environment), if false, pick files at random
 sample_file_method: "random" # ALTERNATIVES: "no_replacement" (SUPPORTED) / "score-based" (TODO: @Daphne)
 dt: 0.1
@@ -62,7 +62,7 @@ rew_cfg:
   goal_tolerance: 0.5
   reward_scaling: 10.0 # rescale all the rewards by this value. This can help w/ some learning algorithms
   collision_penalty: 0
-  shaped_goal_distance_scaling: 0.2
+  shaped_goal_distance_scaling: 0.0 # Default is 0.2, setting to 0.0 removes all dense rewards
   shaped_goal_distance: true
   goal_distance_penalty: false # if shaped_goal_distance is true, then when this is True the goal distance
   # is a penalty for being far from
@@ -109,4 +109,4 @@ subscriber:
   n_frames_stacked: 1 # Agent memory

 # Path to folder with traffic scene(s) from which to create an environment
-data_path: ./data_10/train
+data_path: ./data_full/train
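
Note: the substantive change here is shaped_goal_distance_scaling going from 0.2 to 0.0. Because the dense shaping term is multiplied by this coefficient, zeroing it removes every dense reward while leaving shaped_goal_distance: true in place. A minimal sketch of that structure (assumed names and shape, not Nocturne's actual reward code):

```python
# Illustrative sketch only -- the field names mirror rew_cfg, but the
# reward structure is an assumption, not the repo's implementation.
def step_reward(dist_to_goal: float, cfg: dict) -> float:
    reward = 0.0
    if dist_to_goal < cfg["goal_tolerance"]:
        reward += 1.0  # sparse success signal
    if cfg["shaped_goal_distance"]:
        # Dense term, scaled by shaped_goal_distance_scaling: with the new
        # value of 0.0 this contributes nothing, so only sparse rewards remain.
        reward -= cfg["shaped_goal_distance_scaling"] * dist_to_goal
    return reward * cfg["reward_scaling"]
```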

configs/exp_config.yaml

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 project: scaling_ppo
+group: playground
 env_id: Nocturne
 seed: 42
 track_wandb: true
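
The new group key is consumed by run_hr_ppo.py in the next diff, replacing the previously hardcoded 'hr_ppo' group. For context, wandb.init uses group to cluster related runs in the W&B UI; roughly (assuming a plain YAML load, which may differ from the repo's config handling):

```python
# Sketch of how the new `group` key flows into wandb.init; the repo's
# actual config loading may differ from this plain YAML load.
import yaml
import wandb

with open("configs/exp_config.yaml") as f:
    exp_config = yaml.safe_load(f)

run = wandb.init(
    project=exp_config["project"],  # "scaling_ppo"
    group=exp_config["group"],      # "playground": groups related runs in the UI
)
```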

experiments/hr_rl/run_hr_ppo.py

Lines changed: 2 additions & 2 deletions
@@ -50,8 +50,8 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
     with wandb.init(
         project=exp_config.project,
         name=run_id,
+        group=exp_config.group,
         config={**exp_config, **env_config},
-        group='hr_ppo',
         id=run_id,
         **exp_config.wandb,
     ) if exp_config.track_wandb else nullcontext() as run:
@@ -134,7 +134,7 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
         {
             "arch_ego_state": [8],
             "arch_road_objects": [64],
-            "arch_road_graph": [126, 64],
+            "arch_road_graph": [128, 64],
             "arch_shared_net": [],
             "act_func": "tanh",
             "dropout": 0.0,

experiments/hr_rl/run_hr_ppo_cli.py

Lines changed: 2 additions & 2 deletions
@@ -49,12 +49,12 @@ def run_hr_ppo(
     ent_coef: float = 0.0,
     vf_coef: float = 0.5,
     seed: int = 42,
-    arch_road_objects: str = "small",
+    arch_road_objects: str = "tiny",
     arch_road_graph: str = "small",
     arch_shared_net: str = "small",
     activation_fn: str = "tanh",
     dropout: float = 0.0,
-    total_timesteps: int = 50_000,
+    total_timesteps: int = 1_000_000,
     num_files: int = 10,
     reg_weight: float = 0.0,
 ) -> None:
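
Both defaults move in the "larger experiments" direction: the training budget grows from 50k to 1M steps, and the road-objects encoder drops to a "tiny" preset. The preset strings presumably resolve to layer-size lists elsewhere in the repo; a hypothetical mapping, for illustration only:

```python
# Hypothetical preset table -- the real mapping (and its sizes) lives
# elsewhere in the repo and may differ.
ARCH_PRESETS = {
    "tiny": [32],
    "small": [64],
    "medium": [128, 64],
}

arch_road_objects = ARCH_PRESETS["tiny"]   # new default
arch_road_graph = ARCH_PRESETS["small"]    # unchanged
```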

networks/mlp_late_fusion.py

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ def __init__(
     env_config: Box,
     arch_ego_state: List[int] = [8],
     arch_road_objects: List[int] = [64],
-    arch_road_graph: List[int] = [126, 64],
+    arch_road_graph: List[int] = [128, 64],
     arch_shared_net: List[int] = [],
     act_func: str = "tanh",
     dropout: float = 0.0,
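
This brings the network definition's default in line with run_hr_ppo.py above and rounds the first hidden width from 126 to the more conventional power-of-two 128. As a rough sketch of how a layer-size list like [128, 64] is typically consumed (assumed helper, not the repo's implementation):

```python
# Rough sketch (assumed helper, not the repo's code) of expanding a
# layer-size list such as [128, 64] into an MLP.
from typing import List

import torch.nn as nn


def build_mlp(in_dim: int, sizes: List[int], act=nn.Tanh) -> nn.Sequential:
    layers = []
    for out_dim in sizes:
        layers += [nn.Linear(in_dim, out_dim), act()]  # one Linear + activation per entry
        in_dim = out_dim
    return nn.Sequential(*layers)


# Example: a road-graph encoder; the input width 6400 is made up for illustration.
road_graph_net = build_mlp(6400, [128, 64])
```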
