Commit 35b3748
daphne-cornelisse committed Jan 3, 2024
1 parent a9697f8 commit 35b3748
Showing 14 changed files with 5,523 additions and 13,411 deletions.
8 changes: 4 additions & 4 deletions configs/exp_config.yaml
@@ -1,5 +1,5 @@
project: scaling_ppo
-group: playground
+group: effect_of_human_reg
env_id: Nocturne
seed: 42
track_wandb: true
@@ -19,10 +19,10 @@ ma_callback:
log_indiv_metrics: false
log_agent_actions: false
save_model: true
-model_save_freq: 100 # In iterations (one iter ~ (num_agents x n_steps))
+model_save_freq: 300 # In iterations (one iter ~ (num_agents x n_steps))
save_video: true
record_n_scenes: 10 # Number of different scenes to render
-video_save_freq: 100 # Make a video every k iterations (100 iters ~ 1M steps)
+video_save_freq: 500 # Make a video every k iterations (100 iters ~ 1M steps)
video_deterministic: true

ppo:
@@ -32,7 +32,7 @@ ppo:
vf_coef: 0.5 # Default in SB3 is 0.5

learn:
-total_timesteps: 2_000_000
+total_timesteps: 10_000_000
progress_bar: false

# human-regularized RL
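These fields are read by the training scripts through the repo's load_config helper; as a minimal sketch only (assuming an OmegaConf-style config object, which is an assumption, not necessarily the repo's loader), the values changed above can be inspected like this:

from omegaconf import OmegaConf

cfg = OmegaConf.load("configs/exp_config.yaml")

# One iteration is roughly num_agents x n_steps env steps, so raising these
# frequencies means far fewer checkpoints and videos over a 10M-step run.
print(cfg.group)                        # effect_of_human_reg
print(cfg.learn.total_timesteps)        # 10_000_000
print(cfg.ma_callback.model_save_freq)  # 300
print(cfg.ma_callback.video_save_freq)  # 500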
2 changes: 1 addition & 1 deletion configs/video_config.yaml
@@ -6,5 +6,5 @@ render:

logging:
render_interval: 3
-fps: 4
+fps: 2
where_am_i: headless_machine
18,696 changes: 5,373 additions & 13,323 deletions evaluation/policy_performance_analysis.ipynb

Large diffs are not rendered by default.

40 changes: 26 additions & 14 deletions experiments/hr_rl/run_hr_ppo.py
@@ -37,7 +37,6 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
env = MultiAgentAsVecEnv(
config=env_config,
num_envs=env_config.max_num_vehicles,
-train_on_single_scene=exp_config.train_on_single_scene,
)

# Set up run
@@ -62,6 +61,9 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
logging.info(f"Learning in {len(env.env.files)} scene(s): {env.env.files} | using {exp_config.ppo.device}")
logging.info(f"--- obs_space: {env.observation_space.shape[0]} ---")
logging.info(f"Action_space\n: {env.env.idx_to_actions}")

+if exp_config.reg_weight > 0.0:
+    logging.info(f"Regularization weight: {exp_config.reg_weight} with policy: {exp_config.human_policy_path}")

# Initialize custom callback
custom_callback = CustomMultiAgentCallback(
@@ -135,24 +137,34 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
"arch_ego_state": [8],
"arch_road_objects": [64],
"arch_road_graph": [128, 64],
"arch_shared_net": [],
"arch_shared_net": [128],
"act_func": "tanh",
"dropout": 0.0,
"last_layer_dim_pi": 64,
"last_layer_dim_vf": 64,
}
)

-num_files_list = [10, 100, 1000]
+num_files_list = [10]
+#MEMORY = [4, 2]
+MEMORY = [1]

-for scenes in num_files_list:
-    # Set regularization weight
-    env_config.num_files = scenes
-
-    # Train
-    train(
-        env_config=env_config,
-        exp_config=exp_config,
-        video_config=video_config,
-        model_config=model_config,
-    )
+for mem in MEMORY:
+    for num_scenes in num_files_list:
+
+        # Set memory
+        env_config.subscriber.n_frames_stacked = mem
+
+        # Set regularization weight
+        #exp_config.reg_weight = lam
+
+        exp_config.human_policy_path = f"models/il/human_policy_S{num_scenes}_2024_01_02.pt"
+        env_config.num_files = num_scenes
+
+        # Train
+        train(
+            env_config=env_config,
+            exp_config=exp_config,
+            video_config=video_config,
+            model_config=model_config,
+        )
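The human_policy_path assigned in this sweep, together with the reg_weight checked earlier in train(), drives the human-regularized objective. As a rough illustration only (not the repo's HR-PPO implementation), a KL penalty toward the frozen imitation-learned policy could be folded into the PPO loss like this, assuming discrete action logits from both networks:

import torch.nn.functional as F

def human_regularized_loss(ppo_loss, agent_logits, human_logits, reg_weight):
    """Penalize divergence from the human (IL) policy; logits are (batch, num_actions)."""
    agent_log_probs = F.log_softmax(agent_logits, dim=-1)
    human_probs = F.softmax(human_logits, dim=-1).detach()  # human policy stays frozen
    kl = F.kl_div(agent_log_probs, human_probs, reduction="batchmean")  # KL(human || agent)
    return ppo_loss + reg_weight * kl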
1 change: 0 additions & 1 deletion experiments/hr_rl/run_hr_ppo_cli.py
@@ -41,7 +41,6 @@
"large": [256, 128, 64],
}


def run_hr_ppo(
sweep_name: str = exp_config.group,
steer_disc: int = 5,
2 changes: 1 addition & 1 deletion experiments/il/run_behavioral_cloning.py
@@ -19,7 +19,7 @@
if __name__ == "__main__":

MAX_EVAL_FILES = 12
-NUM_TRAIN_FILES = 1000
+NUM_TRAIN_FILES = 50

# Create run
run = wandb.init(
20 changes: 11 additions & 9 deletions networks/mlp_late_fusion.py
@@ -49,10 +49,10 @@ def __init__(
self.arch_shared_net = arch_shared_net
self.dropout = dropout

-#TODO: write function that gets this information from config
-self.input_dim_ego = 10
-self.input_dim_road_graph = 6500
-self.input_dim_road_objects = 220
+#TODO @Daphne: write function that gets this information from config
+self.input_dim_ego = 10 * self.config.subscriber.n_frames_stacked
+self.input_dim_road_graph = 6500 * self.config.subscriber.n_frames_stacked
+self.input_dim_road_objects = 220 * self.config.subscriber.n_frames_stacked

# IMPORTANT:Save output dimensions, used to create the distributions
self.latent_dim_pi = last_layer_dim_pi
@@ -198,10 +198,10 @@ def _unflatten_obs(self, obs_flat):

# Visible state object order: road_objects, road_points, traffic_lights, stop_signs
# Find the ends of each section
-ROAD_OBJECTS_END = 13 * self.config.scenario.max_visible_objects
-ROAD_POINTS_END = ROAD_OBJECTS_END + (13 * self.config.scenario.max_visible_road_points)
-TL_END = ROAD_POINTS_END + (12 * self.config.scenario.max_visible_traffic_lights)
-STOP_SIGN_END = TL_END + (3 * self.config.scenario.max_visible_stop_signs)
+ROAD_OBJECTS_END = (13 * self.config.scenario.max_visible_objects) * self.config.subscriber.n_frames_stacked
+ROAD_POINTS_END = (ROAD_OBJECTS_END + (13 * self.config.scenario.max_visible_road_points)) * self.config.subscriber.n_frames_stacked
+TL_END = (ROAD_POINTS_END + (12 * self.config.scenario.max_visible_traffic_lights)) * self.config.subscriber.n_frames_stacked
+STOP_SIGN_END = (TL_END + (3 * self.config.scenario.max_visible_stop_signs)) * self.config.subscriber.n_frames_stacked

# Unflatten
road_objects = vis_state[:, :ROAD_OBJECTS_END]
@@ -251,12 +251,14 @@ def _build_mlp_extractor(self) -> None:
# Load environment and experiment configurations
env_config = load_config("env_config")
exp_config = load_config("exp_config")

+env_config.subscriber.n_frames_stacked = 2

# Make environment
env = MultiAgentAsVecEnv(
config=env_config,
num_envs=env_config.max_num_vehicles,
-train_on_single_scene=exp_config.train_on_single_scene,

)

obs = env.reset()
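For orientation, the per-frame input sizes hard-coded above (10 for ego state, 220 for road objects, 6500 for the road graph) and branch widths like those set in the experiment script earlier in this commit (ego [8], road objects [64], road graph [128, 64], shared head [128]) wire up roughly as follows. This is a simplified sketch of the late-fusion pattern, not the module actually defined in networks/mlp_late_fusion.py:

import torch
import torch.nn as nn

def mlp(in_dim, hidden):
    layers, prev = [], in_dim
    for h in hidden:
        layers += [nn.Linear(prev, h), nn.Tanh()]
        prev = h
    return nn.Sequential(*layers), prev

class LateFusionSketch(nn.Module):
    """Each observation group gets its own MLP; embeddings are concatenated into a shared head."""

    def __init__(self, n_frames_stacked=1):
        super().__init__()
        self.ego_net, ego_out = mlp(10 * n_frames_stacked, [8])
        self.obj_net, obj_out = mlp(220 * n_frames_stacked, [64])
        self.map_net, map_out = mlp(6500 * n_frames_stacked, [128, 64])
        self.shared, _ = mlp(ego_out + obj_out + map_out, [128])

    def forward(self, ego, road_objects, road_graph):
        z = torch.cat(
            [self.ego_net(ego), self.obj_net(road_objects), self.map_net(road_graph)], dim=-1
        )
        return self.shared(z)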
12 changes: 9 additions & 3 deletions nocturne/envs/base_env.py
@@ -43,10 +43,10 @@ def __init__( # pylint: disable=too-many-arguments
self,
config: Dict[str, Any],
*,
-img_width=1600,
-img_height=1600,
+img_width=1200,
+img_height=1200,
draw_target_positions=True,
-padding=50.0,
+padding=10.0,
) -> None:
"""Initialize a Nocturne environment.
@@ -488,6 +488,10 @@ def _get_obs_space_dim(self, config, base=0):
(3 * self.config.scenario.max_visible_stop_signs) +
(12 * self.config.scenario.max_visible_traffic_lights)
)

+# Multiply by memory to get the final dimension
+obs_space_dim = obs_space_dim * self.config.subscriber.n_frames_stacked

return (obs_space_dim,)

def normalize_ego_state_by_cat(self, state):
@@ -520,6 +524,8 @@ def render(self, mode: Optional[bool] = None) -> Optional[RenderType]: # pylint
Optional[RenderType]: Rendered image.
"""
return self.scenario.getImage(**self._render_settings)

+env.scenario.getImage(**video_config.render)

def render_ego(self, mode: Optional[bool] = None) -> Optional[RenderType]: # pylint: disable=unused-argument
"""Render the ego vehicles.
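The added multiplication is easiest to see as arithmetic on the flat observation length: the per-frame feature count is scaled by the number of stacked frames. A small sketch, where the per-entity feature sizes (13, 13, 3, 12) come from the unflattening code above, the ego-state size of 10 is taken from the network file, and the max_visible_* counts are placeholder values, not the repo's defaults:

def obs_space_dim(max_objects, max_road_points, max_stop_signs, max_traffic_lights,
                  ego_dim=10, n_frames_stacked=1):
    per_frame = (
        ego_dim
        + 13 * max_objects
        + 13 * max_road_points
        + 3 * max_stop_signs
        + 12 * max_traffic_lights
    )
    # Frame stacking concatenates n_frames_stacked copies of the per-frame features.
    return per_frame * n_frames_stacked

print(obs_space_dim(16, 500, 4, 20, n_frames_stacked=1))  # single frame
print(obs_space_dim(16, 500, 4, 20, n_frames_stacked=2))  # doubled with a memory of 2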
9 changes: 7 additions & 2 deletions nocturne/envs/vec_env_ma.py
@@ -26,7 +26,7 @@ class MultiAgentAsVecEnv(VecEnv):
VecEnv (SB3 VecEnv): SB3 VecEnv base class.
"""

-def __init__(self, config, num_envs, psr=False, train_on_single_scene=False):
+def __init__(self, config, num_envs, psr=False):
# Create Nocturne env
self.env = BaseEnv(config)

@@ -44,7 +44,7 @@ def __init__(self, config, num_envs, psr=False, train_on_single_scene=False):
self.frac_collided = [] # Log fraction of agents that collided
self.frac_goal_achieved = [] # Log fraction of agents that achieved their goal
self.agents_in_scene = []
-self.filename = self.env.files[0] if train_on_single_scene else None # If provided, always use the same file
+self.filename = None # If provided, always use the same file

def _reset_seeds(self) -> None:
"""Reset all environments' seeds."""
@@ -172,6 +172,11 @@ def reset_scene_dict(self):
def step_num(self) -> List[int]:
"""The episodic timestep."""
return self.env.step_num
+@property
+def render(self) -> List[int]:
+    """The episodic timestep."""
+    img = self.env.render()
+    return img

def seed(self, seed=None):
"""Set the random seeds for all environments."""
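A brief usage sketch for the new render accessor; after this commit it is a property, so it is read without parentheses. The env construction, random actions, and imageio writer here are assumptions for illustration, not the repo's evaluation code:

import imageio
import numpy as np

# Assumes `env` is a MultiAgentAsVecEnv built as in experiments/hr_rl/run_hr_ppo.py.
frames = []
obs = env.reset()
for _ in range(50):
    actions = np.array([env.action_space.sample() for _ in range(env.num_envs)])
    obs, rewards, dones, infos = env.step(actions)
    frames.append(env.render)  # property access, no call
imageio.mimsave("rollout.gif", frames, fps=4)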