Commit 35b3748
daphne-cornelisse committed Jan 3, 2024
1 parent a9697f8 commit 35b3748
Showing 14 changed files with 5,523 additions and 13,411 deletions.
8 changes: 4 additions & 4 deletions configs/exp_config.yaml
@@ -1,5 +1,5 @@
project: scaling_ppo
-group: playground
+group: effect_of_human_reg
env_id: Nocturne
seed: 42
track_wandb: true
@@ -19,10 +19,10 @@ ma_callback:
log_indiv_metrics: false
log_agent_actions: false
save_model: true
-model_save_freq: 100 # In iterations (one iter ~ (num_agents x n_steps))
+model_save_freq: 300 # In iterations (one iter ~ (num_agents x n_steps))
save_video: true
record_n_scenes: 10 # Number of different scenes to render
-video_save_freq: 100 # Make a video every k iterations (100 iters ~ 1M steps)
+video_save_freq: 500 # Make a video every k iterations (100 iters ~ 1M steps)
video_deterministic: true

ppo:
@@ -32,7 +32,7 @@ ppo:
vf_coef: 0.5 # Default in SB3 is 0.5

learn:
-total_timesteps: 2_000_000
+total_timesteps: 10_000_000
progress_bar: false

# human-regularized RL
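These fields are read by the training scripts through the repo's load_config helper; as a minimal sketch only (assuming an OmegaConf-style config object, which is an assumption, not necessarily the repo's loader), the values changed above can be inspected like this:

from omegaconf import OmegaConf

cfg = OmegaConf.load("configs/exp_config.yaml")

# One iteration is roughly num_agents x n_steps env steps, so raising these
# frequencies means far fewer checkpoints and videos over a 10M-step run.
print(cfg.group)                        # effect_of_human_reg
print(cfg.learn.total_timesteps)        # 10_000_000
print(cfg.ma_callback.model_save_freq)  # 300
print(cfg.ma_callback.video_save_freq)  # 500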
2 changes: 1 addition & 1 deletion configs/video_config.yaml
@@ -6,5 +6,5 @@ render:

logging:
render_interval: 3
-fps: 4
+fps: 2
where_am_i: headless_machine
18,696 changes: 5,373 additions & 13,323 deletions evaluation/policy_performance_analysis.ipynb

Large diffs are not rendered by default.

40 changes: 26 additions & 14 deletions experiments/hr_rl/run_hr_ppo.py
@@ -37,7 +37,6 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
env = MultiAgentAsVecEnv(
config=env_config,
num_envs=env_config.max_num_vehicles,
-train_on_single_scene=exp_config.train_on_single_scene,
)

# Set up run
@@ -62,6 +61,9 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
logging.info(f"Learning in {len(env.env.files)} scene(s): {env.env.files} | using {exp_config.ppo.device}")
logging.info(f"--- obs_space: {env.observation_space.shape[0]} ---")
logging.info(f"Action_space\n: {env.env.idx_to_actions}")

+if exp_config.reg_weight > 0.0:
+    logging.info(f"Regularization weight: {exp_config.reg_weight} with policy: {exp_config.human_policy_path}")

# Initialize custom callback
custom_callback = CustomMultiAgentCallback(
@@ -135,24 +137,34 @@ def train(env_config, exp_config, video_config, model_config): # pylint: disabl
"arch_ego_state": [8],
"arch_road_objects": [64],
"arch_road_graph": [128, 64],
"arch_shared_net": [],
"arch_shared_net": [128],
"act_func": "tanh",
"dropout": 0.0,
"last_layer_dim_pi": 64,
"last_layer_dim_vf": 64,
}
)

-num_files_list = [10, 100, 1000]
+num_files_list = [10]
+#MEMORY = [4, 2]
+MEMORY = [1]

-for scenes in num_files_list:
-    # Set regularization weight
-    env_config.num_files = scenes
-
-    # Train
-    train(
-        env_config=env_config,
-        exp_config=exp_config,
-        video_config=video_config,
-        model_config=model_config,
-    )
+for mem in MEMORY:
+    for num_scenes in num_files_list:
+
+        # Set memory
+        env_config.subscriber.n_frames_stacked = mem
+
+        # Set regularization weight
+        #exp_config.reg_weight = lam
+
+        exp_config.human_policy_path = f"models/il/human_policy_S{num_scenes}_2024_01_02.pt"
+        env_config.num_files = num_scenes
+
+        # Train
+        train(
+            env_config=env_config,
+            exp_config=exp_config,
+            video_config=video_config,
+            model_config=model_config,
+        )
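The human_policy_path assigned in this sweep, together with the reg_weight checked earlier in train(), drives the human-regularized objective. As a rough illustration only (not the repo's HR-PPO implementation), a KL penalty toward the frozen imitation-learned policy could be folded into the PPO loss like this, assuming discrete action logits from both networks:

import torch.nn.functional as F

def human_regularized_loss(ppo_loss, agent_logits, human_logits, reg_weight):
    """Penalize divergence from the human (IL) policy; logits are (batch, num_actions)."""
    agent_log_probs = F.log_softmax(agent_logits, dim=-1)
    human_probs = F.softmax(human_logits, dim=-1).detach()  # human policy stays frozen
    kl = F.kl_div(agent_log_probs, human_probs, reduction="batchmean")  # KL(human || agent)
    return ppo_loss + reg_weight * kl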
1 change: 0 additions & 1 deletion experiments/hr_rl/run_hr_ppo_cli.py
@@ -41,7 +41,6 @@
"large": [256, 128, 64],
}


def run_hr_ppo(
sweep_name: str = exp_config.group,
steer_disc: int = 5,
2 changes: 1 addition & 1 deletion experiments/il/run_behavioral_cloning.py
@@ -19,7 +19,7 @@
if __name__ == "__main__":

MAX_EVAL_FILES = 12
-NUM_TRAIN_FILES = 1000
+NUM_TRAIN_FILES = 50

# Create run
run = wandb.init(
20 changes: 11 additions & 9 deletions networks/mlp_late_fusion.py
@@ -49,10 +49,10 @@ def __init__(
self.arch_shared_net = arch_shared_net
self.dropout = dropout

-#TODO: write function that gets this information from config
-self.input_dim_ego = 10
-self.input_dim_road_graph = 6500
-self.input_dim_road_objects = 220
+#TODO @Daphne: write function that gets this information from config
+self.input_dim_ego = 10 * self.config.subscriber.n_frames_stacked
+self.input_dim_road_graph = 6500 * self.config.subscriber.n_frames_stacked
+self.input_dim_road_objects = 220 * self.config.subscriber.n_frames_stacked

# IMPORTANT:Save output dimensions, used to create the distributions
self.latent_dim_pi = last_layer_dim_pi
@@ -198,10 +198,10 @@ def _unflatten_obs(self, obs_flat):

# Visible state object order: road_objects, road_points, traffic_lights, stop_signs
# Find the ends of each section
-ROAD_OBJECTS_END = 13 * self.config.scenario.max_visible_objects
-ROAD_POINTS_END = ROAD_OBJECTS_END + (13 * self.config.scenario.max_visible_road_points)
-TL_END = ROAD_POINTS_END + (12 * self.config.scenario.max_visible_traffic_lights)
-STOP_SIGN_END = TL_END + (3 * self.config.scenario.max_visible_stop_signs)
+ROAD_OBJECTS_END = (13 * self.config.scenario.max_visible_objects) * self.config.subscriber.n_frames_stacked
+ROAD_POINTS_END = (ROAD_OBJECTS_END + (13 * self.config.scenario.max_visible_road_points)) * self.config.subscriber.n_frames_stacked
+TL_END = (ROAD_POINTS_END + (12 * self.config.scenario.max_visible_traffic_lights)) * self.config.subscriber.n_frames_stacked
+STOP_SIGN_END = (TL_END + (3 * self.config.scenario.max_visible_stop_signs)) * self.config.subscriber.n_frames_stacked

# Unflatten
road_objects = vis_state[:, :ROAD_OBJECTS_END]
@@ -251,12 +251,14 @@ def _build_mlp_extractor(self) -> None:
# Load environment and experiment configurations
env_config = load_config("env_config")
exp_config = load_config("exp_config")

+env_config.subscriber.n_frames_stacked = 2

# Make environment
env = MultiAgentAsVecEnv(
config=env_config,
num_envs=env_config.max_num_vehicles,
-train_on_single_scene=exp_config.train_on_single_scene,

)

obs = env.reset()
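For orientation, the per-frame input sizes hard-coded above (10 for ego state, 220 for road objects, 6500 for the road graph) and branch widths like those set in the experiment script earlier in this commit (ego [8], road objects [64], road graph [128, 64], shared head [128]) wire up roughly as follows. This is a simplified sketch of the late-fusion pattern, not the module actually defined in networks/mlp_late_fusion.py:

import torch
import torch.nn as nn

def mlp(in_dim, hidden):
    layers, prev = [], in_dim
    for h in hidden:
        layers += [nn.Linear(prev, h), nn.Tanh()]
        prev = h
    return nn.Sequential(*layers), prev

class LateFusionSketch(nn.Module):
    """Each observation group gets its own MLP; embeddings are concatenated into a shared head."""

    def __init__(self, n_frames_stacked=1):
        super().__init__()
        self.ego_net, ego_out = mlp(10 * n_frames_stacked, [8])
        self.obj_net, obj_out = mlp(220 * n_frames_stacked, [64])
        self.map_net, map_out = mlp(6500 * n_frames_stacked, [128, 64])
        self.shared, _ = mlp(ego_out + obj_out + map_out, [128])

    def forward(self, ego, road_objects, road_graph):
        z = torch.cat(
            [self.ego_net(ego), self.obj_net(road_objects), self.map_net(road_graph)], dim=-1
        )
        return self.shared(z)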
12 changes: 9 additions & 3 deletions nocturne/envs/base_env.py
@@ -43,10 +43,10 @@ def __init__( # pylint: disable=too-many-arguments
self,
config: Dict[str, Any],
*,
-img_width=1600,
-img_height=1600,
+img_width=1200,
+img_height=1200,
draw_target_positions=True,
-padding=50.0,
+padding=10.0,
) -> None:
"""Initialize a Nocturne environment.
@@ -488,6 +488,10 @@ def _get_obs_space_dim(self, config, base=0):
(3 * self.config.scenario.max_visible_stop_signs) +
(12 * self.config.scenario.max_visible_traffic_lights)
)

+# Multiply by memory to get the final dimension
+obs_space_dim = obs_space_dim * self.config.subscriber.n_frames_stacked

return (obs_space_dim,)

def normalize_ego_state_by_cat(self, state):
@@ -520,6 +524,8 @@ def render(self, mode: Optional[bool] = None) -> Optional[RenderType]: # pylint
Optional[RenderType]: Rendered image.
"""
return self.scenario.getImage(**self._render_settings)

+env.scenario.getImage(**video_config.render)

def render_ego(self, mode: Optional[bool] = None) -> Optional[RenderType]: # pylint: disable=unused-argument
"""Render the ego vehicles.
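The added multiplication is easiest to see as arithmetic on the flat observation length: the per-frame feature count is scaled by the number of stacked frames. A small sketch, where the per-entity feature sizes (13, 13, 3, 12) come from the unflattening code above, the ego-state size of 10 is taken from the network file, and the max_visible_* counts are placeholder values, not the repo's defaults:

def obs_space_dim(max_objects, max_road_points, max_stop_signs, max_traffic_lights,
                  ego_dim=10, n_frames_stacked=1):
    per_frame = (
        ego_dim
        + 13 * max_objects
        + 13 * max_road_points
        + 3 * max_stop_signs
        + 12 * max_traffic_lights
    )
    # Frame stacking concatenates n_frames_stacked copies of the per-frame features.
    return per_frame * n_frames_stacked

print(obs_space_dim(16, 500, 4, 20, n_frames_stacked=1))  # single frame
print(obs_space_dim(16, 500, 4, 20, n_frames_stacked=2))  # doubled with a memory of 2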
9 changes: 7 additions & 2 deletions nocturne/envs/vec_env_ma.py
@@ -26,7 +26,7 @@ class MultiAgentAsVecEnv(VecEnv):
VecEnv (SB3 VecEnv): SB3 VecEnv base class.
"""

-def __init__(self, config, num_envs, psr=False, train_on_single_scene=False):
+def __init__(self, config, num_envs, psr=False):
# Create Nocturne env
self.env = BaseEnv(config)

@@ -44,7 +44,7 @@ def __init__(self, config, num_envs, psr=False, train_on_single_scene=False):
self.frac_collided = [] # Log fraction of agents that collided
self.frac_goal_achieved = [] # Log fraction of agents that achieved their goal
self.agents_in_scene = []
-self.filename = self.env.files[0] if train_on_single_scene else None # If provided, always use the same file
+self.filename = None # If provided, always use the same file

def _reset_seeds(self) -> None:
"""Reset all environments' seeds."""
@@ -172,6 +172,11 @@ def reset_scene_dict(self):
def step_num(self) -> List[int]:
"""The episodic timestep."""
return self.env.step_num
+@property
+def render(self) -> List[int]:
+    """The episodic timestep."""
+    img = self.env.render()
+    return img

def seed(self, seed=None):
"""Set the random seeds for all environments."""
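A brief usage sketch for the new render accessor; after this commit it is a property, so it is read without parentheses. The env construction, random actions, and imageio writer here are assumptions for illustration, not the repo's evaluation code:

import imageio
import numpy as np

# Assumes `env` is a MultiAgentAsVecEnv built as in experiments/hr_rl/run_hr_ppo.py.
frames = []
obs = env.reset()
for _ in range(50):
    actions = np.array([env.action_space.sample() for _ in range(env.num_envs)])
    obs, rewards, dones, infos = env.step(actions)
    frames.append(env.render)  # property access, no call
imageio.mimsave("rollout.gif", frames, fps=4)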