Improve eval pipeline.

Emerge-Lab · Jan 2, 2024 · a9697f8 · a9697f8
1 parent 20e6cac
commit a9697f8
Show file tree

Hide file tree

Showing 14 changed files with 6,716 additions and 135 deletions.
diff --git a/configs/bc_config.yaml b/configs/bc_config.yaml
@@ -1,8 +1,8 @@
 # Model 
 save_model: true # Save model after training
-model_name: "human_policy_data_2_scenes" # Name of saved model
+model_name: "human_policy" # Name of saved model
 save_model_path: ./models/il/ # Path to save model
 
 # Train 
-total_samples: 10_000 # Number of obs-act-next_obs-done pairs to generate
-n_epochs: 100 # Training epochs
+total_samples: 150_000 # Number of obs-act-next_obs-done pairs to generate
+n_epochs: 200 # Training epochs
diff --git a/configs/env_config.yaml b/configs/env_config.yaml
@@ -8,7 +8,7 @@ env: my_custom_multi_env_v1 # name of the env, hardcoded for now
 episode_length: 80
 warmup_period: 10 # In the RL setting we use a warmup of 10 steps
 # How many files of the total dataset to use. -1 indicates to use all of them
-num_files: 20
+num_files: 100
 fix_file_order: true # If true, always select the SAME files (when creating the environment); if false, pick files at random
 sample_file_method: "random" # ALTERNATIVES: "no_replacement" (SUPPORTED) / "score-based" (TODO: @Daphne)
 dt: 0.1
@@ -18,9 +18,9 @@ discretize_actions: true
 include_head_angle: false # Whether to include the head tilt/angle as part of a vehicle's action
 accel_discretization: 5
 accel_lower_bound: -3
-accel_upper_bound: 3
-steering_lower_bound: -0.7
-steering_upper_bound: 0.7
+accel_upper_bound: 3 
+steering_lower_bound: -0.7 # steer right
+steering_upper_bound: 0.7 # steer left
 steering_discretization: 5
 max_num_vehicles: 20
 randomize_goals: false
@@ -109,4 +109,5 @@ subscriber:
   n_frames_stacked: 1 # Agent memory
 
 # Path to folder with traffic scene(s) from which to create an environment
-data_path: ./data_full/train
+data_path: ./data_full/train
+val_data_path: ./data_full/valid
diff --git a/configs/exp_config.yaml b/configs/exp_config.yaml
@@ -22,7 +22,7 @@ ma_callback:
   model_save_freq: 100 # In iterations (one iter ~ (num_agents x n_steps))
   save_video: true
   record_n_scenes: 10 # Number of different scenes to render
-  video_save_freq: 50 # Make a video every k iterations (100 iters ~ 1M steps)
+  video_save_freq: 100 # Make a video every k iterations (100 iters ~ 1M steps)
   video_deterministic: true
 
 ppo:
@@ -32,7 +32,7 @@ ppo:
   vf_coef: 0.5 # Default in SB3 is 0.5
 
 learn:
-  total_timesteps: 10_000_000
+  total_timesteps: 2_000_000
   progress_bar: false
 
 # human-regularized RL