Rename safe_policy_checkpoints -> base_policy_checkpoints

cassidylaidlaw · Nov 11, 2024 · 95cd1fa
1 parent a3f6ed7
commit 95cd1fa
Show file tree

Hide file tree

Showing 31 changed files with 1 addition and 1 deletion.
diff --git a/README.md b/README.md
@@ -12,7 +12,7 @@ All Python code is under the `occupancy_measures` package. Run
 to install dependencies.
 
 ## Training the ORPO policies
-Checkpoints for the behavioral cloning (BC) trained base policies are stored within the `data/safe_policy_checkpoints` directory. For now, these checkpoints were generated in Python 3.9, but in the future, we will provide checkpoints that work with all Python versions. You can use these checkpoints to train your own ORPO policies using the following commands: 
+Checkpoints for the behavioral cloning (BC) trained base policies are stored within the `data/base_policy_checkpoints` directory. For now, these checkpoints were generated in Python 3.9, but in the future, we will provide checkpoints that work with all Python versions. You can use these checkpoints to train your own ORPO policies using the following commands: 
 
 - state-action occupancy measure regularization:
 ```

diff --git a/...e_policy/checkpoint_000300/.is_checkpoint → ...e_policy/checkpoint_000300/.is_checkpoint b/...e_policy/checkpoint_000300/.is_checkpoint → ...e_policy/checkpoint_000300/.is_checkpoint
diff --git a/...e_policy/checkpoint_000300/.tune_metadata → ...e_policy/checkpoint_000300/.tune_metadata b/...e_policy/checkpoint_000300/.tune_metadata → ...e_policy/checkpoint_000300/.tune_metadata
diff --git a/...icy/checkpoint_000300/algorithm_state.pkl → ...icy/checkpoint_000300/algorithm_state.pkl b/...icy/checkpoint_000300/algorithm_state.pkl → ...icy/checkpoint_000300/algorithm_state.pkl
diff --git a/...00/policies/safe_policy0/policy_state.pkl → ...00/policies/safe_policy0/policy_state.pkl b/...00/policies/safe_policy0/policy_state.pkl → ...00/policies/safe_policy0/policy_state.pkl
diff --git a/...licies/safe_policy0/rllib_checkpoint.json → ...licies/safe_policy0/rllib_checkpoint.json b/...licies/safe_policy0/rllib_checkpoint.json → ...licies/safe_policy0/rllib_checkpoint.json
diff --git a/...y/checkpoint_000300/rllib_checkpoint.json → ...y/checkpoint_000300/rllib_checkpoint.json b/...y/checkpoint_000300/rllib_checkpoint.json → ...y/checkpoint_000300/rllib_checkpoint.json
diff --git a/...eckpoints/glucose_safe_policy/params.json → ...eckpoints/glucose_base_policy/params.json b/...eckpoints/glucose_safe_policy/params.json → ...eckpoints/glucose_base_policy/params.json
diff --git a/...heckpoints/glucose_safe_policy/params.pkl → ...heckpoints/glucose_base_policy/params.pkl b/...heckpoints/glucose_safe_policy/params.pkl → ...heckpoints/glucose_base_policy/params.pkl
diff --git a/...e_policy/checkpoint_000100/.is_checkpoint → ...e_policy/checkpoint_000100/.is_checkpoint b/...e_policy/checkpoint_000100/.is_checkpoint → ...e_policy/checkpoint_000100/.is_checkpoint
diff --git a/...e_policy/checkpoint_000100/.tune_metadata → ...e_policy/checkpoint_000100/.tune_metadata b/...e_policy/checkpoint_000100/.tune_metadata → ...e_policy/checkpoint_000100/.tune_metadata
diff --git a/...icy/checkpoint_000100/algorithm_state.pkl → ...icy/checkpoint_000100/algorithm_state.pkl b/...icy/checkpoint_000100/algorithm_state.pkl → ...icy/checkpoint_000100/algorithm_state.pkl
diff --git a/...00/policies/safe_policy0/policy_state.pkl → ...00/policies/safe_policy0/policy_state.pkl b/...00/policies/safe_policy0/policy_state.pkl → ...00/policies/safe_policy0/policy_state.pkl
diff --git a/...licies/safe_policy0/rllib_checkpoint.json → ...licies/safe_policy0/rllib_checkpoint.json b/...licies/safe_policy0/rllib_checkpoint.json → ...licies/safe_policy0/rllib_checkpoint.json
diff --git a/...y/checkpoint_000100/rllib_checkpoint.json → ...y/checkpoint_000100/rllib_checkpoint.json b/...y/checkpoint_000100/rllib_checkpoint.json → ...y/checkpoint_000100/rllib_checkpoint.json
diff --git a/...ckpoints/pandemic_safe_policy/params.json → ...ckpoints/pandemic_base_policy/params.json b/...ckpoints/pandemic_safe_policy/params.json → ...ckpoints/pandemic_base_policy/params.json
diff --git a/...eckpoints/pandemic_safe_policy/params.pkl → ...eckpoints/pandemic_base_policy/params.pkl b/...eckpoints/pandemic_safe_policy/params.pkl → ...eckpoints/pandemic_base_policy/params.pkl
diff --git a/...e_policy/checkpoint_000003/.is_checkpoint → ...e_policy/checkpoint_000003/.is_checkpoint b/...e_policy/checkpoint_000003/.is_checkpoint → ...e_policy/checkpoint_000003/.is_checkpoint
diff --git a/...e_policy/checkpoint_000003/.tune_metadata → ...e_policy/checkpoint_000003/.tune_metadata b/...e_policy/checkpoint_000003/.tune_metadata → ...e_policy/checkpoint_000003/.tune_metadata
diff --git a/...icy/checkpoint_000003/algorithm_state.pkl → ...icy/checkpoint_000003/algorithm_state.pkl b/...icy/checkpoint_000003/algorithm_state.pkl → ...icy/checkpoint_000003/algorithm_state.pkl
diff --git a/.../policies/default_policy/policy_state.pkl → .../policies/default_policy/policy_state.pkl b/.../policies/default_policy/policy_state.pkl → .../policies/default_policy/policy_state.pkl
diff --git a/...cies/default_policy/rllib_checkpoint.json → ...cies/default_policy/rllib_checkpoint.json b/...cies/default_policy/rllib_checkpoint.json → ...cies/default_policy/rllib_checkpoint.json
diff --git a/...y/checkpoint_000003/rllib_checkpoint.json → ...y/checkpoint_000003/rllib_checkpoint.json b/...y/checkpoint_000003/rllib_checkpoint.json → ...y/checkpoint_000003/rllib_checkpoint.json
diff --git a/...heckpoints/tomato_safe_policy/params.json → ...heckpoints/tomato_base_policy/params.json b/...heckpoints/tomato_safe_policy/params.json → ...heckpoints/tomato_base_policy/params.json
diff --git a/...checkpoints/tomato_safe_policy/params.pkl → ...checkpoints/tomato_base_policy/params.pkl b/...checkpoints/tomato_safe_policy/params.pkl → ...checkpoints/tomato_base_policy/params.pkl
diff --git a/...icy/checkpoint_000025/algorithm_state.pkl → ...icy/checkpoint_000025/algorithm_state.pkl b/...icy/checkpoint_000025/algorithm_state.pkl → ...icy/checkpoint_000025/algorithm_state.pkl
diff --git a/...25/policies/safe_policy0/policy_state.pkl → ...25/policies/safe_policy0/policy_state.pkl b/...25/policies/safe_policy0/policy_state.pkl → ...25/policies/safe_policy0/policy_state.pkl
diff --git a/...licies/safe_policy0/rllib_checkpoint.json → ...licies/safe_policy0/rllib_checkpoint.json b/...licies/safe_policy0/rllib_checkpoint.json → ...licies/safe_policy0/rllib_checkpoint.json
diff --git a/...y/checkpoint_000025/rllib_checkpoint.json → ...y/checkpoint_000025/rllib_checkpoint.json b/...y/checkpoint_000025/rllib_checkpoint.json → ...y/checkpoint_000025/rllib_checkpoint.json
diff --git a/...eckpoints/traffic_safe_policy/params.json → ...eckpoints/traffic_base_policy/params.json b/...eckpoints/traffic_safe_policy/params.json → ...eckpoints/traffic_base_policy/params.json
diff --git a/...heckpoints/traffic_safe_policy/params.pkl → ...heckpoints/traffic_base_policy/params.pkl b/...heckpoints/traffic_safe_policy/params.pkl → ...heckpoints/traffic_base_policy/params.pkl