Merge pull request #41 from ITM-Kitware/dev/fixes-for-metrics-eval

Dev/fixes for metrics eval
ITM-Kitware · Mar 18, 2024 · eafbea3 · eafbea3
2 parents b4b5431 + c9c2b2a
commit eafbea3
Show file tree

Hide file tree

Showing 34 changed files with 577 additions and 71 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,26 @@
 This changelog follows the specifications detailed in: [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release.
 
+## 0.3.1
+
+### Added
+
+* Added new Random ADM (action based; chooses random action and action parameters)
+* Added additional metrics evaluation candidate ADM configs
+* Added logging for final scenario state (alignment scores are provided there in the unstructured field)
+
+### Changed
+
+* Changed the TA3ActionBased interface class to accept a list of scenario IDs to work through (rather than an individual scenario ID)
+* No longer restricting the SITREP action based on unvisited and conscious characters
+
+### Fixed
+
+* Fixed issue where Llama2SingleKDMAADM tagging selection could choose an invalid tag
+* Not allowing actions that require a character ID to be taken when no characters exist
+* Handling rare corner case where generic APPLY_TREATMENT action could be repeated forever
+* Fixed mentions of "continuation of care" in maximization prompts
+
 ## 0.3.0
 
 ### Added

diff --git a/README.md b/README.md
@@ -170,62 +170,36 @@ run_simplified_align_system LocalFiles \
 
 ## Metrics Evaluation ADM Invocations
 
-### Aligned ADM for ADEPT scenarios
+### Baseline ADM
 
 ```
-run_action_based_align_system TA3ActionBased \
-           --adm-config adm_configs/metrics-evaluation/delivered/single_kdma_adm_adept.yml \
-           --username single_kdma_aligned_adm_adept \
-           --align-to-target \
-           --session-type adept
-```
-
-### Aligned Hybrid Kaleido ADM for ADEPT scenarios
-
-```
-run_action_based_align_system TA3ActionBased \
-           --adm-config adm_configs/metrics-evaluation/delivered/hybrid_kaleido.yml \
-           --username hybrid_kaleido_aligned_adm_adept \
-           --align-to-target \
-           --session-type adept
-```
-
-### Baseline ADM for ADEPT scenarios
-
-```
-run_action_based_align_system TA3ActionBased \
+run_align_system TA3ActionBased \
            --adm-config adm_configs/metrics-evaluation/delivered/single_kdma_adm_baseline.yml \
-           --username single_kdma_baseline_adm_adept \
-           --session-type adept
+           --username kitware-single-kdma-adm-baseline \
+           --session-type eval \
+           --api_endpoint "http://127.0.0.1:8080" # URL for TA3 Server
 ```
 
-### Aligned ADM for SoarTech scenarios
+### Aligned ADM 1 (Single KDMA ADM No Negatives)
 
 ```
-run_action_based_align_system TA3ActionBased \
-           --adm-config adm_configs/metrics-evaluation/delivered/single_kdma_adm_soartech.yml \
-           --username single_kdma_aligned_adm_soartech \
+run_align_system TA3ActionBased \
+           --adm-config adm_configs/metrics-evaluation/delivered/single_kdma_adm_adept.yml \
+           --username kitware-single-kdma-adm-aligned-no-negatives \
            --align-to-target \
-           --session-type soartech
+           --session-type eval \
+           --api_endpoint "http://127.0.0.1:8080" # URL for TA3 Server
 ```
 
-### Aligned Hybrid Kaleido ADM for SoarTech scenarios
+### Aligned ADM 2 (Hybrid Kaleido ADM)
 
 ```
-run_action_based_align_system TA3ActionBased \
+run_align_system TA3ActionBased \
            --adm-config adm_configs/metrics-evaluation/delivered/hybrid_kaleido.yml \
-           --username hybrid_kaleido_aligned_adm_soartech \
+           --username kitware-hybrid-kaleido-aligned \
            --align-to-target \
-           --session-type soartech
-```
-
-### Baseline ADM for SoarTech scenarios
-
-```
-run_action_based_align_system TA3ActionBased \
-           --adm-config adm_configs/metrics-evaluation/delivered/single_kdma_adm_baseline.yml \
-           --username single_kdma_baseline_adm_soartech \
-           --session-type soartech
+           --session-type eval \
+           --api_endpoint "http://127.0.0.1:8080" # URL for TA3 Server
 ```
 
 

diff --git a/...on/delivered/single_kdma_adm_soartech.yml → ...elivered/single_kdma_adm_no_negatives.yml b/...on/delivered/single_kdma_adm_soartech.yml → ...elivered/single_kdma_adm_no_negatives.yml
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_adept_high_no_other_choices.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_adept_high_no_other_choices.yml
@@ -0,0 +1,23 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map: "align_system/algorithms/lib/templates/kdma_descriptions_short_metrics_eval.yml"
+    # LLM inference kwargs; NOTE(review): '\n' in the single-quoted template stays a literal backslash-n in YAML -- confirm the consumer expects that
+    answer_attempts: 5
+    action_template: 'Choosing to "{{choice}}".  In the context of the following scenario: {{scene_state}} {{mission}} {{decision_environment}} With the following casualties:\n{{characters_str}}'
+
+alignment_target_override:  # target: MoralDesert at 1
+  id: "ADEPT-metrics_eval-alignment-target-train-HIGH"
+  kdma_values:
+    - {kdma: "MoralDesert", value: 1}
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_adept_low_no_other_choices.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_adept_low_no_other_choices.yml
@@ -0,0 +1,23 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map: "align_system/algorithms/lib/templates/kdma_descriptions_short_metrics_eval.yml"
+    # LLM inference kwargs; NOTE(review): '\n' in the single-quoted template stays a literal backslash-n in YAML -- confirm the consumer expects that
+    answer_attempts: 5
+    action_template: 'Choosing to "{{choice}}".  In the context of the following scenario: {{scene_state}} {{mission}} {{decision_environment}} With the following casualties:\n{{characters_str}}'
+
+alignment_target_override:  # target: MoralDesert at 0
+  id: "ADEPT-metrics_eval-alignment-target-train-LOW"
+  kdma_values:
+    - {kdma: "MoralDesert", value: 0}
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_no_other_choices.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_no_other_choices.yml
@@ -0,0 +1,24 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map: "align_system/algorithms/lib/templates/kdma_descriptions_short_metrics_eval.yml"
+    # LLM inference kwargs; NOTE(review): '\n' in the single-quoted template stays a literal backslash-n in YAML -- confirm the consumer expects that
+    answer_attempts: 5
+    action_template: 'Choosing to "{{choice}}".  In the context of the following scenario: {{scene_state}} {{mission}} {{decision_environment}} With the following casualties:\n{{characters_str}}'
+
+alignment_target_override:  # target: maximization at 0.9
+  id: "maximization_high"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.9
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase1.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase1.yml
@@ -0,0 +1,27 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map:  # KDMA description given inline as a mapping
+      maximization:
+        name: "Maximization"
+        description: "Prioritize gathering the most information possible before acting on that information to maximize lives saved"
+        vrd: "Value"
+    # LLM inference kwargs
+    answer_attempts: 5
+
+alignment_target_override:  # target: maximization at 0.9
+  id: "maximization_high"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.9
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase2.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase2.yml
@@ -0,0 +1,27 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map:  # KDMA description given inline as a mapping
+      maximization:
+        name: "Maximization"
+        description: "Prioritize gathering the most information possible before acting on that information, but not at the detriment of casualties"
+        vrd: "Value"
+    # LLM inference kwargs
+    answer_attempts: 5
+
+alignment_target_override:  # "high" variant: same target as the other soartech_high_* configs
+  id: "maximization_high"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.9  # the low value (0.1) belongs only in hybrid_kaleido_soartech_low_vrd_rephrase2.yml
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase3.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase3.yml
@@ -0,0 +1,27 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map:  # KDMA description given inline as a mapping
+      maximization:
+        name: "Maximization"
+        description: "Favor gathering information to plan actions that maximize lives saved"
+        vrd: "Value"
+    # LLM inference kwargs
+    answer_attempts: 5
+
+alignment_target_override:  # target: maximization at 0.9
+  id: "maximization_high"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.9
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase4.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase4.yml
@@ -0,0 +1,27 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map:  # KDMA description given inline as a mapping
+      maximization:
+        name: "Maximization"
+        description: "Favor gathering the most information possible to take actions that maximize casualty outcomes"
+        vrd: "Value"
+    # LLM inference kwargs
+    answer_attempts: 5
+
+alignment_target_override:  # target: maximization at 0.9
+  id: "maximization_high"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.9
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase5.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_high_vrd_rephrase5.yml
@@ -0,0 +1,27 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map:  # KDMA description given inline as a mapping
+      maximization:
+        name: "Maximization"
+        description: "Prioritize gathering information to take the most informed, life-saving actions"
+        vrd: "Value"
+    # LLM inference kwargs
+    answer_attempts: 5
+
+alignment_target_override:  # target: maximization at 0.9
+  id: "maximization_high"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.9
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_low_no_other_choices.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_low_no_other_choices.yml
@@ -0,0 +1,24 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map: "align_system/algorithms/lib/templates/kdma_descriptions_short_metrics_eval.yml"
+    # LLM inference kwargs; NOTE(review): '\n' in the single-quoted template stays a literal backslash-n in YAML -- confirm the consumer expects that
+    answer_attempts: 5
+    action_template: 'Choosing to "{{choice}}".  In the context of the following scenario: {{scene_state}} {{mission}} {{decision_environment}} With the following casualties:\n{{characters_str}}'
+
+alignment_target_override:  # target: maximization at 0.1
+  id: "maximization_low"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.1
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_low_vrd_rephrase1.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_low_vrd_rephrase1.yml
@@ -0,0 +1,27 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map:  # KDMA description given inline as a mapping
+      maximization:
+        name: "Maximization"
+        description: "Prioritize gathering the most information possible before acting on that information to maximize lives saved"
+        vrd: "Value"
+    # LLM inference kwargs
+    answer_attempts: 5
+
+alignment_target_override:  # target: maximization at 0.1
+  id: "maximization_low"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.1
diff --git a/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_low_vrd_rephrase2.yml b/adm_configs/metrics-evaluation/hybrid_kaleido_soartech_low_vrd_rephrase2.yml
@@ -0,0 +1,27 @@
+adm:
+  name: "HybridKaleidoADM"
+  init_kwargs:
+    kaleido_init_kwargs:  # init kwargs for the Kaleido model
+      model_name: "allenai/kaleido-large"
+      use_tqdm: false
+
+    llm_init_kwargs:  # init kwargs for the LLM
+      hf_model: "meta-llama/Llama-2-7b-chat-hf"
+      precision: "half"
+
+  inference_kwargs:
+    # Kaleido inference kwargs
+    distance_fn: "RelevanceWeightedDistance"
+    kdma_descriptions_map:  # KDMA description given inline as a mapping
+      maximization:
+        name: "Maximization"
+        description: "Prioritize gathering the most information possible before acting on that information, but not at the detriment of casualties"
+        vrd: "Value"
+    # LLM inference kwargs
+    answer_attempts: 5
+
+alignment_target_override:  # target: maximization at 0.1
+  id: "maximization_low"
+  kdma_values:
+    - kdma: "maximization"
+      value: 0.1