Added final phase 1 evaluation configs and updated version for 0.5.5 …

…release
ITM-Kitware · Nov 18, 2024 · b00c4c3 · b00c4c3
1 parent e5f820d
commit b00c4c3
Show file tree

Hide file tree

Showing 5 changed files with 94 additions and 16 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,15 @@
 This changelog follows the specifications detailed in: [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release.
 
+## 0.5.5
+
+### Added
+
+* Added Phase 1 Evaluation experiment configuration files
+* Added ICL example selection method that gives larger weight to examples with the same character ids as the current probe. To use, set `incontext.method` to `matching_characters`.
+* Added ICL example selection method that gives larger weight to examples with the same action types as the current probe. To use, set `incontext.method` to `matching_actions`.
+* Added retrieved ICL examples to `input_output.json`
+
 ## 0.5.4
 
 ### Changed
@@ -31,8 +40,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 * Added KDMA scaling factor option. Scale factors for each KDMA are added to `align_system/prompt_engineering/kdma_descriptions.yml`
 * Added heuristic treatment options component
 * Added incontext examples to the `input_output.json` files for comparative regression
-* Added ICL example selection method that gives larger weight to examples with the same action types as the current probe. To use set `incontext.method` to `matching_actions`.
-* Added ICL example selection method that gives larger weight to examples with the same characetr ids as the current probe. To use set `incontext.method` to `matching_characters`.
 
 ### Fixed
 

diff --git a/README.md b/README.md
@@ -112,9 +112,9 @@ captured in a new configuration file.  We manage these experiments in
 delivered ADMs for the Metrics Evaluation (both to run on training
 data, and eval data).
 
-## Dry Run Evaluation ADM Invocations
+## Phase 1 Evaluation ADM Invocations
 
-We've specified Hydra experiments for the Dry Run Evaluation ADMs.
+We've specified Hydra experiments for the Phase 1 Evaluation ADMs.
 Note that by default these configurations attempt to connect to
 `https://darpaitm.caci.com` as the TA3 API endpoint, but this can be
 overridden with `interface.api_endpoint='http://127.0.0.1:8080'` on
@@ -125,32 +125,27 @@ the command line.
 (Good candidate for a smoketest)
 
 ```
-run_align_system +experiment=dry_run_evaluation/random_eval_live
+run_align_system +experiment=phase1_evaluation/random_eval_live
 ```
 
 ### Baseline ADM
 
 ```
-run_align_system +experiment=dry_run_evaluation/outlines_baseline_eval_live
+run_align_system +experiment=phase1_evaluation/baseline_eval_live
 ```
 
-### Aligned ADM 1 (Comparative Regression + ICL + Template ADM) (ADEPT eval scenarios)
+### Aligned ADM Adept (Comparative Regression + ICL + Template ADM) (ADEPT eval scenarios)
 
 ```
-run_align_system +experiment=dry_run_evaluation/comparative_regression_icl_template_eval_live_adept
+run_align_system +experiment=phase1_evaluation/aligned_adm_adept_eval
 ```
 
-### Aligned ADM 1 (Comparative Regression + ICL + Template ADM) (SoarTech eval scenarios)
+### Aligned ADM SoarTech (Comparative Regression + ICL + Template ADM) (SoarTech eval scenarios)
 
 ```
-run_align_system +experiment=dry_run_evaluation/comparative_regression_icl_template_eval_live_soartech
+run_align_system +experiment=phase1_evaluation/aligned_adm_soartech_eval
 ```
 
-### Aligned ADM 2 (Hybrid Regression ADM)
-
-```
-run_align_system +experiment=dry_run_evaluation/hybrid_regression_eval_live
-```
 
 ## Implementing a new ADM
 

diff --git a/align_system/configs/experiment/phase1_evaluation/aligned_adm_adept_eval.yaml b/align_system/configs/experiment/phase1_evaluation/aligned_adm_adept_eval.yaml
@@ -0,0 +1,37 @@
+# @package _global_
+defaults:
+  - override /adm: outlines_regression_aligned_comparative/incontext_phase1
+  - override /interface: ta3
+
+interface:
+  api_endpoint: "https://darpaitm.caci.com"
+  session_type: adept
+  training_session: null
+  username: "ALIGN-ADM-ComparativeRegression-Mistral-7B-Instruct-v0.2-ADEPT-10Sample"
+
+adm:
+  instance:
+    precision: half
+    sampler:
+      _target_: outlines.samplers.MultinomialSampler
+      temperature: 0.7
+    model_name: mistralai/Mistral-7B-Instruct-v0.2
+  inference_kwargs:
+    kdma_score_examples: true
+    num_samples: 10
+    predict_outcomes: false
+    generator_batch_size: 5
+    incontext:
+      sort_actions: true
+      normalization: null
+      number: 5
+      leave_one_out_strategy: null
+      most_similar_first: false
+
+sort_available_actions: true
+align_to_target: true
+save_last_unstructured_state_per_scenario: true
+
+hydra:
+  run:
+    dir: 'phase1_eval_live/ALIGN-ADM-ComparativeRegression-Mistral-7B-Instruct-v0.2-ADEPT-10Sample/${now:%Y-%m-%d__%H-%M-%S}'
diff --git a/align_system/configs/experiment/phase1_evaluation/aligned_adm_soartech_eval.yaml b/align_system/configs/experiment/phase1_evaluation/aligned_adm_soartech_eval.yaml
@@ -0,0 +1,39 @@
+# @package _global_
+defaults:
+  - override /adm: outlines_regression_aligned_comparative/incontext_phase1
+  - override /interface: ta3
+
+interface:
+  api_endpoint: "https://darpaitm.caci.com"
+  session_type: soartech
+  training_session: null
+  username: "ALIGN-ADM-ComparativeRegression-Llama-3.2-3B-Instruct-SoarTech-MatchingChars"
+adm:
+  instance:
+    precision: half
+    sampler:
+      _target_: outlines.samplers.GreedySampler
+    model_name: meta-llama/Llama-3.2-3B-Instruct
+  inference_kwargs:
+    distribution_matching: cumulative_kde
+    kde_norm: priornorm
+    priornorm_factor: 0.5
+    kdma_score_examples: true
+    num_samples: 1
+    predict_outcomes: false
+    generator_batch_size: 5
+    incontext:
+      most_similar_first: false
+      sort_actions: true
+      normalization: null
+      number: 4
+      leave_one_out_strategy: null
+      method: matching_characters
+
+force_determinism: true
+align_to_target: true
+save_last_unstructured_state_per_scenario: true
+
+hydra:
+  run:
+    dir: 'phase1_eval_live/ALIGN-ADM-ComparativeRegression-Llama-3.2-3B-Instruct-SoarTech-MatchingChars/${now:%Y-%m-%d__%H-%M-%S}'
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "align-system"
-version = "0.5.4"
+version = "0.5.5"
 description = ""
 authors = ["David Joy <10147749+dmjoy@users.noreply.github.com>"]
 readme = "README.md"