Skip to content

Commit

Permalink
Add outlines sampler parameter; fix issue with outlines_llama3 experi…
Browse files Browse the repository at this point in the history
…ments
  • Loading branch information
dmjoy committed Jul 19, 2024
1 parent 6419086 commit c99ca28
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 21 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
* Fixed issue with outlines ADM where responses weren't a list when only a single sample was requested
* Fixed issue with outlines ADM during target KDMA conversion (should only run to_dict on KDMAValue objects)
* Fixed a typo issue with outlines ADM where the positive system prompt was being used instead of the negative system prompt
* Fixed issue with llama3 outlines ADM experiment files where the model wasn't being correctly set

### Added

Expand All @@ -26,6 +27,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
* Added outlines based prompts (in `align_system/prompt_engineering/outlines_prompts.py`)
* Added dedicated function to utils for calculating votes (same voting scheme as the single KDMA ADM)
* Added top level config options to force determinism and fix seeds; along with an example experiment to demonstrate
* Added sampler parameter to outlines ADMs (example usage in `align_system/configs/experiment/examples/outlines_sampler.yaml`)

### Deprecated
* The algorithm `align_system/algorithms/chat_kdma_predicting_adm.py` has been replaced by `align_system/algorithms/outlines_regression_adm.py`
Expand Down
12 changes: 12 additions & 0 deletions align_system/algorithms/outlines_adm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import itertools

import outlines
from outlines.samplers import MultinomialSampler
import jinja2
from rich.highlighter import JSONHighlighter
from swagger_client.models import (
Expand Down Expand Up @@ -44,13 +45,20 @@ def __init__(self,
model_name,
device='auto',
baseline=False,
sampler=MultinomialSampler(),
**kwargs):
self.baseline = baseline
self.model = outlines.models.transformers(
model_name,
device=device,
model_kwargs=kwargs.get('model_kwargs', {}),
tokenizer_kwargs=kwargs.get('tokenizer_kwargs', {}))
# NOTE: In cases where we want multiple samples, we're passing
# in a list of prompts (this allows us to shuffle answers in
# each prompt), rather than setting the number of samples in
# the sampler itself (which defaults to 1); setting the number
# of samples in the sampler may result in unexpected behavior
self.sampler = sampler

def dialog_to_prompt(self, dialog):
tokenizer = self.model.tokenizer.tokenizer
Expand Down Expand Up @@ -181,6 +189,7 @@ def top_level_choose_action(self,
generator = outlines.generate.json(
self.model,
action_choice_json_schema(json.dumps(choices)),
sampler=self.sampler,
whitespace_pattern=r"[ ]?")

dialog_texts = [self.dialog_to_prompt(d) for d in
Expand Down Expand Up @@ -270,6 +279,7 @@ def choose_action(self, scenario_state, available_actions, alignment_target, **k
generator = outlines.generate.json(
self.model,
character_choice_json_schema(json.dumps(characters)),
sampler=self.sampler,
whitespace_pattern=r"[ ]?")

log.info("[bold]*DIALOG PROMPT*[/bold]",
Expand Down Expand Up @@ -305,6 +315,7 @@ def choose_action(self, scenario_state, available_actions, alignment_target, **k
treatment_choice_json_schema(
json.dumps([s.type for s in available_supplies]),
json.dumps(valid_treatment_locations)),
sampler=self.sampler,
whitespace_pattern=r"[ ]?")

log.info("[bold]*DIALOG PROMPT*[/bold]",
Expand Down Expand Up @@ -343,6 +354,7 @@ def choose_action(self, scenario_state, available_actions, alignment_target, **k
self.model,
tag_choice_json_schema(
json.dumps(valid_tags)),
sampler=self.sampler,
whitespace_pattern=r"[ ]?")

log.info("[bold]*DIALOG PROMPT*[/bold]",
Expand Down
57 changes: 40 additions & 17 deletions align_system/algorithms/outlines_regression_adm.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import json
import random
import os
import os
import pathlib
import yaml
import itertools

import outlines
from outlines.samplers import MultinomialSampler
from rich.highlighter import JSONHighlighter
from swagger_client.models import (
ActionTypeEnum,
Expand All @@ -30,15 +31,28 @@
log = logging.getLogger(__name__)
JSON_HIGHLIGHTER = JSONHighlighter()

# TODO - make this configurable
# TODO - make this configurable
KDMA_DESCRIPTIONS_FILE_PATH = os.path.join(
pathlib.Path(__file__).parent.absolute(), '..',
'prompt_engineering/kdma_descriptions.yml')


def batched(iterable, n):
    """Yield successive tuples of at most *n* items taken from *iterable*.

    Backport of the recipe from
    https://docs.python.org/3/library/itertools.html#itertools.batched
    (itertools.batched itself is only available in Python 3.12+).

    >>> list(batched('ABCDEFG', 3))
    [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]
    """
    if n < 1:
        raise ValueError('n must be at least one')
    it = iter(iterable)
    chunk = tuple(itertools.islice(it, n))
    while chunk:
        yield chunk
        chunk = tuple(itertools.islice(it, n))


def run_in_batches(inference_function, inputs, batch_size):
    """Apply *inference_function* to *inputs* in chunks of *batch_size*.

    Splitting the work into batches keeps peak memory bounded (avoids
    out-of-memory errors when running model inference over many prompts).
    Returns the concatenated results in input order.
    """
    per_batch_results = [inference_function(list(chunk))
                         for chunk in batched(inputs, batch_size)]
    return list(itertools.chain.from_iterable(per_batch_results))

Expand All @@ -47,18 +61,25 @@ def __init__(self,
model_name,
device='auto',
baseline=False,
sampler=MultinomialSampler(),
**kwargs):
self.baseline = baseline
self.model = outlines.models.transformers(
model_name,
device=device,
model_kwargs=kwargs.get('model_kwargs', {}),
tokenizer_kwargs=kwargs.get('tokenizer_kwargs', {}))
# NOTE: In cases where we want multiple samples, we're passing
# in a list of prompts (this allows us to shuffle answers in
# each prompt), rather than setting the number of samples in
# the sampler itself (which defaults to 1); setting the number
# of samples in the sampler may result in unexpected behavior
self.sampler = sampler


def sample_outcome_predictions(self,
scenario_description,
choices,
def sample_outcome_predictions(self,
scenario_description,
choices,
num_samples=1,
batch_size=5):
'''
Expand All @@ -80,6 +101,7 @@ def sample_outcome_predictions(self,
outcome_generator = outlines.generate.json(
self.model,
outcome_prediction_json_schema(),
sampler=self.sampler,
whitespace_pattern=r"[ ]?")

outcome_dialog_texts = [self.dialog_to_prompt(d) for d in outcome_dialogs]
Expand All @@ -100,11 +122,11 @@ def sample_outcome_predictions(self,
return predicted_outcomes


def sample_kdma_score_predictions(self,
scenario_description,
choices,
target_kdmas,
predicted_outcomes=None,
def sample_kdma_score_predictions(self,
scenario_description,
choices,
target_kdmas,
predicted_outcomes=None,
num_samples=1,
batch_size=6):
'''
Expand All @@ -118,7 +140,7 @@ def sample_kdma_score_predictions(self,
# loop over samples
for sample_idx in range(num_samples):
# loop over target kdmas
for target_kdma in target_kdmas:
for target_kdma in target_kdmas:
kdma_score_sys_prompt = kdma_score_prediction_system_prompt(target_kdma['name'], target_kdma['description'])
# loop over choices
for choice_idx in range(len(choices)):
Expand All @@ -138,6 +160,7 @@ def sample_kdma_score_predictions(self,
kdma_score_generator = outlines.generate.json(
self.model,
kdma_score_prediction_json_schema(),
sampler=self.sampler,
whitespace_pattern=r"[ ]?")

kdma_dialog_texts = [self.dialog_to_prompt(d) for d in kdma_dialogs]
Expand All @@ -161,8 +184,8 @@ def sample_kdma_score_predictions(self,


# TODO - create a separate class for distribution matching approaches
# (each with a __call__ method) so we can specify the class target and
# initialize in our hydra configs.
# (each with a __call__ method) so we can specify the class target and
# initialize in our hydra configs.

def average_distribution_matching(self, predicted_kdma_values, target_kdmas):
'''
Expand Down Expand Up @@ -207,7 +230,7 @@ def mse(target_kdma_values, predicted_kdma_values):
min_mse = mse_
choice_idx = i
selected_choice = choices[choice_idx]

# If outcomes were predicted, add to reasoning
if predicted_kdma_values[selected_choice]['outcomes']:
reasoning = 'The predicted outcome for choice ' + selected_choice + ' was: '
Expand Down Expand Up @@ -263,7 +286,7 @@ def top_level_choose_action(self,

target_kdmas = alignment_target.kdma_values

# Get kdma names and descriptions
# Get kdma names and descriptions
with open(KDMA_DESCRIPTIONS_FILE_PATH, 'r') as f:
kdma_descriptions = yaml.load(f, Loader=yaml.FullLoader)
# Add names and descriptions to target_kdmas
Expand Down Expand Up @@ -336,4 +359,4 @@ def top_level_choose_action(self,
dialog = [{'role': 'system', 'content': alignment_system_prompt},
{'role': 'user', 'content': prompt}]

return action_to_take, dialog
return action_to_take, dialog
27 changes: 27 additions & 0 deletions align_system/configs/experiment/examples/outlines_sampler.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# @package _global_

# Example experiment demonstrating the `sampler` parameter of the
# outlines-based ADMs (see align_system/algorithms/outlines_adm.py).
defaults:
  - /alignment_target: moral_deservingness_high
  - override /adm: outlines_transformers_structured_aligned

adm:
  instance:
    model_name: meta-llama/Meta-Llama-3-8B

    # For greedy (deterministic) sampling instead, use:
    # sampler:
    #   _target_: outlines.samplers.GreedySampler

    # Multinomial sampling with temperature 0.7 (outlines' default
    # temperature is 1.0 if not specified)
    sampler:
      _target_: outlines.samplers.MultinomialSampler
      temperature: 0.7

    # NOTE: In cases where we want multiple samples, we're
    # passing in a list of prompts (this allows us to shuffle
    # answers in each prompt), rather than setting the number of
    # samples in the sampler itself (which defaults to 1); setting
    # the number of samples in the sampler may result in
    # unexpected behavior

align_to_target: true
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ defaults:
- override /adm: outlines_transformers_structured_aligned
- override /interface: ta3

adm.instance.model_name: meta-llama/Meta-Llama-3-8B
adm:
instance:
model_name: meta-llama/Meta-Llama-3-8B

interface:
session_type: adept
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ defaults:
- override /adm: outlines_transformers_structured_aligned
- override /interface: ta3

adm.instance.model_name: meta-llama/Meta-Llama-3-8B
adm:
instance:
model_name: meta-llama/Meta-Llama-3-8B

interface:
session_type: adept
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ defaults:
- override /adm: outlines_transformers_structured_aligned
- override /interface: ta3

adm.instance.model_name: meta-llama/Meta-Llama-3-8B
adm:
instance:
model_name: meta-llama/Meta-Llama-3-8B

interface:
session_type: soartech
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ defaults:
- override /adm: outlines_transformers_structured_aligned
- override /interface: ta3

adm.instance.model_name: meta-llama/Meta-Llama-3-8B
adm:
instance:
model_name: meta-llama/Meta-Llama-3-8B

interface:
session_type: soartech
Expand Down

0 comments on commit c99ca28

Please sign in to comment.