@@ -261,7 +261,7 @@ def __init__(
             **client_args,
         )

-    def __call__(self, messages: list[dict], n_samples: int = 1) -> dict:
+    def __call__(self, messages: list[dict], n_samples: int = 1, temperature: float = None) -> dict:
         # Initialize retry tracking attributes
         self.retries = 0
         self.success = False
@@ -271,12 +271,13 @@ def __call__(self, messages: list[dict], n_samples: int = 1) -> dict:
         e = None
         for itr in range(self.max_retry):
             self.retries += 1
+            temperature = temperature if temperature is not None else self.temperature
             try:
                 completion = self.client.chat.completions.create(
                     model=self.model_name,
                     messages=messages,
                     n=n_samples,
-                    temperature=self.temperature,
+                    temperature=temperature,
                     max_tokens=self.max_tokens,
                 )

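Taken together, the two hunks above make the sampling temperature overridable per call: __call__ now accepts an optional temperature argument and falls back to the instance-level self.temperature when none is given. A minimal usage sketch of that behavior; the class name OpenAIChatModel, its constructor arguments, and the model name are assumptions for illustration, not identifiers from this commit:

    # Hypothetical usage of the per-call override introduced above.
    # OpenAIChatModel and its constructor are placeholders for whatever
    # wrapper class owns this __call__ in the actual file.
    model = OpenAIChatModel(model_name="gpt-4o-mini", temperature=0.7, max_tokens=256)
    messages = [{"role": "user", "content": "Name one prime number."}]

    out_default = model(messages)                  # uses self.temperature (0.7)
    out_greedy = model(messages, temperature=0.0)  # per-call override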
@@ -414,11 +415,10 @@ def __init__(
         super().__init__(model_name, n_retry_server)
         if temperature < 1e-3:
             logging.warning("Models might behave weirdly when temperature is too low.")
+        self.temperature = temperature

         if token is None:
             token = os.environ["TGI_TOKEN"]

         client = InferenceClient(model=model_url, token=token)
-        self.llm = partial(
-            client.text_generation, temperature=temperature, max_new_tokens=max_new_tokens
-        )
+        self.llm = partial(client.text_generation, max_new_tokens=max_new_tokens)
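The TGI hunk applies the same idea on the text_generation path: temperature is no longer frozen into the partial at construction time, so it can be supplied per call from the stored self.temperature. A standalone sketch of the resulting pattern, with a placeholder endpoint and temperature values; huggingface_hub's InferenceClient.text_generation does accept a temperature keyword:

    from functools import partial
    from huggingface_hub import InferenceClient

    # Freeze only max_new_tokens into the partial; pass temperature per call.
    # The endpoint URL and both temperature values are placeholders.
    client = InferenceClient(model="http://localhost:8080", token=None)
    llm = partial(client.text_generation, max_new_tokens=128)

    out_default = llm("The sky is", temperature=0.7)   # instance default
    out_greedy = llm("The sky is", temperature=0.01)   # near-greedy override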