neulab · neubig · Sep 23, 2024 · Sep 15, 2024 · Sep 15, 2024 · Sep 16, 2024
diff --git a/llments/lm/base/api.py b/llments/lm/base/api.py
@@ -47,6 +47,7 @@ def __init__(self, model_name: str) -> None:
     def generate(
         self,
         message: str,
+        api_base: str | None,
         condition: str | None,
         do_sample: bool = False,
         max_length: int | None = None,
@@ -61,6 +62,7 @@ def generate(
 
         Args:
             message (str): The prompt for generating a response.
+            api_base (str | None): The API endpoint to call the model.
             condition (str): The conditioning sequence for the output.
                 If None, the output is not conditioned.
             do_sample (bool): Whether to use sampling or greedy decoding.
@@ -86,6 +88,7 @@ def generate(
             temperature = temperature,
             max_tokens = max_new_tokens,
             n = num_return_sequences,
+            api_base = api_base,
             messages=[{"content": message, "role": "user"}]
         )
         for choice in response['choices']:
@@ -96,6 +99,7 @@ def generate(
     def chat_generate(
         self,
         messages: list[str],
+        api_base: str | None,
         condition: str | None,
         do_sample: bool = False,
         max_length: int | None = None,
@@ -110,6 +114,7 @@ def chat_generate(
 
         Args:
             messages (list): The list of prompts for generating responses.
+            api_base (str | None): The API endpoint to call the model.
             condition (str): The conditioning sequence for the output.
                 If None, the output is not conditioned.
             do_sample (bool): Whether to use sampling or greedy decoding.
@@ -134,6 +139,7 @@ def chat_generate(
             temperature = temperature,
             max_tokens = max_new_tokens,
             n = num_return_sequences,
+            api_base = api_base,
             messages=[[{"content": content, "role": "user"}] for content in messages]
         )
         return [response['choices'][0]['message']['content'] for response in responses]