From b13d95209c9ef3084cbd317b37d38c7548d861e1 Mon Sep 17 00:00:00 2001 From: justin-together Date: Tue, 11 Feb 2025 13:49:15 -0800 Subject: [PATCH 01/11] Add together.ai API --- .../bfcl/eval_checker/model_metadata.py | 56 +++++++++- .../model_handler/api_inference/together.py | 104 ++++++++++++++++++ .../bfcl/model_handler/handler_map.py | 10 ++ .../bfcl/model_handler/model_style.py | 1 + .../bfcl/model_handler/utils.py | 2 + 5 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py index 6362cb7563..cfde4cfd96 100644 --- a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py @@ -894,7 +894,61 @@ "https://huggingface.co/uiuc-convai/CALM-405B", "UIUC + Oumi" "Meta Llama 3 Community" - ] + ], + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Llama-3.3-70B-Instruct-Turbo": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "mistralai/Mixtral-8x7B-Instruct-v0.1": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], + "mistralai/Mistral-7B-Instruct-v0.1": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], + "Qwen/Qwen2.5-7B-Instruct-Turbo": [ + "Qwen2.5-7B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", + "Qwen", + "apache-2.0", + ], + "Qwen/Qwen2.5-72B-Instruct-Turbo": [ + "Qwen2.5-72B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", + "Qwen", + "qwen", + ], + "deepseek-ai/DeepSeek-V3": [ + "DeepSeek-V3 (FC)", + "https://api-docs.deepseek.com/news/news1226", + "DeepSeek", + "DeepSeek License", + ], } INPUT_PRICE_PER_MILLION_TOKEN = { diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py new file mode 100644 index 0000000000..33ef7b942c --- /dev/null +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py @@ -0,0 +1,104 @@ +import os +import time + +from bfcl.model_handler.model_style import ModelStyle +from bfcl.model_handler.api_inference.openai import OpenAIHandler +from openai import OpenAI + + +class TogetherHandler(OpenAIHandler): + def __init__(self, model_name, temperature) -> None: + super().__init__(model_name, temperature) + self.model_style = ModelStyle.TOGETHER_AI + self.client = OpenAI( + base_url="https://api.together.xyz/v1", + api_key=os.getenv("TOGETHER_API_KEY"), + ) + + #### FC methods #### + + def _query_FC(self, inference_data: dict): + message: list[dict] = inference_data["message"] + tools = inference_data["tools"] + inference_data["inference_input_log"] = {"message": message, 
"tools": tools} + + start_time = time.time() + if len(tools) > 0: + api_response = self.client.chat.completions.create( + messages=message, + model=self.model_name, + temperature=self.temperature, + tools=tools, + ) + else: + api_response = self.client.chat.completions.create( + messages=message, + model=self.model_name, + temperature=self.temperature, + ) + end_time = time.time() + + return api_response, end_time - start_time + + def _pre_query_processing_FC(self, inference_data: dict, test_entry: dict) -> dict: + return super()._pre_query_processing_FC(inference_data, test_entry) + + def _compile_tools(self, inference_data: dict, test_entry: dict) -> dict: + return super()._compile_tools(inference_data, test_entry) + + def _parse_query_response_FC(self, api_response: any) -> dict: + try: + model_responses = [ + {func_call.function.name: func_call.function.arguments} + for func_call in api_response.choices[0].message.tool_calls + ] + tool_calls = [ + tool_call.model_dump() + for tool_call in api_response.choices[0].message.tool_calls + ] + except: + model_responses = api_response.choices[0].message.content + tool_calls = [] + + return { + "model_responses": model_responses, + "model_responses_message_for_chat_history": tool_calls, + "input_token": api_response.usage.prompt_tokens, + "output_token": api_response.usage.completion_tokens, + } + + def add_first_turn_message_FC( + self, inference_data: dict, first_turn_message: list[dict] + ) -> dict: + return super().add_first_turn_message_FC(inference_data, first_turn_message) + + def _add_next_turn_user_message_FC( + self, inference_data: dict, user_message: list[dict] + ) -> dict: + return super()._add_next_turn_user_message_FC(inference_data, user_message) + + def _add_assistant_message_FC( + self, inference_data: dict, model_response_data: dict + ) -> dict: + inference_data["message"].append( + { + "role": "assistant", + "content": "", + "tool_calls": model_response_data[ + "model_responses_message_for_chat_history" + ], + } + ) + return inference_data + + def _add_execution_results_FC( + self, inference_data: dict, execution_results: list[str], model_response_data: dict + ) -> dict: + for execution_result in execution_results: + tool_message = { + "role": "tool", + "content": execution_result, + } + inference_data["message"].append(tool_message) + + return inference_data diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py index 236417661c..871de69874 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py @@ -13,6 +13,7 @@ from bfcl.model_handler.api_inference.nova import NovaHandler from bfcl.model_handler.api_inference.nvidia import NvidiaHandler from bfcl.model_handler.api_inference.openai import OpenAIHandler +from bfcl.model_handler.api_inference.together import TogetherHandler from bfcl.model_handler.api_inference.writer import WriterHandler from bfcl.model_handler.api_inference.yi import YiHandler from bfcl.model_handler.local_inference.bielik import BielikHandler @@ -102,6 +103,15 @@ # "yi-large-fc": YiHandler, # Their API is under maintenance, and will not be back online in the near future "palmyra-x-004": WriterHandler, "grok-beta": GrokHandler, + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": TogetherHandler, + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": TogetherHandler, + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 
TogetherHandler, + "meta-llama/Llama-3.3-70B-Instruct-Turbo": TogetherHandler, + "mistralai/Mixtral-8x7B-Instruct-v0.1": TogetherHandler, + "mistralai/Mistral-7B-Instruct-v0.1": TogetherHandler, + "Qwen/Qwen2.5-7B-Instruct-Turbo": TogetherHandler, + "Qwen/Qwen2.5-72B-Instruct-Turbo": TogetherHandler, + "deepseek-ai/DeepSeek-V3": TogetherHandler, } # Inference through local hosting diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py b/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py index 7c26f8e859..21432e10dc 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/model_style.py @@ -10,6 +10,7 @@ class ModelStyle(Enum): Google = "google" AMAZON = "amazon" FIREWORK_AI = "firework_ai" + TOGETHER_AI = "together_ai" NEXUS = "nexus" OSSMODEL = "ossmodel" COHERE = "cohere" diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py index 59f10593f8..e6e9f54f6d 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py @@ -147,6 +147,7 @@ def convert_to_tool(functions, mapping, model_style): ModelStyle.Anthropic, ModelStyle.Google, ModelStyle.FIREWORK_AI, + ModelStyle.TOGETHER_AI, ModelStyle.WRITER, ModelStyle.AMAZON, ]: @@ -166,6 +167,7 @@ def convert_to_tool(functions, mapping, model_style): ModelStyle.OpenAI, ModelStyle.Mistral, ModelStyle.FIREWORK_AI, + ModelStyle.TOGETHER_AI, ModelStyle.WRITER, ]: oai_tool.append({"type": "function", "function": item}) From fe3fcdc28c7d7de95e9bfa2b7d49b8304aa77498 Mon Sep 17 00:00:00 2001 From: justin-together Date: Thu, 10 Apr 2025 19:52:45 -0700 Subject: [PATCH 02/11] Add FC variants of new models --- .../bfcl/eval_checker/model_metadata.py | 78 +++++++++++++++++++ .../model_handler/api_inference/together.py | 4 +- .../bfcl/model_handler/handler_map.py | 13 ++++ 3 files changed, 93 insertions(+), 2 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py index cfde4cfd96..15fb65748f 100644 --- a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py @@ -901,54 +901,132 @@ "Meta", "Meta Llama 3 Community", ], + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-FC": [ + "Llama-3.1-8B-Instruct (FC)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": [ "Llama-3.1-8B-Instruct (Prompt)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC": [ + "Llama-3.1-8B-Instruct (FC)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": [ "Llama-3.1-8B-Instruct (Prompt)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], "meta-llama/Llama-3.3-70B-Instruct-Turbo": [ "Llama-3.1-8B-Instruct (Prompt)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], + "meta-llama/Llama-3.3-70B-Instruct-Turbo-FC": [ + "Llama-3.1-8B-Instruct (Prompt)", + 
"https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Llama-4-Scout-17B-16E-Instruct": [ + "Llama 4 Scout (17Bx16E)", + "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", + "Meta", + "Meta Llama 4 Community", + ], + "meta-llama/Llama-4-Scout-17B-16E-Instruct-FC": [ + "Llama 4 Scout (17Bx16E)", + "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", + "Meta", + "Meta Llama 4 Community", + ], + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": [ + "Llama 4 Maverick (17Bx128E)", + "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "Meta", + "Meta Llama 4 Community", + ], + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC": [ + "Llama 4 Maverick (17Bx128E)", + "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "Meta", + "Meta Llama 4 Community", + ], "mistralai/Mixtral-8x7B-Instruct-v0.1": [ "Open-Mixtral-8x7b (Prompt)", "https://mistral.ai/news/mixtral-of-experts/", "Mistral AI", "Proprietary", ], + "mistralai/Mixtral-8x7B-Instruct-v0.1-FC": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], "mistralai/Mistral-7B-Instruct-v0.1": [ "Open-Mixtral-8x7b (Prompt)", "https://mistral.ai/news/mixtral-of-experts/", "Mistral AI", "Proprietary", ], + "mistralai/Mistral-7B-Instruct-v0.1-FC": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], "Qwen/Qwen2.5-7B-Instruct-Turbo": [ "Qwen2.5-7B-Instruct (Prompt)", "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", "Qwen", "apache-2.0", ], + "Qwen/Qwen2.5-7B-Instruct-Turbo-FC": [ + "Qwen2.5-7B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", + "Qwen", + "apache-2.0", + ], "Qwen/Qwen2.5-72B-Instruct-Turbo": [ "Qwen2.5-72B-Instruct (Prompt)", "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", "Qwen", "qwen", ], + "Qwen/Qwen2.5-72B-Instruct-Turbo-FC": [ + "Qwen2.5-72B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", + "Qwen", + "qwen", + ], "deepseek-ai/DeepSeek-V3": [ "DeepSeek-V3 (FC)", "https://api-docs.deepseek.com/news/news1226", "DeepSeek", "DeepSeek License", ], + "deepseek-ai/DeepSeek-V3-FC": [ + "DeepSeek-V3 (FC)", + "https://api-docs.deepseek.com/news/news1226", + "DeepSeek", + "DeepSeek License", + ], } INPUT_PRICE_PER_MILLION_TOKEN = { diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py index 33ef7b942c..3867565aed 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py @@ -26,14 +26,14 @@ def _query_FC(self, inference_data: dict): if len(tools) > 0: api_response = self.client.chat.completions.create( messages=message, - model=self.model_name, + model=self.model_name.replace('-FC', ''), temperature=self.temperature, tools=tools, ) else: api_response = self.client.chat.completions.create( messages=message, - model=self.model_name, + model=self.model_name.replace('-FC', ''), temperature=self.temperature, ) end_time = time.time() diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py index 871de69874..af6fad2d0c 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py +++ 
b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py @@ -104,14 +104,27 @@ "palmyra-x-004": WriterHandler, "grok-beta": GrokHandler, "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": TogetherHandler, + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-FC": TogetherHandler, "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": TogetherHandler, + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC": TogetherHandler, "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": TogetherHandler, + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC": TogetherHandler, "meta-llama/Llama-3.3-70B-Instruct-Turbo": TogetherHandler, + "meta-llama/Llama-3.3-70B-Instruct-Turbo-FC": TogetherHandler, + "meta-llama/Llama-4-Scout-17B-16E-Instruct": TogetherHandler, + "meta-llama/Llama-4-Scout-17B-16E-Instruct-FC": TogetherHandler, + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": TogetherHandler, + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC": TogetherHandler, "mistralai/Mixtral-8x7B-Instruct-v0.1": TogetherHandler, + "mistralai/Mixtral-8x7B-Instruct-v0.1-FC": TogetherHandler, "mistralai/Mistral-7B-Instruct-v0.1": TogetherHandler, + "mistralai/Mistral-7B-Instruct-v0.1-FC": TogetherHandler, "Qwen/Qwen2.5-7B-Instruct-Turbo": TogetherHandler, + "Qwen/Qwen2.5-7B-Instruct-Turbo-FC": TogetherHandler, "Qwen/Qwen2.5-72B-Instruct-Turbo": TogetherHandler, + "Qwen/Qwen2.5-72B-Instruct-Turbo-FC": TogetherHandler, "deepseek-ai/DeepSeek-V3": TogetherHandler, + "deepseek-ai/DeepSeek-V3-FC": TogetherHandler, } # Inference through local hosting From 5c3b1bf56bfc5da3fde3de9193efbd65023098dc Mon Sep 17 00:00:00 2001 From: justin-together Date: Wed, 16 Apr 2025 13:46:34 -0700 Subject: [PATCH 03/11] Update integration following Novita integration --- .../.env.example | 4 +- .../bfcl/constants/model_metadata.py | 134 +++++++++++++++++- .../model_handler/api_inference/together.py | 87 +++--------- .../bfcl/model_handler/handler_map.py | 44 +++--- .../bfcl/model_handler/utils.py | 5 +- 5 files changed, 178 insertions(+), 96 deletions(-) diff --git a/berkeley-function-call-leaderboard/.env.example b/berkeley-function-call-leaderboard/.env.example index faa98f5c50..a81efc627c 100644 --- a/berkeley-function-call-leaderboard/.env.example +++ b/berkeley-function-call-leaderboard/.env.example @@ -10,7 +10,6 @@ COHERE_API_KEY= GROK_API_KEY=xai-XXXXXX GOGOAGENT_API_KEY= WRITER_API_KEY= -TOGETHER_API_KEY= # We use Vertex AI to inference Google Gemini models VERTEX_AI_PROJECT_ID= @@ -25,6 +24,9 @@ DATABRICKS_AZURE_ENDPOINT_URL= # [OPTIONAL] For inference via Novita AI endpoint NOVITA_API_KEY=sk-XXXXXX +# [OPTIONAL] For inference via Together AI endpoint +TOGETHER_API_KEY= + # [OPTIONAL] For local vllm/sglang server configuration # Defaults to localhost port 1053 if not provided VLLM_ENDPOINT=localhost diff --git a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py index 2f3a1c0e12..77979ed5e6 100644 --- a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py @@ -983,7 +983,139 @@ "https://huggingface.co/Qwen/QwQ-32B", "Qwen", "apache-2.0", - ] + ], + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-together": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-FC-together": [ + "Llama-3.1-8B-Instruct (FC)", + "https://llama.meta.com/llama3", + "Meta", + "Meta 
Llama 3 Community", + ], + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-together": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC-together": [ + "Llama-3.1-8B-Instruct (FC)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-together": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC-together": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Llama-3.3-70B-Instruct-Turbo-together": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Llama-3.3-70B-Instruct-Turbo-FC-together": [ + "Llama-3.1-8B-Instruct (Prompt)", + "https://llama.meta.com/llama3", + "Meta", + "Meta Llama 3 Community", + ], + "meta-llama/Llama-4-Scout-17B-16E-Instruct-together": [ + "Llama 4 Scout (17Bx16E)", + "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", + "Meta", + "Meta Llama 4 Community", + ], + "meta-llama/Llama-4-Scout-17B-16E-Instruct-FC-together": [ + "Llama 4 Scout (17Bx16E)", + "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", + "Meta", + "Meta Llama 4 Community", + ], + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-together": [ + "Llama 4 Maverick (17Bx128E)", + "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "Meta", + "Meta Llama 4 Community", + ], + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC-together": [ + "Llama 4 Maverick (17Bx128E)", + "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "Meta", + "Meta Llama 4 Community", + ], + "mistralai/Mixtral-8x7B-Instruct-v0.1-together": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], + "mistralai/Mixtral-8x7B-Instruct-v0.1-FC-together": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], + "mistralai/Mistral-7B-Instruct-v0.1-together": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], + "mistralai/Mistral-7B-Instruct-v0.1-FC-together": [ + "Open-Mixtral-8x7b (Prompt)", + "https://mistral.ai/news/mixtral-of-experts/", + "Mistral AI", + "Proprietary", + ], + "Qwen/Qwen2.5-7B-Instruct-Turbo-together": [ + "Qwen2.5-7B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", + "Qwen", + "apache-2.0", + ], + "Qwen/Qwen2.5-7B-Instruct-Turbo-FC-together": [ + "Qwen2.5-7B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", + "Qwen", + "apache-2.0", + ], + "Qwen/Qwen2.5-72B-Instruct-Turbo-together": [ + "Qwen2.5-72B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", + "Qwen", + "qwen", + ], + "Qwen/Qwen2.5-72B-Instruct-Turbo-FC-together": [ + "Qwen2.5-72B-Instruct (Prompt)", + "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", + "Qwen", + "qwen", + ], + "deepseek-ai/DeepSeek-V3-together": [ + "DeepSeek-V3 (FC)", + "https://api-docs.deepseek.com/news/news1226", + "DeepSeek", + "DeepSeek License", + ], + "deepseek-ai/DeepSeek-V3-FC-together": [ + "DeepSeek-V3 (FC)", + "https://api-docs.deepseek.com/news/news1226", + "DeepSeek", + 
"DeepSeek License", + ], } INPUT_PRICE_PER_MILLION_TOKEN = { diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py index 3867565aed..a0e77008d6 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/api_inference/together.py @@ -1,8 +1,7 @@ import os -import time -from bfcl.model_handler.model_style import ModelStyle from bfcl.model_handler.api_inference.openai import OpenAIHandler +from bfcl.model_handler.model_style import ModelStyle from openai import OpenAI @@ -20,85 +19,33 @@ def __init__(self, model_name, temperature) -> None: def _query_FC(self, inference_data: dict): message: list[dict] = inference_data["message"] tools = inference_data["tools"] - inference_data["inference_input_log"] = {"message": message, "tools": tools} + inference_data["inference_input_log"] = { + "message": repr(message), + "tools": tools, + } - start_time = time.time() if len(tools) > 0: - api_response = self.client.chat.completions.create( + return self.generate_with_backoff( messages=message, - model=self.model_name.replace('-FC', ''), + model=self.model_name.replace("-FC", "").replace("-together", ""), temperature=self.temperature, tools=tools, ) else: - api_response = self.client.chat.completions.create( + + return self.generate_with_backoff( messages=message, - model=self.model_name.replace('-FC', ''), + model=self.model_name.replace("-FC", "").replace("-together", ""), temperature=self.temperature, ) - end_time = time.time() - - return api_response, end_time - start_time - - def _pre_query_processing_FC(self, inference_data: dict, test_entry: dict) -> dict: - return super()._pre_query_processing_FC(inference_data, test_entry) - def _compile_tools(self, inference_data: dict, test_entry: dict) -> dict: - return super()._compile_tools(inference_data, test_entry) + #### Prompting methods #### - def _parse_query_response_FC(self, api_response: any) -> dict: - try: - model_responses = [ - {func_call.function.name: func_call.function.arguments} - for func_call in api_response.choices[0].message.tool_calls - ] - tool_calls = [ - tool_call.model_dump() - for tool_call in api_response.choices[0].message.tool_calls - ] - except: - model_responses = api_response.choices[0].message.content - tool_calls = [] + def _query_prompting(self, inference_data: dict): + inference_data["inference_input_log"] = {"message": repr(inference_data["message"])} - return { - "model_responses": model_responses, - "model_responses_message_for_chat_history": tool_calls, - "input_token": api_response.usage.prompt_tokens, - "output_token": api_response.usage.completion_tokens, - } - - def add_first_turn_message_FC( - self, inference_data: dict, first_turn_message: list[dict] - ) -> dict: - return super().add_first_turn_message_FC(inference_data, first_turn_message) - - def _add_next_turn_user_message_FC( - self, inference_data: dict, user_message: list[dict] - ) -> dict: - return super()._add_next_turn_user_message_FC(inference_data, user_message) - - def _add_assistant_message_FC( - self, inference_data: dict, model_response_data: dict - ) -> dict: - inference_data["message"].append( - { - "role": "assistant", - "content": "", - "tool_calls": model_response_data[ - "model_responses_message_for_chat_history" - ], - } + return self.generate_with_backoff( + messages=inference_data["message"], + 
model=self.model_name.replace("-together", ""), + temperature=self.temperature, ) - return inference_data - - def _add_execution_results_FC( - self, inference_data: dict, execution_results: list[str], model_response_data: dict - ) -> dict: - for execution_result in execution_results: - tool_message = { - "role": "tool", - "content": execution_result, - } - inference_data["message"].append(tool_message) - - return inference_data diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py index dbc2b92c37..e9f81e1b73 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py @@ -104,28 +104,6 @@ # "yi-large-fc": YiHandler, # Their API is under maintenance, and will not be back online in the near future "palmyra-x-004": WriterHandler, "grok-beta": GrokHandler, - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": TogetherHandler, - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-FC": TogetherHandler, - "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": TogetherHandler, - "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC": TogetherHandler, - "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": TogetherHandler, - "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC": TogetherHandler, - "meta-llama/Llama-3.3-70B-Instruct-Turbo": TogetherHandler, - "meta-llama/Llama-3.3-70B-Instruct-Turbo-FC": TogetherHandler, - "meta-llama/Llama-4-Scout-17B-16E-Instruct": TogetherHandler, - "meta-llama/Llama-4-Scout-17B-16E-Instruct-FC": TogetherHandler, - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": TogetherHandler, - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC": TogetherHandler, - "mistralai/Mixtral-8x7B-Instruct-v0.1": TogetherHandler, - "mistralai/Mixtral-8x7B-Instruct-v0.1-FC": TogetherHandler, - "mistralai/Mistral-7B-Instruct-v0.1": TogetherHandler, - "mistralai/Mistral-7B-Instruct-v0.1-FC": TogetherHandler, - "Qwen/Qwen2.5-7B-Instruct-Turbo": TogetherHandler, - "Qwen/Qwen2.5-7B-Instruct-Turbo-FC": TogetherHandler, - "Qwen/Qwen2.5-72B-Instruct-Turbo": TogetherHandler, - "Qwen/Qwen2.5-72B-Instruct-Turbo-FC": TogetherHandler, - "deepseek-ai/DeepSeek-V3": TogetherHandler, - "deepseek-ai/DeepSeek-V3-FC": TogetherHandler, } # Inference through local hosting @@ -220,6 +198,28 @@ "meta-llama/llama-4-scout-17b-16e-instruct-FC-novita": NovitaHandler, "qwen/qwq-32b-FC-novita": NovitaHandler, "qwen/qwq-32b-novita": NovitaHandler, + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-together": TogetherHandler, + "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-FC-together": TogetherHandler, + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-together": TogetherHandler, + "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC-together": TogetherHandler, + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-together": TogetherHandler, + "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC-together": TogetherHandler, + "meta-llama/Llama-3.3-70B-Instruct-Turbo-together": TogetherHandler, + "meta-llama/Llama-3.3-70B-Instruct-Turbo-FC-together": TogetherHandler, + "meta-llama/Llama-4-Scout-17B-16E-Instruct-together": TogetherHandler, + "meta-llama/Llama-4-Scout-17B-16E-Instruct-FC-together": TogetherHandler, + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-together": TogetherHandler, + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC-together": TogetherHandler, + "mistralai/Mixtral-8x7B-Instruct-v0.1-together": TogetherHandler, + 
"mistralai/Mixtral-8x7B-Instruct-v0.1-FC-together": TogetherHandler, + "mistralai/Mistral-7B-Instruct-v0.1-together": TogetherHandler, + "mistralai/Mistral-7B-Instruct-v0.1-FC-together": TogetherHandler, + "Qwen/Qwen2.5-7B-Instruct-Turbo-together": TogetherHandler, + "Qwen/Qwen2.5-7B-Instruct-Turbo-FC-together": TogetherHandler, + "Qwen/Qwen2.5-72B-Instruct-Turbo-together": TogetherHandler, + "Qwen/Qwen2.5-72B-Instruct-Turbo-FC-together": TogetherHandler, + "deepseek-ai/DeepSeek-V3-together": TogetherHandler, + "deepseek-ai/DeepSeek-V3-FC-together": TogetherHandler, } # Deprecated/outdated models, no longer on the leaderboard diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py index 89badda448..551de608c6 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py @@ -78,6 +78,7 @@ def convert_to_tool(functions, mapping, model_style): ModelStyle.COHERE, ModelStyle.AMAZON, ModelStyle.NOVITA_AI, + ModelStyle.TOGETHER_AI, ]: # OAI does not support "." in the function name so we replace it with "_". ^[a-zA-Z0-9_-]{1,64}$ is the regex for the name. item["name"] = re.sub(r"\.", "_", item["name"]) @@ -149,10 +150,10 @@ def convert_to_tool(functions, mapping, model_style): ModelStyle.Anthropic, ModelStyle.Google, ModelStyle.FIREWORK_AI, - ModelStyle.TOGETHER_AI, ModelStyle.WRITER, ModelStyle.AMAZON, ModelStyle.NOVITA_AI, + ModelStyle.TOGETHER_AI, ]: item[ "description" @@ -170,9 +171,9 @@ def convert_to_tool(functions, mapping, model_style): ModelStyle.OpenAI, ModelStyle.Mistral, ModelStyle.FIREWORK_AI, - ModelStyle.TOGETHER_AI, ModelStyle.WRITER, ModelStyle.NOVITA_AI, + ModelStyle.TOGETHER_AI, ]: oai_tool.append({"type": "function", "function": item}) elif model_style == ModelStyle.AMAZON: From 246c504a12969c8cf14e49664d941af8c6d1a3c4 Mon Sep 17 00:00:00 2001 From: justin-together Date: Wed, 16 Apr 2025 14:53:27 -0700 Subject: [PATCH 04/11] Fix function calls with dot --- berkeley-function-call-leaderboard/bfcl/model_handler/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py index 551de608c6..064be01d4b 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/utils.py @@ -78,7 +78,6 @@ def convert_to_tool(functions, mapping, model_style): ModelStyle.COHERE, ModelStyle.AMAZON, ModelStyle.NOVITA_AI, - ModelStyle.TOGETHER_AI, ]: # OAI does not support "." in the function name so we replace it with "_". ^[a-zA-Z0-9_-]{1,64}$ is the regex for the name. 
item["name"] = re.sub(r"\.", "_", item["name"]) From 5cb3290433f498a598ed8ef3caa586a8958e006f Mon Sep 17 00:00:00 2001 From: justin-together Date: Wed, 16 Apr 2025 15:33:28 -0700 Subject: [PATCH 05/11] Addd mistral small 2501 --- .../bfcl/constants/model_metadata.py | 24 ++++++++++++++----- .../bfcl/model_handler/handler_map.py | 2 ++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py index 77979ed5e6..b40ab74242 100644 --- a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py @@ -1063,23 +1063,35 @@ "Proprietary", ], "mistralai/Mixtral-8x7B-Instruct-v0.1-FC-together": [ - "Open-Mixtral-8x7b (Prompt)", + "Open-Mixtral-8x7b (FC)", "https://mistral.ai/news/mixtral-of-experts/", "Mistral AI", "Proprietary", ], "mistralai/Mistral-7B-Instruct-v0.1-together": [ - "Open-Mixtral-8x7b (Prompt)", + "Open-Mixtral-7b (Prompt)", "https://mistral.ai/news/mixtral-of-experts/", "Mistral AI", "Proprietary", ], "mistralai/Mistral-7B-Instruct-v0.1-FC-together": [ - "Open-Mixtral-8x7b (Prompt)", + "Open-Mixtral-7b (FC)", "https://mistral.ai/news/mixtral-of-experts/", "Mistral AI", "Proprietary", ], + "mistralai/Mistral-Small-24B-Instruct-2501-together": [ + "Mistral Small 3 (2501) (Prompt)", + "https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501", + "Mistral AI", + "apache-2.0", + ], + "mistralai/Mistral-Small-24B-Instruct-2501-FC-together": [ + "Mistral Small 3 (2501) (FC)", + "https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501", + "Mistral AI", + "apache-2.0", + ], "Qwen/Qwen2.5-7B-Instruct-Turbo-together": [ "Qwen2.5-7B-Instruct (Prompt)", "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", @@ -1087,7 +1099,7 @@ "apache-2.0", ], "Qwen/Qwen2.5-7B-Instruct-Turbo-FC-together": [ - "Qwen2.5-7B-Instruct (Prompt)", + "Qwen2.5-7B-Instruct (FC)", "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct", "Qwen", "apache-2.0", @@ -1099,13 +1111,13 @@ "qwen", ], "Qwen/Qwen2.5-72B-Instruct-Turbo-FC-together": [ - "Qwen2.5-72B-Instruct (Prompt)", + "Qwen2.5-72B-Instruct (FC)", "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct", "Qwen", "qwen", ], "deepseek-ai/DeepSeek-V3-together": [ - "DeepSeek-V3 (FC)", + "DeepSeek-V3 (Prompt)", "https://api-docs.deepseek.com/news/news1226", "DeepSeek", "DeepSeek License", diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py index e9f81e1b73..959d9a750a 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py @@ -214,6 +214,8 @@ "mistralai/Mixtral-8x7B-Instruct-v0.1-FC-together": TogetherHandler, "mistralai/Mistral-7B-Instruct-v0.1-together": TogetherHandler, "mistralai/Mistral-7B-Instruct-v0.1-FC-together": TogetherHandler, + "mistralai/Mistral-Small-24B-Instruct-2501-together": TogetherHandler, + "mistralai/Mistral-Small-24B-Instruct-2501-FC-together": TogetherHandler, "Qwen/Qwen2.5-7B-Instruct-Turbo-together": TogetherHandler, "Qwen/Qwen2.5-7B-Instruct-Turbo-FC-together": TogetherHandler, "Qwen/Qwen2.5-72B-Instruct-Turbo-together": TogetherHandler, From a8bb1b376c8761c305cf1fa750b1446fe48af89a Mon Sep 17 00:00:00 2001 From: justin-together Date: Wed, 16 Apr 2025 15:36:27 -0700 Subject: [PATCH 06/11] Update descriptions --- 
.../bfcl/constants/model_metadata.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py index b40ab74242..bfb7270844 100644 --- a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py @@ -997,25 +997,25 @@ "Meta Llama 3 Community", ], "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-together": [ - "Llama-3.1-8B-Instruct (Prompt)", + "Llama-3.1-70B-Instruct (Prompt)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC-together": [ - "Llama-3.1-8B-Instruct (FC)", + "Llama-3.1-70B-Instruct (FC)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-together": [ - "Llama-3.1-8B-Instruct (Prompt)", + "Llama-3.1-405B-Instruct (Prompt)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo-FC-together": [ - "Llama-3.1-8B-Instruct (Prompt)", + "Llama-3.1-405B-Instruct (FC)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", @@ -1027,31 +1027,31 @@ "Meta Llama 3 Community", ], "meta-llama/Llama-3.3-70B-Instruct-Turbo-FC-together": [ - "Llama-3.1-8B-Instruct (Prompt)", + "Llama-3.1-8B-Instruct (FC)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], "meta-llama/Llama-4-Scout-17B-16E-Instruct-together": [ - "Llama 4 Scout (17Bx16E)", + "Llama 4 Scout (17Bx16E) (Prompt)", "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", "Meta", "Meta Llama 4 Community", ], "meta-llama/Llama-4-Scout-17B-16E-Instruct-FC-together": [ - "Llama 4 Scout (17Bx16E)", + "Llama 4 Scout (17Bx16E) (FC)", "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", "Meta", "Meta Llama 4 Community", ], "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-together": [ - "Llama 4 Maverick (17Bx128E)", + "Llama 4 Maverick (17Bx128E) (Prompt)", "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "Meta", "Meta Llama 4 Community", ], "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-FC-together": [ - "Llama 4 Maverick (17Bx128E)", + "Llama 4 Maverick (17Bx128E) (FC)", "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "Meta", "Meta Llama 4 Community", From 627d364300df1654685c65e66c4bac02a959777c Mon Sep 17 00:00:00 2001 From: justin-together Date: Wed, 16 Apr 2025 15:37:59 -0700 Subject: [PATCH 07/11] Proof read --- .../bfcl/constants/model_metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py index bfb7270844..bedf7da12a 100644 --- a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py @@ -1021,13 +1021,13 @@ "Meta Llama 3 Community", ], "meta-llama/Llama-3.3-70B-Instruct-Turbo-together": [ - "Llama-3.1-8B-Instruct (Prompt)", + "Llama-3.3-70B-Instruct (Prompt)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", ], "meta-llama/Llama-3.3-70B-Instruct-Turbo-FC-together": [ - "Llama-3.1-8B-Instruct (FC)", + "Llama-3.3-70B-Instruct (FC)", "https://llama.meta.com/llama3", "Meta", "Meta Llama 3 Community", From 
04c413cef7e81c051d2e0f4c0ae2d4c0b4da11ce Mon Sep 17 00:00:00 2001
From: justin-together
Date: Tue, 29 Apr 2025 19:04:13 -0700
Subject: [PATCH 08/11] Add Qwen/Qwen3-235B-A22B-fp8-tput

---
 .../bfcl/constants/model_metadata.py          | 12 ++++++++++++
 .../bfcl/model_handler/handler_map.py         |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py
index bedf7da12a..390dd229fa 100644
--- a/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py
+++ b/berkeley-function-call-leaderboard/bfcl/constants/model_metadata.py
@@ -1116,6 +1116,18 @@
         "Qwen",
         "qwen",
     ],
+    "Qwen/Qwen3-235B-A22B-fp8-tput-together": [
+        "Qwen3-235B-A22B (Prompt)",
+        "https://huggingface.co/Qwen/Qwen3-235B-A22B",
+        "Qwen",
+        "qwen",
+    ],
+    "Qwen/Qwen3-235B-A22B-fp8-tput-FC-together": [
+        "Qwen3-235B-A22B (FC)",
+        "https://huggingface.co/Qwen/Qwen3-235B-A22B",
+        "Qwen",
+        "qwen",
+    ],
     "deepseek-ai/DeepSeek-V3-together": [
         "DeepSeek-V3 (Prompt)",
         "https://api-docs.deepseek.com/news/news1226",
         "DeepSeek",

diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
index 959d9a750a..34a2a786a6 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
@@ -220,6 +220,8 @@
     "Qwen/Qwen2.5-7B-Instruct-Turbo-FC-together": TogetherHandler,
     "Qwen/Qwen2.5-72B-Instruct-Turbo-together": TogetherHandler,
     "Qwen/Qwen2.5-72B-Instruct-Turbo-FC-together": TogetherHandler,
+    "Qwen/Qwen3-235B-A22B-fp8-tput-together": TogetherHandler,
+    "Qwen/Qwen3-235B-A22B-fp8-tput-FC-together": TogetherHandler,
     "deepseek-ai/DeepSeek-V3-together": TogetherHandler,
     "deepseek-ai/DeepSeek-V3-FC-together": TogetherHandler,
 }

From 28a24e5739e4cf305f5dc86142775a42fbf65cf1 Mon Sep 17 00:00:00 2001
From: Mark <74402146+TechnoFinch@users.noreply.github.com>
Date: Sun, 13 Jul 2025 21:51:51 -0700
Subject: [PATCH 09/11] together changes

---
 .../bfcl_eval/constants/model_config.py       | 13 +++++++++++++
 .../bfcl_eval/constants/supported_models.py   |  3 ++-
 .../model_handler/api_inference/together.py   |  6 +++---
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py b/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
index e332d00c79..6569f2ac3d 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
@@ -7,6 +7,7 @@
 from bfcl_eval.model_handler.api_inference.deepseek import DeepSeekAPIHandler
 from bfcl_eval.model_handler.api_inference.dm_cito import DMCitoHandler
 from bfcl_eval.model_handler.api_inference.fireworks import FireworksHandler
+from bfcl_eval.model_handler.api_inference.together import TogetherHandler
 from bfcl_eval.model_handler.api_inference.functionary import FunctionaryHandler
 from bfcl_eval.model_handler.api_inference.gemini import GeminiHandler
 from bfcl_eval.model_handler.api_inference.gogoagent import GoGoAgentHandler
@@ -2034,6 +2035,18 @@ class ModelConfig:
         is_fc_model=True,
         underscore_to_dot=False,
     ),
+    "moonshotai/Kimi-K2-Instruct": ModelConfig(
+        model_name="moonshotai/Kimi-K2-Instruct",
+        display_name="Kimi-K2-Instruct",
+        url="https://huggingface.co/moonshotai/Kimi-K2-Instruct",
+        org="moonshotai",
+        license="modified-mit",
+        model_handler=TogetherHandler,
+        input_price=None,
+        output_price=None,
+        is_fc_model=True,
+        underscore_to_dot=False,
+    ),
 }

diff --git a/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py b/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py
index 60483437e8..35ed68b2ea 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py
@@ -167,4 +167,5 @@
     "katanemo/Arch-Agent-3B",
     "katanemo/Arch-Agent-7B",
-    "katanemo/Arch-Agent-32B"
+    "katanemo/Arch-Agent-32B",
+    "moonshotai/Kimi-K2-Instruct"
 ]

diff --git a/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/together.py b/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/together.py
index a0e77008d6..abbc7d0c66 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/together.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/together.py
@@ -1,11 +1,11 @@
 import os
 
-from bfcl.model_handler.api_inference.openai import OpenAIHandler
-from bfcl.model_handler.model_style import ModelStyle
+from bfcl_eval.model_handler.api_inference.openai_completion import OpenAICompletionsHandler
+from bfcl_eval.model_handler.model_style import ModelStyle
 from openai import OpenAI
 
 
-class TogetherHandler(OpenAIHandler):
+class TogetherHandler(OpenAICompletionsHandler):
     def __init__(self, model_name, temperature) -> None:
         super().__init__(model_name, temperature)
         self.model_style = ModelStyle.TOGETHER_AI

From 91b00d12196b694fc1271037663556350740715f Mon Sep 17 00:00:00 2001
From: Mark <74402146+TechnoFinch@users.noreply.github.com>
Date: Sun, 13 Jul 2025 22:38:43 -0700
Subject: [PATCH 10/11] Adding FC and call

---
 berkeley-function-call-leaderboard/README.md  | 5 ++++-
 .../bfcl_eval/constants/model_config.py       | 2 +-
 .../bfcl_eval/constants/supported_models.py   | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/berkeley-function-call-leaderboard/README.md b/berkeley-function-call-leaderboard/README.md
index 777cc0579f..5b4523f2bd 100644
--- a/berkeley-function-call-leaderboard/README.md
+++ b/berkeley-function-call-leaderboard/README.md
@@ -150,6 +150,9 @@ You can provide multiple models or test categories by separating them with comma
 
 ```bash
 bfcl generate --model claude-3-5-sonnet-20241022-FC,gpt-4o-2024-11-20-FC --test-category simple,parallel,multiple,multi_turn
+
+
+bfcl generate --model moonshotai/Kimi-K2-Instruct-FC --num-threads 6
 ```
 
 #### Selecting Specific Test Cases with `--run-ids`
@@ -242,7 +245,7 @@ VLLM_PORT=1053
 For those who prefer using script execution instead of the CLI, you can run the following command:
 
 ```bash
-python -m bfcl_eval.openfunctions_evaluation --model MODEL_NAME --test-category TEST_CATEGORY
+python -m bfcl_eval.openfunctions_evaluation --model moonshotai/Kimi-K2-Instruct-FC --test-category TEST_CATEGORY
 ```
 
 When specifying multiple models or test categories, separate them with **spaces**, not commas. All other flags mentioned earlier are compatible with the script execution method as well.
diff --git a/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py b/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
index 6569f2ac3d..a9e9d48757 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
@@ -2036,7 +2036,7 @@ class ModelConfig:
         underscore_to_dot=False,
     ),
     "moonshotai/Kimi-K2-Instruct": ModelConfig(
-        model_name="moonshotai/Kimi-K2-Instruct",
+        model_name="moonshotai/Kimi-K2-Instruct-FC",
         display_name="Kimi-K2-Instruct",
         url="https://huggingface.co/moonshotai/Kimi-K2-Instruct",
         org="moonshotai",

diff --git a/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py b/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py
index 35ed68b2ea..b75a69b00e 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/constants/supported_models.py
@@ -168,4 +168,4 @@
     "katanemo/Arch-Agent-7B",
     "katanemo/Arch-Agent-32B",
-    "moonshotai/Kimi-K2-Instruct"
+    "moonshotai/Kimi-K2-Instruct-FC"
 ]

From 51811ace61b555fa909982dd89a76edc16918db0 Mon Sep 17 00:00:00 2001
From: Mark <74402146+TechnoFinch@users.noreply.github.com>
Date: Sun, 13 Jul 2025 22:43:27 -0700
Subject: [PATCH 11/11] Fixing

---
 .../bfcl_eval/constants/model_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py b/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
index a9e9d48757..e85f396b7c 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/constants/model_config.py
@@ -2035,8 +2035,8 @@ class ModelConfig:
         is_fc_model=True,
         underscore_to_dot=False,
     ),
-    "moonshotai/Kimi-K2-Instruct": ModelConfig(
-        model_name="moonshotai/Kimi-K2-Instruct-FC",
+    "moonshotai/Kimi-K2-Instruct-FC": ModelConfig(
+        model_name="moonshotai/Kimi-K2-Instruct",
         display_name="Kimi-K2-Instruct",
         url="https://huggingface.co/moonshotai/Kimi-K2-Instruct",
         org="moonshotai",
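
Taken together, the series registers Together-served models under `-together` / `-FC-together` ids, while `TogetherHandler` strips those bookkeeping suffixes and issues a plain OpenAI-compatible request against `https://api.together.xyz/v1`. Below is a minimal sketch of the handler's FC request path outside the BFCL harness, using only the `openai` SDK; the example model id, the `get_weather` tool schema, and the direct `create` call (in place of the handler's backoff wrapper) are illustrative assumptions, not anything defined in these patches.

```python
import os

from openai import OpenAI

# Together exposes an OpenAI-compatible endpoint, so the stock OpenAI client
# works once it is pointed at the Together base URL (as TogetherHandler does).
client = OpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.getenv("TOGETHER_API_KEY"),
)

# A BFCL-style id carries bookkeeping suffixes; the FC path strips them
# before calling the API, and this sketch does the same.
bfcl_model_id = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo-FC-together"
api_model_id = bfcl_model_id.replace("-FC", "").replace("-together", "")

# Minimal OpenAI-format tool definition (illustrative, not from the patches).
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a given city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model=api_model_id,
    messages=[{"role": "user", "content": "What is the weather in Berkeley?"}],
    tools=tools,
    temperature=0.001,  # low temperature for reproducible runs
)

# FC mode reads structured tool_calls; prompting mode falls back to content.
message = response.choices[0].message
if message.tool_calls:
    for call in message.tool_calls:
        print(call.function.name, call.function.arguments)
else:
    print(message.content)
```

Because Together mirrors the OpenAI chat-completions schema end to end, the handler can inherit request construction, retries, and tool-call parsing from the OpenAI handler and override only the base URL, the model-id normalization, and input logging.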