From bd4520be3e193b92b9d3ab8966ec034ac785c8f4 Mon Sep 17 00:00:00 2001 From: Kumaran Rajendhiran Date: Tue, 23 Jul 2024 11:32:02 +0530 Subject: [PATCH] Update together model string (#3) --- fastagency/models/llms/together.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fastagency/models/llms/together.py b/fastagency/models/llms/together.py index 7509fd69..64bf5a5b 100644 --- a/fastagency/models/llms/together.py +++ b/fastagency/models/llms/together.py @@ -28,11 +28,8 @@ "Qwen 1.5 Chat (14B)": "Qwen/Qwen1.5-14B-Chat", "Qwen 1.5 Chat (1.8B)": "Qwen/Qwen1.5-1.8B-Chat", "Snowflake Arctic Instruct": "Snowflake/snowflake-arctic-instruct", - "Code Llama Instruct (70B)": "codellama/CodeLlama-70b-Instruct-hf", "Nous Hermes 2 - Mixtral 8x7B-SFT": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT", - "Dolphin 2.5 Mixtral 8x7b": "cognitivecomputations/dolphin-2.5-mixtral-8x7b", "Nous Hermes 2 - Mixtral 8x7B-DPO ": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "Mixtral-8x22B Instruct v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1", "Deepseek Coder Instruct (33B)": "deepseek-ai/deepseek-coder-33b-instruct", "Nous Hermes Llama-2 (13B)": "NousResearch/Nous-Hermes-Llama2-13b", "Vicuna v1.5 (13B)": "lmsys/vicuna-13b-v1.5", @@ -70,16 +67,19 @@ "Gemma-2 Instruct (27B)": "google/gemma-2-27b-it", "01-ai Yi Chat (34B)": "zero-one-ai/Yi-34B-Chat", "Meta Llama 3 70B Instruct Turbo": "meta-llama/Meta-Llama-3-70B-Instruct-Turbo", - "Meta Llama 3 8B Instruct Turbo": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "Meta Llama 3 70B Instruct Lite": "meta-llama/Meta-Llama-3-70B-Instruct-Lite", "Gemma-2 Instruct (9B)": "google/gemma-2-9b-it", - "Meta Llama 3 8B Instruct Lite": "meta-llama/Meta-Llama-3-8B-Instruct-Lite", "Meta Llama 3 8B Reference": "meta-llama/Llama-3-8b-chat-hf", "Mixtral-8x7B Instruct v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "LLaMA-2 Chat (7B)": "togethercomputer/llama-2-7b-chat", - "LLaMA-2 Chat (70B)": "togethercomputer/llama-2-70b-chat", "Reserved - DBRX Instruct": "medaltv/dbrx-instruct", "DBRX Instruct": "databricks/dbrx-instruct", + "Meta Llama 3 8B Instruct Turbo": "meta-llama/Meta-Llama-3-8B-Instruct-Turbo", + "Dolphin 2.5 Mixtral 8x7b": "cognitivecomputations/dolphin-2.5-mixtral-8x7b", + "Mixtral-8x22B Instruct v0.1": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "Code Llama Instruct (70B)": "codellama/CodeLlama-70b-Instruct-hf", + "Meta Llama 3 8B Instruct Lite": "meta-llama/Meta-Llama-3-8B-Instruct-Lite", + "LLaMA-2 Chat (7B)": "togethercomputer/llama-2-7b-chat", + "LLaMA-2 Chat (70B)": "togethercomputer/llama-2-70b-chat", "Koala (7B)": "togethercomputer/Koala-7B", "Guanaco (65B) ": "togethercomputer/guanaco-65b", "Vicuna v1.3 (7B)": "lmsys/vicuna-7b-v1.3", @@ -95,6 +95,9 @@ "Meta Llama 3 8B Instruct": "meta-llama/Meta-Llama-3-8B-Instruct", "Meta Llama 3 70B Instruct": "meta-llama/Meta-Llama-3-70B-Instruct", "Hermes 2 Theta Llama-3 70B": "NousResearch/Hermes-2-Theta-Llama-3-70B", + "carson ml318bit": "carson/ml318bit", + "carson ml31405bit": "carson/ml31405bit", + "carson ml3170bit": "carson/ml3170bit", "Llama-3 70B Instruct Gradient 1048K": "gradientai/Llama-3-70B-Instruct-Gradient-1048k", }