From a0443ab74c2ee3b89bd1913fbe514d5e73a31b54 Mon Sep 17 00:00:00 2001
From: Yi Yan
Date: Tue, 15 Jul 2025 10:38:46 +0800
Subject: [PATCH] feat: add Kimi-K2-Instruct-GGUF

---
 src/emd/models/llms/__init__.py   |  1 +
 src/emd/models/llms/kimi.py       | 46 +++++++++++++++++++++++++++++++
 src/emd/models/model_series.py    |  6 ++++
 src/emd/models/utils/constants.py |  1 +
 4 files changed, 54 insertions(+)
 create mode 100644 src/emd/models/llms/kimi.py

diff --git a/src/emd/models/llms/__init__.py b/src/emd/models/llms/__init__.py
index 9ea306d2..8867f197 100644
--- a/src/emd/models/llms/__init__.py
+++ b/src/emd/models/llms/__init__.py
@@ -6,6 +6,7 @@
     deepseek,
     baichuan,
     jina,
+    kimi,
     txgemma,
     medgemma
 )
diff --git a/src/emd/models/llms/kimi.py b/src/emd/models/llms/kimi.py
new file mode 100644
index 00000000..91bcf6bf
--- /dev/null
+++ b/src/emd/models/llms/kimi.py
@@ -0,0 +1,46 @@
+from .. import Model
+from ..engines import ktransformers_engine
+from ..services import (
+    sagemaker_service,
+    sagemaker_async_service,
+    ecs_service,
+    local_service
+)
+from ..frameworks import fastapi_framework
+from ..instances import (
+    g6e24xlarge_instance,
+    g6e48xlarge_instance,
+    local_instance
+)
+from emd.models.utils.constants import ModelType
+from ..model_series import KIMI_SERIES
+
+Model.register(
+    dict(
+        model_id="Kimi-K2-Instruct-Q4_K_M-GGUF",
+        supported_engines=[ktransformers_engine],
+        supported_instances=[
+            g6e24xlarge_instance,  # 4 GPUs, 96 vCPU, 768GB RAM - Minimum viable
+            g6e48xlarge_instance,  # 8 GPUs, 192 vCPU, 1536GB RAM - Optimal
+            local_instance  # Local deployment (600GB+ RAM required)
+        ],
+        supported_services=[
+            sagemaker_service,
+            sagemaker_async_service,
+            ecs_service,
+            local_service
+        ],
+        supported_frameworks=[
+            fastapi_framework
+        ],
+        allow_china_region=True,
+        huggingface_model_id="KVCache-ai/Kimi-K2-Instruct-GGUF",
+        huggingface_model_download_kwargs=dict(allow_patterns=["*Q4_K_M*"]),
+        require_huggingface_token=False,
+        application_scenario="Agentic AI, tool use, reasoning, coding, autonomous problem-solving",
+        description="Kimi K2 1T parameter MoE model with 32B activated parameters in GGUF Q4_K_M format. Optimized for KTransformers deployment with 600GB+ RAM requirement. Achieves 10-14 TPS performance.",
+        model_type=ModelType.LLM,
+        model_series=KIMI_SERIES,
+        need_prepare_model=False
+    )
+)
\ No newline at end of file
diff --git a/src/emd/models/model_series.py b/src/emd/models/model_series.py
index aa26f6e4..6d141619 100644
--- a/src/emd/models/model_series.py
+++ b/src/emd/models/model_series.py
@@ -151,3 +151,9 @@
     description="Baichuan Intelligent Technology.",
     reference_link="https://github.com/baichuan-inc"
 )
+
+KIMI_SERIES = ModelSeries(
+    model_series_name=ModelSeriesType.KIMI,
+    description="Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.",
+    reference_link="https://github.com/moonshotai/Kimi-K2"
+)
diff --git a/src/emd/models/utils/constants.py b/src/emd/models/utils/constants.py
index b608ef3f..007241d3 100644
--- a/src/emd/models/utils/constants.py
+++ b/src/emd/models/utils/constants.py
@@ -234,3 +234,4 @@ class ModelSeriesType(ConstantBase):
     DEEPSEEK_REASONING_MODEL = "deepseek reasoning model"
     DEEPSEEK_v3 = "deepseek v3"
     BAICHUAN = "baichuan"
+    KIMI = "kimi"