diff --git a/README.md b/README.md
index b63efa3c8..80e4aa29f 100644
--- a/README.md
+++ b/README.md
@@ -365,6 +365,43 @@ Chat completion is available through the [`create_chat_completion`](https://llam
 For OpenAI API v1 compatibility, you use the [`create_chat_completion_openai_v1`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion_openai_v1) method which will return pydantic models instead of dicts.
 
+#### Contribute
+
+You may want to use a chat format completion handler that lives outside of the `llama-cpp-python` package. External packages can contribute one by
+declaring an [entry point](https://setuptools.pypa.io/en/latest/userguide/entry_point.html).
+
+Here is an example for `pyproject.toml`:
+
+```toml
+[project.entry-points."llama_cpp_python.register_chat_format"]
+load = "timmins_plugin_fancy:load"
+```
+
+Inside your `timmins-plugin-fancy` package you could have the following layout:
+```
+timmins-plugin-fancy
+├── pyproject.toml # and/or setup.cfg, setup.py
+└── timmins_plugin_fancy
+    └── __init__.py
+```
+
+and inside `__init__.py`:
+
+```python
+import llama_cpp.llama as llama
+from llama_cpp.llama_chat_format import LlamaChatCompletionHandlerRegistry
+
+def load(registry: LlamaChatCompletionHandlerRegistry):
+    registry.register_chat_completion_handler('custom-chat-format', custom_chat_handler)
+
+def custom_chat_handler(
+    llama: llama.Llama,
+    **kwargs,  # type: ignore
+):
+    pass  # build and return the chat completion response here
+```
+
+See [llama_cpp/llama_chat_format.py](https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama_chat_format.py) for examples of handler implementations.
 
 ### JSON and JSON Schema Mode
 
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index 21091bf50..d0d6d084b 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -7,6 +7,7 @@
 import dataclasses
 import random
 import string
+import importlib.metadata
 
 from contextlib import ExitStack
 from typing import (
@@ -160,6 +161,15 @@ def decorator(f: LlamaChatCompletionHandler):
 
 ### Chat Formatter ###
 
+def load_chat_formats():
+    """
+    Load every entry point in the ``llama_cpp_python.register_chat_format`` group from the
+    installed packages, allowing external packages to contribute chat format handlers.
+    """
+    registry = LlamaChatCompletionHandlerRegistry()
+    for entry_point in importlib.metadata.entry_points(group='llama_cpp_python.register_chat_format'):
+        load = entry_point.load()
+        load(registry)
 
 @dataclasses.dataclass
 class ChatFormatterResponse:
diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index bbac4957e..87f40e384 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -30,6 +30,7 @@
 
 import uvicorn
 
+from llama_cpp.llama_chat_format import load_chat_formats
 from llama_cpp.server.app import create_app
 from llama_cpp.server.settings import (
     Settings,
@@ -83,6 +84,10 @@ def main():
             sys.exit(1)
     assert server_settings is not None
     assert model_settings is not None
+
+    if server_settings.load_external_chat_formats:
+        load_chat_formats()
+
     app = create_app(
         server_settings=server_settings,
         model_settings=model_settings,
diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
index b20655813..57ae33fd3 100644
--- a/llama_cpp/server/settings.py
+++ b/llama_cpp/server/settings.py
@@ -225,6 +225,10 @@ class ServerSettings(BaseSettings):
         default="",
         description="The root path for the server. Useful when running behind a reverse proxy.",
     )
+    load_external_chat_formats: bool = Field(
+        default=True,
+        description="Whether to load external chat format handlers declared via entry points.",
+    )
 
 
 class Settings(ServerSettings, ModelSettings):
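
Once a plugin package such as the `timmins-plugin-fancy` example above is installed, the contributed format can be selected by name. Below is a minimal sketch, assuming that `Llama(chat_format=...)` resolves custom names through `LlamaChatCompletionHandlerRegistry` and using a placeholder model path; on the server side, `load_chat_formats()` is called automatically when the new `load_external_chat_formats` setting is enabled (the default).

```python
# Minimal sketch (assumptions noted above): use a chat format contributed
# by an external plugin package through the entry point mechanism.
import llama_cpp
from llama_cpp.llama_chat_format import load_chat_formats

# Load all "llama_cpp_python.register_chat_format" entry points.
load_chat_formats()

llm = llama_cpp.Llama(
    model_path="./models/model.gguf",   # placeholder path
    chat_format="custom-chat-format",   # name registered by the plugin's load() hook
)

response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response["choices"][0]["message"]["content"])
```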