feat: adding support for external chat format contribution #1716

Open · wants to merge 2 commits into main
37 changes: 37 additions & 0 deletions README.md
@@ -365,6 +365,43 @@ Chat completion is available through the [`create_chat_completion`](https://llam

For OpenAI API v1 compatibility, you use the [`create_chat_completion_openai_v1`](https://llama-cpp-python.readthedocs.io/en/latest/api-reference/#llama_cpp.Llama.create_chat_completion_openai_v1) method which will return pydantic models instead of dicts.

#### Contributing chat formats

You may need your own chat completion handler that lives outside the llama-cpp-python package. External packages can contribute handlers by
declaring an [entry point](https://setuptools.pypa.io/en/latest/userguide/entry_point.html).

Here is an example `pyproject.toml`:

```toml
[project.entry-points."llama_cpp_python.register_chat_format"]
load = "timmins_plugin_fancy:load"
```
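
The entry-point name on the left-hand side (`load` here) is arbitrary; discovery is keyed on the group name `llama_cpp_python.register_chat_format`, and the value must point to a callable that accepts the handler registry.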

Inside your `timmins-plugin-fancy` package you could have the following layout:
```
timmins-plugin-fancy
├── pyproject.toml # and/or setup.cfg, setup.py
└── timmins_plugin_fancy
└── __init__.py
```

and inside `__init__.py`:

```python
import llama_cpp.llama as llama
from llama_cpp.llama_chat_format import LlamaChatCompletionHandlerRegistry


def load(registry: LlamaChatCompletionHandlerRegistry):
    # Called via the entry point; register the handler under the
    # name that users will pass as `chat_format`.
    registry.register_chat_completion_handler("custom-chat-format", custom_chat_handler)


def custom_chat_handler(
    llama: llama.Llama,
    **kwargs,  # type: ignore
):
    # Build and return the chat completion response here.
    pass
```
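
Once the plugin package is installed, the registered format can be consumed by name. Here is a minimal sketch (the model path is a placeholder; `load_chat_formats` must run before the format name is resolved):

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import load_chat_formats

# Discover installed plugins and let them register their chat formats.
# (The server does this automatically when `load_external_chat_formats`
# is enabled; library users call it themselves.)
load_chat_formats()

# The handler registered by the plugin is now resolvable by name.
llm = Llama(
    model_path="path/to/model.gguf",  # placeholder path
    chat_format="custom-chat-format",
)
response = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
)
```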

For reference handler implementations, see [llama_cpp/llama_chat_format.py](https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama_chat_format.py).

### JSON and JSON Schema Mode

10 changes: 10 additions & 0 deletions llama_cpp/llama_chat_format.py
@@ -7,6 +7,7 @@
import dataclasses
import random
import string
import importlib.metadata

from contextlib import ExitStack
from typing import (
@@ -160,6 +161,15 @@ def decorator(f: LlamaChatCompletionHandler):

### Chat Formatter ###

def load_chat_formats():
    """
    Load every entry point registered in the ``llama_cpp_python.register_chat_format``
    group and call it with the handler registry, so that external packages can
    contribute chat format handlers.
    """
    registry = LlamaChatCompletionHandlerRegistry()
    for entry_point in importlib.metadata.entry_points(group='llama_cpp_python.register_chat_format'):
        load = entry_point.load()
        load(registry)

@dataclasses.dataclass
class ChatFormatterResponse:
5 changes: 5 additions & 0 deletions llama_cpp/server/__main__.py
@@ -30,6 +30,7 @@

import uvicorn

from llama_cpp.llama_chat_format import load_chat_formats
from llama_cpp.server.app import create_app
from llama_cpp.server.settings import (
Settings,
@@ -83,6 +84,10 @@ def main():
        sys.exit(1)
    assert server_settings is not None
    assert model_settings is not None

    if server_settings.load_external_chat_formats:
        load_chat_formats()

    app = create_app(
        server_settings=server_settings,
        model_settings=model_settings,
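Note: with the default of `True`, the server discovers and registers plugin handlers before the app is created, so a model's `chat_format` can name a plugin-contributed format; setting `load_external_chat_formats` to false skips entry-point discovery entirely.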
4 changes: 4 additions & 0 deletions llama_cpp/server/settings.py
@@ -225,6 +225,10 @@ class ServerSettings(BaseSettings):
default="",
description="The root path for the server. Useful when running behind a reverse proxy.",
)
load_external_chat_formats: bool = Field(
default=True,
description="Whether we should be looking for external chat formats.",
)


class Settings(ServerSettings, ModelSettings):