From b9b7014d4c84644ff94cb7f7a05198bfa9b38979 Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Fri, 3 May 2024 22:15:33 +0200
Subject: [PATCH 1/9] Update __main__.py

Replace uvicorn with hypercorn to support IPv6
---
 llama_cpp/server/__main__.py | 38 +++++++------------------------------
 1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index a6f1f4e9c..b6450c039 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -1,33 +1,11 @@
-"""Example FastAPI server for llama.cpp.
-
-To run this example:
-
-```bash
-pip install fastapi uvicorn sse-starlette pydantic-settings
-export MODEL=../models/7B/...
-```
-
-Then run:
-```
-uvicorn llama_cpp.server.app:create_app --reload
-```
-
-or
-
-```
-python3 -m llama_cpp.server
-```
-
-Then visit http://localhost:8000/docs to see the interactive API docs.
-
-"""
 from __future__ import annotations

+import asyncio
 import os
 import sys
 import argparse

-import uvicorn
+import hypercorn.asyncio

 from llama_cpp.server.app import create_app
 from llama_cpp.server.settings import (
@@ -84,13 +62,11 @@ def main():
         server_settings=server_settings,
         model_settings=model_settings,
     )
-    uvicorn.run(
-        app,
-        host=os.getenv("HOST", server_settings.host),
-        port=int(os.getenv("PORT", server_settings.port)),
-        ssl_keyfile=server_settings.ssl_keyfile,
-        ssl_certfile=server_settings.ssl_certfile,
-    )
+    config = hypercorn.Config()
+    config.bind = [f"{os.getenv('HOST', server_settings.host)}:{int(os.getenv('PORT', server_settings.port))}"]
+    config.keyfile = server_settings.ssl_keyfile
+    config.certfile = server_settings.ssl_certfile
+    asyncio.run(hypercorn.asyncio.serve(app, config))


 if __name__ == "__main__":

From 93963dfa13e4a49aab251049a738c59e2e5ff72d Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Fri, 3 May 2024 22:23:51 +0200
Subject: [PATCH 2/9] Update pyproject.toml

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8345cb1f0..6d50622fe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,8 +30,8 @@ classifiers = [

 [project.optional-dependencies]
 server = [
-    "uvicorn>=0.22.0",
-    "fastapi>=0.100.0",
+    "hypercorn>=0.16.0",
+    "fastapi>=0.111.0",
     "pydantic-settings>=2.0.1",
     "sse-starlette>=1.6.1",
     "starlette-context>=0.3.6,<0.4",
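For reference, the serving pattern that patch 1 switches to can be exercised on its own. The sketch below is not part of the series: the ASGI app is a stub standing in for the FastAPI app returned by create_app(), and the bind address and TLS paths are placeholder assumptions.

```python
# Minimal, self-contained sketch of serving an ASGI app with hypercorn,
# mirroring the pattern patch 1 introduces. The app is a stub; the bind
# address and the commented-out TLS paths are placeholders.
import asyncio

import hypercorn.asyncio
from hypercorn.config import Config


async def app(scope, receive, send):
    # Trivial ASGI callable; hypercorn logs and continues if an app does
    # not implement the lifespan protocol, so plain HTTP handling suffices.
    if scope["type"] != "http":
        raise RuntimeError("this sketch only handles HTTP")
    await send({
        "type": "http.response.start",
        "status": 200,
        "headers": [(b"content-type", b"text/plain")],
    })
    await send({"type": "http.response.body", "body": b"ok\n"})


if __name__ == "__main__":
    config = Config()
    config.bind = ["localhost:8000"]  # "host:port", as in patch 1
    # config.keyfile = "key.pem"      # hypercorn's TLS settings are named
    # config.certfile = "cert.pem"    # keyfile/certfile, not ssl_keyfile
    # serve() is a coroutine, so it needs an event loop to run:
    asyncio.run(hypercorn.asyncio.serve(app, config))
```

Note that config.bind is a list: each entry opens another listening socket, which is the property the later IPv6 patches rely on to serve IPv4 and IPv6 addresses simultaneously.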
+""" + +# import llama_cpp + +# path = b"../../models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf" + +# model_params = llama_cpp.llama_model_default_params() +# model = llama_cpp.llama_load_model_from_file(path, model_params) + +# if model is None: +# raise RuntimeError(f"Failed to load model from file: {path}") + + +# ctx_params = llama_cpp.llama_context_default_params() +# ctx = llama_cpp.llama_new_context_with_model(model, ctx_params) + +# if ctx is None: +# raise RuntimeError("Failed to create context") + + from __future__ import annotations import os From 690aaa111ad502a7b4b6e4ea31a2e5be081ccd82 Mon Sep 17 00:00:00 2001 From: Olivier DEBAUCHE Date: Sat, 4 May 2024 01:05:48 +0200 Subject: [PATCH 4/9] Update __main__.py --- llama_cpp/server/__main__.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py index 4a790eb20..b30de2dec 100644 --- a/llama_cpp/server/__main__.py +++ b/llama_cpp/server/__main__.py @@ -1,23 +1,20 @@ -"""llama-cpp-python server from scratch in a single file. +"""Example FastAPI server for llama.cpp. +To run this example: +```bash +pip install fastapi uvicorn sse-starlette pydantic-settings +export MODEL=../models/7B/... +``` +Then run: +``` +uvicorn llama_cpp.server.app:create_app --reload +``` +or +``` +python3 -m llama_cpp.server +``` +Then visit http://localhost:8000/docs to see the interactive API docs. """ -# import llama_cpp - -# path = b"../../models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf" - -# model_params = llama_cpp.llama_model_default_params() -# model = llama_cpp.llama_load_model_from_file(path, model_params) - -# if model is None: -# raise RuntimeError(f"Failed to load model from file: {path}") - - -# ctx_params = llama_cpp.llama_context_default_params() -# ctx = llama_cpp.llama_new_context_with_model(model, ctx_params) - -# if ctx is None: -# raise RuntimeError("Failed to create context") - from __future__ import annotations From 2ac9facee4bdd7334a0baa9e2fc61032f29dca4b Mon Sep 17 00:00:00 2001 From: Olivier DEBAUCHE Date: Wed, 29 May 2024 12:08:08 +0200 Subject: [PATCH 5/9] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6d50622fe..8f3ad623b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ [project.optional-dependencies] server = [ - "hypercorn>=0.16.0", + "hypercorn>=0.17.0", "fastapi>=0.111.0", "pydantic-settings>=2.0.1", "sse-starlette>=1.6.1", From c152d077a8b1c24906044a8b8c0b1d2596fa38ee Mon Sep 17 00:00:00 2001 From: Olivier DEBAUCHE Date: Wed, 29 May 2024 12:12:14 +0200 Subject: [PATCH 6/9] Update pyproject.toml --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8f3ad623b..6076bb775 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,10 +32,10 @@ classifiers = [ server = [ "hypercorn>=0.17.0", "fastapi>=0.111.0", - "pydantic-settings>=2.0.1", - "sse-starlette>=1.6.1", + "pydantic-settings>=2.2.1", + "sse-starlette>=2.1.0", "starlette-context>=0.3.6,<0.4", - "PyYAML>=5.1", + "PyYAML>=6.0", ] test = [ "pytest>=7.4.0", From d3b472b23626ace399dc67d5f48fa4e936289c5c Mon Sep 17 00:00:00 2001 From: Olivier DEBAUCHE Date: Mon, 3 Jun 2024 18:58:57 +0200 Subject: [PATCH 7/9] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 
index 6076bb775..548ee66b2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ classifiers = [

 [project.optional-dependencies]
 server = [
-    "hypercorn>=0.17.0",
+    "hypercorn>=0.17.3",
     "fastapi>=0.111.0",
     "pydantic-settings>=2.2.1",
     "sse-starlette>=2.1.0",

From 43f2ffe5baa9866d48d05a6e78a81698324f7145 Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Sun, 4 Aug 2024 09:45:59 +0200
Subject: [PATCH 8/9] Add IPv6 support

---
 examples/high_level_api/fastapi_server.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/examples/high_level_api/fastapi_server.py b/examples/high_level_api/fastapi_server.py
index 9421db57b..23c512cbd 100644
--- a/examples/high_level_api/fastapi_server.py
+++ b/examples/high_level_api/fastapi_server.py
@@ -3,13 +3,13 @@
 To run this example:

 ```bash
-pip install fastapi uvicorn sse-starlette
+pip install fastapi hypercorn sse-starlette
 export MODEL=../models/7B/...
 ```

 Then run:
 ```
-uvicorn --factory llama_cpp.server.app:create_app --reload
+hypercorn --factory llama_cpp.server.app:create_app --reload
 ```

 or
@@ -25,13 +25,20 @@
 """

 import os
-import uvicorn
+import asyncio
+import hypercorn.asyncio
+from hypercorn.config import Config

 from llama_cpp.server.app import create_app

 if __name__ == "__main__":
     app = create_app()

-    uvicorn.run(
-        app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000))
-    )
+    config = Config()
+    host = os.getenv('HOST', 'localhost')
+    port = int(os.getenv('PORT', 8000))
+
+    # Add IPv6 support by listening on both the IPv4 and IPv6 addresses
+    config.bind = [f"{host}:{port}", f"[::]:{port}"]
+
+    asyncio.run(hypercorn.asyncio.serve(app, config))

From e7aabaa061a79b2818c300f6c482675ef5a6ed1c Mon Sep 17 00:00:00 2001
From: Olivier DEBAUCHE
Date: Sun, 4 Aug 2024 09:46:29 +0200
Subject: [PATCH 9/9] Update fastapi_server.py

---
 examples/high_level_api/fastapi_server.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/high_level_api/fastapi_server.py b/examples/high_level_api/fastapi_server.py
index 23c512cbd..2a8ebb9bb 100644
--- a/examples/high_level_api/fastapi_server.py
+++ b/examples/high_level_api/fastapi_server.py
@@ -38,7 +38,6 @@
     host = os.getenv('HOST', 'localhost')
     port = int(os.getenv('PORT', 8000))

-    # Add IPv6 support by listening on both the IPv4 and IPv6 addresses
     config.bind = [f"{host}:{port}", f"[::]:{port}"]

     asyncio.run(hypercorn.asyncio.serve(app, config))
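Once patch 8's dual bind is in place, the example server should answer on both address families. A smoke test along these lines can confirm it; this assumes the server is already running on port 8000 and exposes the OpenAI-compatible /v1/models route the llama-cpp-python server normally serves; substitute any route your app provides.

```python
# Probe the IPv4 and IPv6 loopback addresses that patch 8's
# config.bind = [f"{host}:{port}", f"[::]:{port}"] should cover.
import urllib.error
import urllib.request

for url in (
    "http://127.0.0.1:8000/v1/models",  # IPv4 loopback
    "http://[::1]:8000/v1/models",      # IPv6 loopback
):
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            print(f"{url} -> HTTP {resp.status}")
    except (urllib.error.URLError, OSError) as exc:
        print(f"{url} -> failed: {exc}")
```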