Commit 5851b02

feat: update llama-index + dependencies (#2092)
* chore: update libraries
* fix: mypy
* chore: more updates
* fix: mypy/black
* chore: fix docker warnings
* fix: mypy
* fix: black
1 parent 5fbb402 commit 5851b02

File tree: 16 files changed, 2782 additions & 2429 deletions

docker-compose.yaml

Lines changed: 5 additions & 1 deletion
@@ -61,7 +61,7 @@ services:
   ollama:
     image: traefik:v2.10
     ports:
-      - "11434:11434"
+      - "8080:8080"
     command:
       - "--providers.file.filename=/etc/router.yml"
      - "--log.level=ERROR"
@@ -83,6 +83,8 @@ services:
   # Ollama service for the CPU mode
   ollama-cpu:
     image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
     volumes:
       - ./models:/root/.ollama
     profiles:
@@ -92,6 +94,8 @@ services:
   # Ollama service for the CUDA mode
   ollama-cuda:
     image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
     volumes:
       - ./models:/root/.ollama
     deploy:
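Net effect of this change: the traefik-based "ollama" proxy service now publishes 8080, while the actual Ollama containers (ollama-cpu, ollama-cuda) expose 11434 directly. A minimal sketch for sanity-checking the new port layout, assuming the stack runs on localhost and the `requests` package is installed; /api/tags is Ollama's model-listing endpoint:

import requests  # third-party HTTP client

def ollama_reachable(base_url: str) -> bool:
    """Return True if an Ollama-style server answers at base_url."""
    try:
        # /api/tags lists locally available models and is cheap to call
        return requests.get(f"{base_url}/api/tags", timeout=2).ok
    except requests.RequestException:
        return False

# ollama-cpu / ollama-cuda now expose 11434; the traefik proxy listens on 8080.
print(ollama_reachable("http://localhost:11434"))
print(ollama_reachable("http://localhost:8080"))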

poetry.lock

Lines changed: 2680 additions & 2335 deletions
Some generated files are not rendered by default.

private_gpt/components/ingest/ingest_component.py

Lines changed: 1 addition & 1 deletion
@@ -403,7 +403,7 @@ def _doc_to_node_worker(self, file_name: str, documents: list[Document]) -> None
                 self.transformations,
                 show_progress=self.show_progress,
             )
-            self.node_q.put(("process", file_name, documents, nodes))
+            self.node_q.put(("process", file_name, documents, list(nodes)))
         finally:
             self.doc_semaphore.release()
             self.doc_q.task_done()  # unblock Q joins
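The only functional change here is wrapping nodes in list() before enqueueing: newer llama-index transformation pipelines may hand back a sequence that is awkward to consume from another thread, so the worker materializes it first. A rough illustration of the pattern, with a hypothetical generator standing in for the real transformation output:

import queue
import threading

node_q: queue.Queue = queue.Queue()

def transform(docs: list[str]):
    # Stand-in for a pipeline that may return a lazy iterator in newer versions
    return (d.upper() for d in docs)

def producer() -> None:
    nodes = transform(["doc-a", "doc-b"])
    # Materialize once, so the consumer gets a stable, re-iterable list
    node_q.put(("process", "file.txt", list(nodes)))

def consumer() -> None:
    _, name, nodes = node_q.get()
    print(name, nodes)  # nodes can be read (and re-read) safely
    node_q.task_done()

threading.Thread(target=producer).start()
consumer()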

private_gpt/components/llm/llm_component.py

Lines changed: 4 additions & 5 deletions
@@ -120,7 +120,6 @@ def __init__(self, settings: Settings) -> None:
                     api_version="",
                     temperature=settings.llm.temperature,
                     context_window=settings.llm.context_window,
-                    max_new_tokens=settings.llm.max_new_tokens,
                     messages_to_prompt=prompt_style.messages_to_prompt,
                     completion_to_prompt=prompt_style.completion_to_prompt,
                     tokenizer=settings.llm.tokenizer,
@@ -184,10 +183,10 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:

                     return wrapper

-                Ollama.chat = add_keep_alive(Ollama.chat)
-                Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
-                Ollama.complete = add_keep_alive(Ollama.complete)
-                Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
+                Ollama.chat = add_keep_alive(Ollama.chat)  # type: ignore
+                Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)  # type: ignore
+                Ollama.complete = add_keep_alive(Ollama.complete)  # type: ignore
+                Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)  # type: ignore

                 self.llm = llm

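Removing max_new_tokens from this constructor call appears to track the updated llama-index integration signature, and the new `# type: ignore` comments only quiet mypy about reassigning methods on the Ollama class; the add_keep_alive wrapper defined just above this hunk is what injects the keep-alive option. A simplified sketch of that decorator pattern, with a stand-in class and a hardcoded keep_alive value for illustration:

from collections.abc import Callable
from typing import Any

def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap an LLM method so every call carries a keep_alive argument."""
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        kwargs.setdefault("keep_alive", "5m")  # illustrative default
        return func(*args, **kwargs)
    return wrapper

class FakeOllama:  # stand-in for the real llama-index Ollama class
    def chat(self, **kwargs: Any) -> dict[str, Any]:
        return kwargs

FakeOllama.chat = add_keep_alive(FakeOllama.chat)  # type: ignore[method-assign]
print(FakeOllama().chat(model="llama3"))  # {'model': 'llama3', 'keep_alive': '5m'}
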
private_gpt/components/llm/prompt_helper.py

Lines changed: 5 additions & 3 deletions
@@ -40,7 +40,8 @@ def messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
         logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
         return prompt

-    def completion_to_prompt(self, completion: str) -> str:
+    def completion_to_prompt(self, prompt: str) -> str:
+        completion = prompt  # Fix: Llama-index parameter has to be named as prompt
         prompt = self._completion_to_prompt(completion)
         logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
         return prompt
@@ -285,8 +286,9 @@ def _completion_to_prompt(self, completion: str) -> str:


 def get_prompt_style(
-    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"]
-    | None
+    prompt_style: (
+        Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
+    )
 ) -> AbstractPromptStyle:
     """Get the prompt style to use from the given string.

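The rename matters because newer llama-index calls this hook by keyword, as completion_to_prompt(prompt=...), so the parameter name is part of the contract; the body immediately restores the old local name. A tiny illustration of why a keyword call breaks the old signature (hypothetical prompt format, not the real call site):

def completion_to_prompt_old(completion: str) -> str:
    return f"[INST] {completion} [/INST]"

def completion_to_prompt_new(prompt: str) -> str:
    completion = prompt  # keep the old local name for the rest of the body
    return f"[INST] {completion} [/INST]"

print(completion_to_prompt_new(prompt="hello"))   # works
# completion_to_prompt_old(prompt="hello")        # TypeError: unexpected keyword argument 'prompt'
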
private_gpt/components/node_store/node_store_component.py

Lines changed: 3 additions & 2 deletions
@@ -38,10 +38,10 @@ def __init__(self, settings: Settings) -> None:

             case "postgres":
                 try:
-                    from llama_index.core.storage.docstore.postgres_docstore import (
+                    from llama_index.storage.docstore.postgres import (  # type: ignore
                         PostgresDocumentStore,
                     )
-                    from llama_index.core.storage.index_store.postgres_index_store import (
+                    from llama_index.storage.index_store.postgres import (  # type: ignore
                         PostgresIndexStore,
                     )
                 except ImportError:
@@ -55,6 +55,7 @@ def __init__(self, settings: Settings) -> None:
                 self.index_store = PostgresIndexStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
+
                 self.doc_store = PostgresDocumentStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
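The Postgres docstore and index store now import from the llama_index.storage.* namespaces provided by the separately installed llama-index-storage-docstore-postgres and llama-index-storage-index-store-postgres packages rather than from llama_index.core. A minimal sketch of the guarded-import pattern the surrounding try/except uses; the error message and install hint are illustrative (the extra is declared as storage-nodestore-postgres in pyproject.toml):

try:
    from llama_index.storage.docstore.postgres import PostgresDocumentStore
    from llama_index.storage.index_store.postgres import PostgresIndexStore
except ImportError as e:
    # Give users a hint about the optional extra instead of a bare ImportError
    raise ImportError(
        "Postgres document/index stores not found. "
        "Install them, e.g. via the storage-nodestore-postgres extra."
    ) from e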

private_gpt/components/vector_store/batched_chroma.py

Lines changed: 10 additions & 7 deletions
@@ -1,14 +1,17 @@
-from collections.abc import Generator
-from typing import Any
+from collections.abc import Generator, Sequence
+from typing import TYPE_CHECKING, Any

 from llama_index.core.schema import BaseNode, MetadataMode
 from llama_index.core.vector_stores.utils import node_to_metadata_dict
 from llama_index.vector_stores.chroma import ChromaVectorStore  # type: ignore

+if TYPE_CHECKING:
+    from collections.abc import Mapping
+

 def chunk_list(
-    lst: list[BaseNode], max_chunk_size: int
-) -> Generator[list[BaseNode], None, None]:
+    lst: Sequence[BaseNode], max_chunk_size: int
+) -> Generator[Sequence[BaseNode], None, None]:
     """Yield successive max_chunk_size-sized chunks from lst.

     Args:
@@ -60,7 +63,7 @@ def __init__(
         )
         self.chroma_client = chroma_client

-    def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
+    def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
         """Add nodes to index, batching the insertion to avoid issues.

         Args:
@@ -78,8 +81,8 @@ def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
         all_ids = []
         for node_chunk in node_chunks:
-            embeddings = []
-            metadatas = []
+            embeddings: list[Sequence[float]] = []
+            metadatas: list[Mapping[str, Any]] = []
             ids = []
             documents = []
             for node in node_chunk:
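Widening list[BaseNode] to Sequence[BaseNode] lets chunk_list and add() accept any read-only sequence of nodes rather than only lists, and the new annotations on embeddings/metadatas give mypy concrete element types. The chunking helper itself is plain slice-based batching; a generic sketch of the same idea over integers:

from collections.abc import Generator, Sequence

def chunk_list(
    lst: Sequence[int], max_chunk_size: int
) -> Generator[Sequence[int], None, None]:
    """Yield successive max_chunk_size-sized slices of lst."""
    for i in range(0, len(lst), max_chunk_size):
        yield lst[i : i + max_chunk_size]

print([list(chunk) for chunk in chunk_list(tuple(range(7)), 3)])
# [[0, 1, 2], [3, 4, 5], [6]]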

private_gpt/server/chat/chat_service.py

Lines changed: 11 additions & 4 deletions
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import TYPE_CHECKING

 from injector import inject, singleton
 from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
@@ -26,6 +27,9 @@
 from private_gpt.server.chunks.chunks_service import Chunk
 from private_gpt.settings.settings import Settings

+if TYPE_CHECKING:
+    from llama_index.core.postprocessor.types import BaseNodePostprocessor
+

 class Completion(BaseModel):
     response: str
@@ -114,12 +118,15 @@ def _chat_engine(
                 context_filter=context_filter,
                 similarity_top_k=self.settings.rag.similarity_top_k,
             )
-            node_postprocessors = [
+            node_postprocessors: list[BaseNodePostprocessor] = [
                 MetadataReplacementPostProcessor(target_metadata_key="window"),
-                SimilarityPostprocessor(
-                    similarity_cutoff=settings.rag.similarity_value
-                ),
             ]
+            if settings.rag.similarity_value:
+                node_postprocessors.append(
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    )
+                )

             if settings.rag.rerank.enabled:
                 rerank_postprocessor = SentenceTransformerRerank(
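The behavioural change: SimilarityPostprocessor is only appended when settings.rag.similarity_value is set, so an unset cutoff no longer produces a postprocessor with similarity_cutoff=None. A stripped-down sketch of that conditional assembly, using stand-in classes rather than the llama-index ones:

from dataclasses import dataclass

@dataclass
class MetadataReplacement:   # stand-in for MetadataReplacementPostProcessor
    target_metadata_key: str

@dataclass
class SimilarityCutoff:      # stand-in for SimilarityPostprocessor
    similarity_cutoff: float

def build_postprocessors(similarity_value: float | None) -> list[object]:
    postprocessors: list[object] = [MetadataReplacement(target_metadata_key="window")]
    if similarity_value:  # skip entirely when the cutoff is unset
        postprocessors.append(SimilarityCutoff(similarity_cutoff=similarity_value))
    return postprocessors

print(build_postprocessors(None))   # only the metadata replacement step
print(build_postprocessors(0.45))   # metadata replacement + similarity cutoff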

private_gpt/server/recipes/summarize/summarize_service.py

Lines changed: 3 additions & 3 deletions
@@ -90,9 +90,9 @@ def _summarize(
         # Add context documents to summarize
         if use_context:
             # 1. Recover all ref docs
-            ref_docs: dict[
-                str, RefDocInfo
-            ] | None = self.storage_context.docstore.get_all_ref_doc_info()
+            ref_docs: dict[str, RefDocInfo] | None = (
+                self.storage_context.docstore.get_all_ref_doc_info()
+            )
             if ref_docs is None:
                 raise ValueError("No documents have been ingested yet.")

private_gpt/settings/settings.py

Lines changed: 13 additions & 13 deletions
@@ -136,19 +136,19 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
-    prompt_style: Literal[
-        "default", "llama2", "llama3", "tag", "mistral", "chatml"
-    ] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `llama3` - use the llama3 prompt style from the llama_index."
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
+        Field(
+            "llama2",
+            description=(
+                "The prompt style to use for the chat engine. "
+                "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+                "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+                "If `llama3` - use the llama3 prompt style from the llama_index."
+                "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
+                "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
+                "`llama2` is the historic behaviour. `default` might work better with your custom models."
+            ),
+        )
     )
private_gpt/ui/ui.py

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 """This file should be imported if and only if you want to run the UI locally."""
+
 import base64
 import logging
 import time

pyproject.toml

Lines changed: 42 additions & 51 deletions
@@ -7,63 +7,54 @@ authors = ["Zylon <hi@zylon.ai>"]
 [tool.poetry.dependencies]
 python = ">=3.11,<3.12"
 # PrivateGPT
-fastapi = { extras = ["all"], version = "^0.111.0" }
-python-multipart = "^0.0.9"
-injector = "^0.21.0"
-pyyaml = "^6.0.1"
+fastapi = { extras = ["all"], version = "^0.115.0" }
+python-multipart = "^0.0.10"
+injector = "^0.22.0"
+pyyaml = "^6.0.2"
 watchdog = "^4.0.1"
-transformers = "^4.42.3"
+transformers = "^4.44.2"
 docx2txt = "^0.8"
 cryptography = "^3.1"
 # LlamaIndex core libs
-llama-index-core = "^0.10.52"
-llama-index-readers-file = "^0.1.27"
+llama-index-core = ">=0.11.2,<0.12.0"
+llama-index-readers-file = "*"
 # Optional LlamaIndex integration libs
-llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
-llama-index-llms-openai = {version = "^0.1.25", optional = true}
-llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
-llama-index-llms-ollama = {version ="^0.2.2", optional = true}
-llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
-llama-index-llms-gemini = {version ="^0.1.11", optional = true}
-llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
-llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
-llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
-llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
-llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
-llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true}
-llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
-llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
-llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
-llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
-llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
-llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
-llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
+llama-index-llms-llama-cpp = {version = "*", optional = true}
+llama-index-llms-openai = {version ="*", optional = true}
+llama-index-llms-openai-like = {version ="*", optional = true}
+llama-index-llms-ollama = {version ="*", optional = true}
+llama-index-llms-azure-openai = {version ="*", optional = true}
+llama-index-llms-gemini = {version ="*", optional = true}
+llama-index-embeddings-ollama = {version ="*", optional = true}
+llama-index-embeddings-huggingface = {version ="*", optional = true}
+llama-index-embeddings-openai = {version ="*", optional = true}
+llama-index-embeddings-azure-openai = {version ="*", optional = true}
+llama-index-embeddings-gemini = {version ="*", optional = true}
+llama-index-embeddings-mistralai = {version ="*", optional = true}
+llama-index-vector-stores-qdrant = {version ="*", optional = true}
+llama-index-vector-stores-milvus = {version ="*", optional = true}
+llama-index-vector-stores-chroma = {version ="*", optional = true}
+llama-index-vector-stores-postgres = {version ="*", optional = true}
+llama-index-vector-stores-clickhouse = {version ="*", optional = true}
+llama-index-storage-docstore-postgres = {version ="*", optional = true}
+llama-index-storage-index-store-postgres = {version ="*", optional = true}
 # Postgres
 psycopg2-binary = {version ="^2.9.9", optional = true}
 asyncpg = {version="^0.29.0", optional = true}

 # ClickHouse
-clickhouse-connect = {version = "^0.7.15", optional = true}
+clickhouse-connect = {version = "^0.7.19", optional = true}

 # Optional Sagemaker dependency
-boto3 = {version ="^1.34.139", optional = true}
-
-# Optional Qdrant client
-qdrant-client = {version ="^1.9.0", optional = true}
+boto3 = {version ="^1.35.26", optional = true}

 # Optional Reranker dependencies
-torch = {version ="^2.3.1", optional = true}
-sentence-transformers = {version ="^3.0.1", optional = true}
+torch = {version ="^2.4.1", optional = true}
+sentence-transformers = {version ="^3.1.1", optional = true}

 # Optional UI
-gradio = {version ="^4.37.2", optional = true}
-ffmpy = "0.4.0"
-
-# Optional Google Gemini dependency
-google-generativeai = {version ="^0.5.4", optional = true}
-
-# Optional Ollama client
-ollama = {version ="^0.3.0", optional = true}
+gradio = {version ="^4.44.0", optional = true}
+ffmpy = {version ="^0.4.0", optional = true}

 # Optional HF Transformers
 einops = {version = "^0.8.0", optional = true}
@@ -74,11 +65,11 @@ ui = ["gradio", "ffmpy"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
-llms-ollama = ["llama-index-llms-ollama", "ollama"]
+llms-ollama = ["llama-index-llms-ollama"]
 llms-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
-llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
-embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
+llms-gemini = ["llama-index-llms-gemini"]
+embeddings-ollama = ["llama-index-embeddings-ollama"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface", "einops"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
@@ -94,14 +85,14 @@ storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-ind
 rerank-sentence-transformers = ["torch", "sentence-transformers"]

 [tool.poetry.group.dev.dependencies]
-black = "^22"
-mypy = "^1.2"
-pre-commit = "^2"
-pytest = "^7"
-pytest-cov = "^3"
+black = "^24"
+mypy = "^1.11"
+pre-commit = "^3"
+pytest = "^8"
+pytest-cov = "^5"
 ruff = "^0"
-pytest-asyncio = "^0.21.1"
-types-pyyaml = "^6.0.12.12"
+pytest-asyncio = "^0.24.0"
+types-pyyaml = "^6.0.12.20240917"

 [build-system]
 requires = ["poetry-core>=1.0.0"]
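After this update only llama-index-core carries an explicit range (>=0.11.2,<0.12.0); the integration packages float at "*" so Poetry resolves whatever versions are compatible with that pin, and the standalone ollama, google-generativeai, and qdrant-client entries disappear because the corresponding llama-index integrations pull them in transitively. One way to inspect what actually got resolved in an installed environment (package names taken from this file):

from importlib import metadata

for dist in ("llama-index-core", "llama-index-llms-ollama", "fastapi", "gradio"):
    try:
        print(f"{dist}=={metadata.version(dist)}")
    except metadata.PackageNotFoundError:
        print(f"{dist}: not installed (optional extra not selected)")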

tests/fixtures/fast_api_test_client.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 from tests.fixtures.mock_injector import MockInjector


-@pytest.fixture()
+@pytest.fixture
 def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:
     if request is not None and hasattr(request, "param"):
         injector.bind_settings(request.param or {})

tests/fixtures/ingest_helper.py

Lines changed: 1 addition & 1 deletion
@@ -19,6 +19,6 @@ def ingest_file(self, path: Path) -> IngestResponse:
         return ingest_result


-@pytest.fixture()
+@pytest.fixture
 def ingest_helper(test_client: TestClient) -> IngestHelper:
     return IngestHelper(test_client)

tests/fixtures/mock_injector.py

Lines changed: 1 addition & 1 deletion
@@ -37,6 +37,6 @@ def get(self, interface: type[T]) -> T:
         return self.test_injector.get(interface)


-@pytest.fixture()
+@pytest.fixture
 def injector() -> MockInjector:
     return MockInjector()

tests/server/ingest/test_local_ingest.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 from fastapi.testclient import TestClient


-@pytest.fixture()
+@pytest.fixture
 def file_path() -> str:
     return "test.txt"
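The four fixture edits above are the same cosmetic change: @pytest.fixture() becomes @pytest.fixture. pytest treats both forms identically when the decorator takes no arguments; parentheses are only needed when passing options such as scope. For example:

import pytest

@pytest.fixture
def file_path() -> str:
    return "test.txt"

@pytest.fixture(scope="session")  # parentheses still needed when passing arguments
def base_url() -> str:
    return "http://localhost:8001"  # illustrative value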
