Commit 5851b02

feat: update llama-index + dependencies (#2092)
* chore: update libraries
* fix: mypy
* chore: more updates
* fix: mypy/black
* chore: fix docker warnings
* fix: mypy
* fix: black
1 parent 5fbb402 commit 5851b02

File tree: 16 files changed, 2782 additions & 2429 deletions

docker-compose.yaml

Lines changed: 5 additions & 1 deletion
@@ -61,7 +61,7 @@ services:
   ollama:
     image: traefik:v2.10
     ports:
-      - "11434:11434"
+      - "8080:8080"
     command:
       - "--providers.file.filename=/etc/router.yml"
      - "--log.level=ERROR"
@@ -83,6 +83,8 @@ services:
   # Ollama service for the CPU mode
   ollama-cpu:
     image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
     volumes:
       - ./models:/root/.ollama
     profiles:
@@ -92,6 +94,8 @@ services:
   # Ollama service for the CUDA mode
   ollama-cuda:
     image: ollama/ollama:latest
+    ports:
+      - "11434:11434"
     volumes:
       - ./models:/root/.ollama
     deploy:
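Net effect of this change: the traefik-based "ollama" proxy service now publishes 8080, while the actual Ollama containers (ollama-cpu, ollama-cuda) expose 11434 directly. A minimal sketch for sanity-checking the new port layout, assuming the stack runs on localhost and the `requests` package is installed; /api/tags is Ollama's model-listing endpoint:

import requests  # third-party HTTP client

def ollama_reachable(base_url: str) -> bool:
    """Return True if an Ollama-style server answers at base_url."""
    try:
        # /api/tags lists locally available models and is cheap to call
        return requests.get(f"{base_url}/api/tags", timeout=2).ok
    except requests.RequestException:
        return False

# ollama-cpu / ollama-cuda now expose 11434; the traefik proxy listens on 8080.
print(ollama_reachable("http://localhost:11434"))
print(ollama_reachable("http://localhost:8080"))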

poetry.lock

Lines changed: 2680 additions & 2335 deletions
Some generated files are not rendered by default.

private_gpt/components/ingest/ingest_component.py

Lines changed: 1 addition & 1 deletion
@@ -403,7 +403,7 @@ def _doc_to_node_worker(self, file_name: str, documents: list[Document]) -> None
                 self.transformations,
                 show_progress=self.show_progress,
             )
-            self.node_q.put(("process", file_name, documents, nodes))
+            self.node_q.put(("process", file_name, documents, list(nodes)))
         finally:
             self.doc_semaphore.release()
             self.doc_q.task_done()  # unblock Q joins
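The only functional change here is wrapping nodes in list() before enqueueing: newer llama-index transformation pipelines may hand back a sequence that is awkward to consume from another thread, so the worker materializes it first. A rough illustration of the pattern, with a hypothetical generator standing in for the real transformation output:

import queue
import threading

node_q: queue.Queue = queue.Queue()

def transform(docs: list[str]):
    # Stand-in for a pipeline that may return a lazy iterator in newer versions
    return (d.upper() for d in docs)

def producer() -> None:
    nodes = transform(["doc-a", "doc-b"])
    # Materialize once, so the consumer gets a stable, re-iterable list
    node_q.put(("process", "file.txt", list(nodes)))

def consumer() -> None:
    _, name, nodes = node_q.get()
    print(name, nodes)  # nodes can be read (and re-read) safely
    node_q.task_done()

threading.Thread(target=producer).start()
consumer()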

private_gpt/components/llm/llm_component.py

Lines changed: 4 additions & 5 deletions
@@ -120,7 +120,6 @@ def __init__(self, settings: Settings) -> None:
                     api_version="",
                     temperature=settings.llm.temperature,
                     context_window=settings.llm.context_window,
-                    max_new_tokens=settings.llm.max_new_tokens,
                     messages_to_prompt=prompt_style.messages_to_prompt,
                     completion_to_prompt=prompt_style.completion_to_prompt,
                     tokenizer=settings.llm.tokenizer,
@@ -184,10 +183,10 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:

                     return wrapper

-                Ollama.chat = add_keep_alive(Ollama.chat)
-                Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)
-                Ollama.complete = add_keep_alive(Ollama.complete)
-                Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)
+                Ollama.chat = add_keep_alive(Ollama.chat)  # type: ignore
+                Ollama.stream_chat = add_keep_alive(Ollama.stream_chat)  # type: ignore
+                Ollama.complete = add_keep_alive(Ollama.complete)  # type: ignore
+                Ollama.stream_complete = add_keep_alive(Ollama.stream_complete)  # type: ignore

                 self.llm = llm

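Removing max_new_tokens from this constructor call appears to track the updated llama-index integration signature, and the new `# type: ignore` comments only quiet mypy about reassigning methods on the Ollama class; the add_keep_alive wrapper defined just above this hunk is what injects the keep-alive option. A simplified sketch of that decorator pattern, with a stand-in class and a hardcoded keep_alive value for illustration:

from collections.abc import Callable
from typing import Any

def add_keep_alive(func: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap an LLM method so every call carries a keep_alive argument."""
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        kwargs.setdefault("keep_alive", "5m")  # illustrative default
        return func(*args, **kwargs)
    return wrapper

class FakeOllama:  # stand-in for the real llama-index Ollama class
    def chat(self, **kwargs: Any) -> dict[str, Any]:
        return kwargs

FakeOllama.chat = add_keep_alive(FakeOllama.chat)  # type: ignore[method-assign]
print(FakeOllama().chat(model="llama3"))  # {'model': 'llama3', 'keep_alive': '5m'}
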
private_gpt/components/llm/prompt_helper.py

Lines changed: 5 additions & 3 deletions
@@ -40,7 +40,8 @@ def messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
         logger.debug("Got for messages='%s' the prompt='%s'", messages, prompt)
         return prompt

-    def completion_to_prompt(self, completion: str) -> str:
+    def completion_to_prompt(self, prompt: str) -> str:
+        completion = prompt  # Fix: Llama-index parameter has to be named as prompt
         prompt = self._completion_to_prompt(completion)
         logger.debug("Got for completion='%s' the prompt='%s'", completion, prompt)
         return prompt
@@ -285,8 +286,9 @@ def _completion_to_prompt(self, completion: str) -> str:


 def get_prompt_style(
-    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"]
-    | None
+    prompt_style: (
+        Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] | None
+    )
 ) -> AbstractPromptStyle:
     """Get the prompt style to use from the given string.

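The rename matters because newer llama-index calls this hook by keyword, as completion_to_prompt(prompt=...), so the parameter name is part of the contract; the body immediately restores the old local name. A tiny illustration of why a keyword call breaks the old signature (hypothetical prompt format, not the real call site):

def completion_to_prompt_old(completion: str) -> str:
    return f"[INST] {completion} [/INST]"

def completion_to_prompt_new(prompt: str) -> str:
    completion = prompt  # keep the old local name for the rest of the body
    return f"[INST] {completion} [/INST]"

print(completion_to_prompt_new(prompt="hello"))   # works
# completion_to_prompt_old(prompt="hello")        # TypeError: unexpected keyword argument 'prompt'
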
private_gpt/components/node_store/node_store_component.py

Lines changed: 3 additions & 2 deletions
@@ -38,10 +38,10 @@ def __init__(self, settings: Settings) -> None:

             case "postgres":
                 try:
-                    from llama_index.core.storage.docstore.postgres_docstore import (
+                    from llama_index.storage.docstore.postgres import (  # type: ignore
                         PostgresDocumentStore,
                     )
-                    from llama_index.core.storage.index_store.postgres_index_store import (
+                    from llama_index.storage.index_store.postgres import (  # type: ignore
                         PostgresIndexStore,
                     )
                 except ImportError:
@@ -55,6 +55,7 @@ def __init__(self, settings: Settings) -> None:
                 self.index_store = PostgresIndexStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
+
                 self.doc_store = PostgresDocumentStore.from_params(
                     **settings.postgres.model_dump(exclude_none=True)
                 )
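The Postgres docstore and index store now import from the llama_index.storage.* namespaces provided by the separately installed llama-index-storage-docstore-postgres and llama-index-storage-index-store-postgres packages rather than from llama_index.core. A minimal sketch of the guarded-import pattern the surrounding try/except uses; the error message and install hint are illustrative (the extra is declared as storage-nodestore-postgres in pyproject.toml):

try:
    from llama_index.storage.docstore.postgres import PostgresDocumentStore
    from llama_index.storage.index_store.postgres import PostgresIndexStore
except ImportError as e:
    # Give users a hint about the optional extra instead of a bare ImportError
    raise ImportError(
        "Postgres document/index stores not found. "
        "Install them, e.g. via the storage-nodestore-postgres extra."
    ) from e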

private_gpt/components/vector_store/batched_chroma.py

Lines changed: 10 additions & 7 deletions
@@ -1,14 +1,17 @@
-from collections.abc import Generator
-from typing import Any
+from collections.abc import Generator, Sequence
+from typing import TYPE_CHECKING, Any

 from llama_index.core.schema import BaseNode, MetadataMode
 from llama_index.core.vector_stores.utils import node_to_metadata_dict
 from llama_index.vector_stores.chroma import ChromaVectorStore  # type: ignore

+if TYPE_CHECKING:
+    from collections.abc import Mapping
+

 def chunk_list(
-    lst: list[BaseNode], max_chunk_size: int
-) -> Generator[list[BaseNode], None, None]:
+    lst: Sequence[BaseNode], max_chunk_size: int
+) -> Generator[Sequence[BaseNode], None, None]:
     """Yield successive max_chunk_size-sized chunks from lst.

     Args:
@@ -60,7 +63,7 @@ def __init__(
         )
         self.chroma_client = chroma_client

-    def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
+    def add(self, nodes: Sequence[BaseNode], **add_kwargs: Any) -> list[str]:
         """Add nodes to index, batching the insertion to avoid issues.

         Args:
@@ -78,8 +81,8 @@ def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
         all_ids = []
         for node_chunk in node_chunks:
-            embeddings = []
-            metadatas = []
+            embeddings: list[Sequence[float]] = []
+            metadatas: list[Mapping[str, Any]] = []
             ids = []
             documents = []
             for node in node_chunk:
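Widening list[BaseNode] to Sequence[BaseNode] lets chunk_list and add() accept any read-only sequence of nodes rather than only lists, and the new annotations on embeddings/metadatas give mypy concrete element types. The chunking helper itself is plain slice-based batching; a generic sketch of the same idea over integers:

from collections.abc import Generator, Sequence

def chunk_list(
    lst: Sequence[int], max_chunk_size: int
) -> Generator[Sequence[int], None, None]:
    """Yield successive max_chunk_size-sized slices of lst."""
    for i in range(0, len(lst), max_chunk_size):
        yield lst[i : i + max_chunk_size]

print([list(chunk) for chunk in chunk_list(tuple(range(7)), 3)])
# [[0, 1, 2], [3, 4, 5], [6]]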

private_gpt/server/chat/chat_service.py

Lines changed: 11 additions & 4 deletions
@@ -1,4 +1,5 @@
 from dataclasses import dataclass
+from typing import TYPE_CHECKING

 from injector import inject, singleton
 from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
@@ -26,6 +27,9 @@
 from private_gpt.server.chunks.chunks_service import Chunk
 from private_gpt.settings.settings import Settings

+if TYPE_CHECKING:
+    from llama_index.core.postprocessor.types import BaseNodePostprocessor
+

 class Completion(BaseModel):
     response: str
@@ -114,12 +118,15 @@ def _chat_engine(
                 context_filter=context_filter,
                 similarity_top_k=self.settings.rag.similarity_top_k,
             )
-            node_postprocessors = [
+            node_postprocessors: list[BaseNodePostprocessor] = [
                 MetadataReplacementPostProcessor(target_metadata_key="window"),
-                SimilarityPostprocessor(
-                    similarity_cutoff=settings.rag.similarity_value
-                ),
             ]
+            if settings.rag.similarity_value:
+                node_postprocessors.append(
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    )
+                )

             if settings.rag.rerank.enabled:
                 rerank_postprocessor = SentenceTransformerRerank(
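The behavioural change: SimilarityPostprocessor is only appended when settings.rag.similarity_value is set, so an unset cutoff no longer produces a postprocessor with similarity_cutoff=None. A stripped-down sketch of that conditional assembly, using stand-in classes rather than the llama-index ones:

from dataclasses import dataclass

@dataclass
class MetadataReplacement:   # stand-in for MetadataReplacementPostProcessor
    target_metadata_key: str

@dataclass
class SimilarityCutoff:      # stand-in for SimilarityPostprocessor
    similarity_cutoff: float

def build_postprocessors(similarity_value: float | None) -> list[object]:
    postprocessors: list[object] = [MetadataReplacement(target_metadata_key="window")]
    if similarity_value:  # skip entirely when the cutoff is unset
        postprocessors.append(SimilarityCutoff(similarity_cutoff=similarity_value))
    return postprocessors

print(build_postprocessors(None))   # only the metadata replacement step
print(build_postprocessors(0.45))   # metadata replacement + similarity cutoff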

private_gpt/server/recipes/summarize/summarize_service.py

Lines changed: 3 additions & 3 deletions
@@ -90,9 +90,9 @@ def _summarize(
         # Add context documents to summarize
         if use_context:
             # 1. Recover all ref docs
-            ref_docs: dict[
-                str, RefDocInfo
-            ] | None = self.storage_context.docstore.get_all_ref_doc_info()
+            ref_docs: dict[str, RefDocInfo] | None = (
+                self.storage_context.docstore.get_all_ref_doc_info()
+            )
             if ref_docs is None:
                 raise ValueError("No documents have been ingested yet.")

private_gpt/settings/settings.py

Lines changed: 13 additions & 13 deletions
@@ -136,19 +136,19 @@ class LLMSettings(BaseModel):
         0.1,
         description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
     )
-    prompt_style: Literal[
-        "default", "llama2", "llama3", "tag", "mistral", "chatml"
-    ] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `llama3` - use the llama3 prompt style from the llama_index."
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
+    prompt_style: Literal["default", "llama2", "llama3", "tag", "mistral", "chatml"] = (
+        Field(
+            "llama2",
+            description=(
+                "The prompt style to use for the chat engine. "
+                "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+                "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+                "If `llama3` - use the llama3 prompt style from the llama_index."
+                "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
+                "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
+                "`llama2` is the historic behaviour. `default` might work better with your custom models."
+            ),
+        )
     )
private_gpt/ui/ui.py

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 """This file should be imported if and only if you want to run the UI locally."""
+
 import base64
 import logging
 import time

pyproject.toml

Lines changed: 42 additions & 51 deletions
@@ -7,63 +7,54 @@ authors = ["Zylon <hi@zylon.ai>"]
 [tool.poetry.dependencies]
 python = ">=3.11,<3.12"
 # PrivateGPT
-fastapi = { extras = ["all"], version = "^0.111.0" }
-python-multipart = "^0.0.9"
-injector = "^0.21.0"
-pyyaml = "^6.0.1"
+fastapi = { extras = ["all"], version = "^0.115.0" }
+python-multipart = "^0.0.10"
+injector = "^0.22.0"
+pyyaml = "^6.0.2"
 watchdog = "^4.0.1"
-transformers = "^4.42.3"
+transformers = "^4.44.2"
 docx2txt = "^0.8"
 cryptography = "^3.1"
 # LlamaIndex core libs
-llama-index-core = "^0.10.52"
-llama-index-readers-file = "^0.1.27"
+llama-index-core = ">=0.11.2,<0.12.0"
+llama-index-readers-file = "*"
 # Optional LlamaIndex integration libs
-llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
-llama-index-llms-openai = {version = "^0.1.25", optional = true}
-llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
-llama-index-llms-ollama = {version ="^0.2.2", optional = true}
-llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
-llama-index-llms-gemini = {version ="^0.1.11", optional = true}
-llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
-llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
-llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
-llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
-llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
-llama-index-embeddings-mistralai = {version ="^0.1.6", optional = true}
-llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
-llama-index-vector-stores-milvus = {version ="^0.1.20", optional = true}
-llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
-llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
-llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
-llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
-llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
+llama-index-llms-llama-cpp = {version = "*", optional = true}
+llama-index-llms-openai = {version ="*", optional = true}
+llama-index-llms-openai-like = {version ="*", optional = true}
+llama-index-llms-ollama = {version ="*", optional = true}
+llama-index-llms-azure-openai = {version ="*", optional = true}
+llama-index-llms-gemini = {version ="*", optional = true}
+llama-index-embeddings-ollama = {version ="*", optional = true}
+llama-index-embeddings-huggingface = {version ="*", optional = true}
+llama-index-embeddings-openai = {version ="*", optional = true}
+llama-index-embeddings-azure-openai = {version ="*", optional = true}
+llama-index-embeddings-gemini = {version ="*", optional = true}
+llama-index-embeddings-mistralai = {version ="*", optional = true}
+llama-index-vector-stores-qdrant = {version ="*", optional = true}
+llama-index-vector-stores-milvus = {version ="*", optional = true}
+llama-index-vector-stores-chroma = {version ="*", optional = true}
+llama-index-vector-stores-postgres = {version ="*", optional = true}
+llama-index-vector-stores-clickhouse = {version ="*", optional = true}
+llama-index-storage-docstore-postgres = {version ="*", optional = true}
+llama-index-storage-index-store-postgres = {version ="*", optional = true}
 # Postgres
 psycopg2-binary = {version ="^2.9.9", optional = true}
 asyncpg = {version="^0.29.0", optional = true}

 # ClickHouse
-clickhouse-connect = {version = "^0.7.15", optional = true}
+clickhouse-connect = {version = "^0.7.19", optional = true}

 # Optional Sagemaker dependency
-boto3 = {version ="^1.34.139", optional = true}
-
-# Optional Qdrant client
-qdrant-client = {version ="^1.9.0", optional = true}
+boto3 = {version ="^1.35.26", optional = true}

 # Optional Reranker dependencies
-torch = {version ="^2.3.1", optional = true}
-sentence-transformers = {version ="^3.0.1", optional = true}
+torch = {version ="^2.4.1", optional = true}
+sentence-transformers = {version ="^3.1.1", optional = true}

 # Optional UI
-gradio = {version ="^4.37.2", optional = true}
-ffmpy = "0.4.0"
-
-# Optional Google Gemini dependency
-google-generativeai = {version ="^0.5.4", optional = true}
-
-# Optional Ollama client
-ollama = {version ="^0.3.0", optional = true}
+gradio = {version ="^4.44.0", optional = true}
+ffmpy = {version ="^0.4.0", optional = true}

 # Optional HF Transformers
 einops = {version = "^0.8.0", optional = true}
@@ -74,11 +65,11 @@ ui = ["gradio", "ffmpy"]
 llms-llama-cpp = ["llama-index-llms-llama-cpp"]
 llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
-llms-ollama = ["llama-index-llms-ollama", "ollama"]
+llms-ollama = ["llama-index-llms-ollama"]
 llms-sagemaker = ["boto3"]
 llms-azopenai = ["llama-index-llms-azure-openai"]
-llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
-embeddings-ollama = ["llama-index-embeddings-ollama", "ollama"]
+llms-gemini = ["llama-index-llms-gemini"]
+embeddings-ollama = ["llama-index-embeddings-ollama"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface", "einops"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
@@ -94,14 +85,14 @@ storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-ind
 rerank-sentence-transformers = ["torch", "sentence-transformers"]

 [tool.poetry.group.dev.dependencies]
-black = "^22"
-mypy = "^1.2"
-pre-commit = "^2"
-pytest = "^7"
-pytest-cov = "^3"
+black = "^24"
+mypy = "^1.11"
+pre-commit = "^3"
+pytest = "^8"
+pytest-cov = "^5"
 ruff = "^0"
-pytest-asyncio = "^0.21.1"
-types-pyyaml = "^6.0.12.12"
+pytest-asyncio = "^0.24.0"
+types-pyyaml = "^6.0.12.20240917"

 [build-system]
 requires = ["poetry-core>=1.0.0"]
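After this update only llama-index-core carries an explicit range (>=0.11.2,<0.12.0); the integration packages float at "*" so Poetry resolves whatever versions are compatible with that pin, and the standalone ollama, google-generativeai, and qdrant-client entries disappear because the corresponding llama-index integrations pull them in transitively. One way to inspect what actually got resolved in an installed environment (package names taken from this file):

from importlib import metadata

for dist in ("llama-index-core", "llama-index-llms-ollama", "fastapi", "gradio"):
    try:
        print(f"{dist}=={metadata.version(dist)}")
    except metadata.PackageNotFoundError:
        print(f"{dist}: not installed (optional extra not selected)")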

tests/fixtures/fast_api_test_client.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 from tests.fixtures.mock_injector import MockInjector


-@pytest.fixture()
+@pytest.fixture
 def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:
     if request is not None and hasattr(request, "param"):
         injector.bind_settings(request.param or {})

tests/fixtures/ingest_helper.py

Lines changed: 1 addition & 1 deletion
@@ -19,6 +19,6 @@ def ingest_file(self, path: Path) -> IngestResponse:
         return ingest_result


-@pytest.fixture()
+@pytest.fixture
 def ingest_helper(test_client: TestClient) -> IngestHelper:
     return IngestHelper(test_client)

tests/fixtures/mock_injector.py

Lines changed: 1 addition & 1 deletion
@@ -37,6 +37,6 @@ def get(self, interface: type[T]) -> T:
         return self.test_injector.get(interface)


-@pytest.fixture()
+@pytest.fixture
 def injector() -> MockInjector:
     return MockInjector()

tests/server/ingest/test_local_ingest.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 from fastapi.testclient import TestClient


-@pytest.fixture()
+@pytest.fixture
 def file_path() -> str:
     return "test.txt"
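The four fixture edits above are the same cosmetic change: @pytest.fixture() becomes @pytest.fixture. pytest treats both forms identically when the decorator takes no arguments; parentheses are only needed when passing options such as scope. For example:

import pytest

@pytest.fixture
def file_path() -> str:
    return "test.txt"

@pytest.fixture(scope="session")  # parentheses still needed when passing arguments
def base_url() -> str:
    return "http://localhost:8001"  # illustrative value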
