
Commit 430cbf3

Merge branch 'main' into feat/update-llama-index
# Conflicts:
#	poetry.lock
#	pyproject.toml
2 parents: cd1f1d6 + 5fbb402

File tree

8 files changed: +263 additions, -154 deletions

docker-compose.yaml

Lines changed: 8 additions & 4 deletions
@@ -8,11 +8,12 @@ services:
   # This service builds from an external Dockerfile and runs the Ollama mode.
   private-gpt-ollama:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-ollama # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.ollama
     volumes:
-      - ./local_data/:/home/worker/app/local_data
+      - ./local_data:/home/worker/app/local_data
     ports:
       - "8001:8001"
     environment:
@@ -27,11 +28,14 @@ services:
       - ollama-cpu
       - ollama-cuda
       - ollama-api
+    depends_on:
+      - ollama

   # Private-GPT service for the local mode
   # This service builds from a local Dockerfile and runs the application in local mode.
   private-gpt-llamacpp-cpu:
     image: ${PGPT_IMAGE:-zylonai/private-gpt}:${PGPT_TAG:-0.6.2}-llamacpp-cpu # x-release-please-version
+    user: root
     build:
       context: .
       dockerfile: Dockerfile.llamacpp-cpu
@@ -44,7 +48,7 @@ services:
     environment:
       PORT: 8001
       PGPT_PROFILES: local
-      HF_TOKEN: ${HF_TOKEN}
+      HF_TOKEN: ${HF_TOKEN:-}
     profiles:
       - llamacpp-cpu

@@ -57,7 +61,7 @@ services:
   ollama:
     image: traefik:v2.10
     ports:
-      - "8081:8080"
+      - "8080:8080"
     command:
       - "--providers.file.filename=/etc/router.yml"
       - "--log.level=ERROR"
@@ -102,4 +106,4 @@ services:
               count: 1
               capabilities: [gpu]
     profiles:
-      - ollama-cuda
\ No newline at end of file
+      - ollama-cuda
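Two of these changes affect runtime behavior beyond packaging: the Traefik proxy now publishes 8080 directly, and `HF_TOKEN: ${HF_TOKEN:-}` uses Compose's `:-` substitution so an unset variable resolves to an empty string instead of triggering a "variable is not set" warning. A minimal sketch of the same fallback in Python, illustrative only and not part of this commit:

    import os

    # Mirrors Compose's ${HF_TOKEN:-}: unset or empty resolves to "".
    hf_token = os.environ.get("HF_TOKEN") or ""
    print(repr(hf_token))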

poetry.lock

Lines changed: 203 additions & 139 deletions
Some generated files are not rendered by default.

private_gpt/components/embedding/embedding_component.py

Lines changed: 17 additions & 0 deletions
@@ -144,6 +144,23 @@ def __init__(self, settings: Settings) -> None:
                     api_key=settings.gemini.api_key,
                     model_name=settings.gemini.embedding_model,
                 )
+            case "mistralai":
+                try:
+                    from llama_index.embeddings.mistralai import (  # type: ignore
+                        MistralAIEmbedding,
+                    )
+                except ImportError as e:
+                    raise ImportError(
+                        "Mistral dependencies not found, install with `poetry install --extras embeddings-mistral`"
+                    ) from e
+
+                api_key = settings.openai.api_key
+                model = settings.openai.embedding_model
+
+                self.embedding_model = MistralAIEmbedding(
+                    api_key=api_key,
+                    model=model,
+                )
             case "mock":
                 # Not a random number, is the dimensionality used by
                 # the default embedding model
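Note that, as committed, the new branch reuses `settings.openai.api_key` and `settings.openai.embedding_model` rather than a dedicated Mistral settings block. A standalone sketch of what the branch constructs, assuming the `embeddings-mistral` extra is installed; the environment variable name and the "mistral-embed" model are illustrative choices, not values from the diff:

    import os

    from llama_index.embeddings.mistralai import MistralAIEmbedding

    # MISTRAL_API_KEY is an illustrative name; the committed code reads
    # the key from settings.openai.api_key instead.
    embedding_model = MistralAIEmbedding(
        api_key=os.environ["MISTRAL_API_KEY"],
        model="mistral-embed",
    )
    # Embed a string and inspect the vector size (e.g. 1024 for mistral-embed).
    print(len(embedding_model.get_text_embedding("hello world")))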

private_gpt/components/ingest/ingest_helper.py

Lines changed: 7 additions & 1 deletion
@@ -92,7 +92,13 @@ def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]:
             return string_reader.load_data([file_data.read_text()])

         logger.debug("Specific reader found for extension=%s", extension)
-        return reader_cls().load_data(file_data)
+        documents = reader_cls().load_data(file_data)
+
+        # Sanitize NUL bytes in text which can't be stored in Postgres
+        for i in range(len(documents)):
+            documents[i].text = documents[i].text.replace("\u0000", "")
+
+        return documents

     @staticmethod
     def _exclude_metadata(documents: list[Document]) -> None:
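Postgres refuses to store the NUL code point (`\u0000`) in text columns, so a single document whose reader emits one can make the insert fail. A self-contained illustration of the sanitization the hunk adds; the `Doc` class here is a stand-in for llama-index's `Document`, which exposes `.text` the same way:

    class Doc:  # stand-in for llama_index's Document
        def __init__(self, text: str) -> None:
            self.text = text

    documents = [Doc("clean text"), Doc("pdf\u0000artifact\u0000text")]

    # Same loop as the hunk: strip NUL code points before they reach Postgres.
    for i in range(len(documents)):
        documents[i].text = documents[i].text.replace("\u0000", "")

    assert all("\u0000" not in d.text for d in documents)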

private_gpt/settings/settings.py

Lines changed: 12 additions & 1 deletion
@@ -197,7 +197,14 @@ class HuggingFaceSettings(BaseModel):

 class EmbeddingSettings(BaseModel):
     mode: Literal[
-        "huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock", "gemini"
+        "huggingface",
+        "openai",
+        "azopenai",
+        "sagemaker",
+        "ollama",
+        "mock",
+        "gemini",
+        "mistralai",
     ]
     ingest_mode: Literal["simple", "batch", "parallel", "pipeline"] = Field(
         "simple",
@@ -350,6 +357,10 @@ class AzureOpenAISettings(BaseModel):
 class UISettings(BaseModel):
     enabled: bool
     path: str
+    default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
+        "RAG",
+        description="The default mode.",
+    )
     default_chat_system_prompt: str = Field(
         None,
         description="The default system prompt to use for the chat mode.",
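Because `default_mode` is declared as a `Literal`, pydantic validates the configured value when settings load, and anything outside the four modes is rejected. A minimal sketch with a standalone model (not the project's full `UISettings`):

    from typing import Literal

    from pydantic import BaseModel, Field, ValidationError

    class UISketch(BaseModel):  # illustrative stand-in for UISettings
        default_mode: Literal["RAG", "Search", "Basic", "Summarize"] = Field(
            "RAG",
            description="The default mode.",
        )

    print(UISketch().default_mode)                       # RAG (the default)
    print(UISketch(default_mode="Search").default_mode)  # Search

    try:
        UISketch(default_mode="rag")  # values are case-sensitive
    except ValidationError:
        print("rejected: not one of the four modes")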

private_gpt/ui/ui.py

Lines changed: 6 additions & 3 deletions
@@ -100,8 +100,11 @@ def __init__(
         self._selected_filename = None

         # Initialize system prompt based on default mode
-        self.mode = MODES[0]
-        self._system_prompt = self._get_default_system_prompt(self.mode)
+        default_mode_map = {mode.value: mode for mode in Modes}
+        self._default_mode = default_mode_map.get(
+            settings().ui.default_mode, Modes.RAG_MODE
+        )
+        self._system_prompt = self._get_default_system_prompt(self._default_mode)

     def _chat(
         self, message: str, history: list[list[str]], mode: Modes, *_: Any
@@ -391,7 +394,7 @@ def _build_ui_blocks(self) -> gr.Blocks:

         with gr.Row(equal_height=False):
             with gr.Column(scale=3):
-                default_mode = MODES[0]
+                default_mode = self._default_mode
                 mode = gr.Radio(
                     [mode.value for mode in MODES],
                     label="Mode",
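The constructor change builds a value-to-member map once and falls back to `Modes.RAG_MODE` whenever `ui.default_mode` names no known mode, so a typo in settings degrades gracefully instead of crashing the UI. The same pattern in isolation; the enum below is a stand-in mirroring the four UI modes, not the module's real `Modes` definition:

    from enum import Enum

    class Modes(str, Enum):  # stand-in for the UI module's Modes enum
        RAG_MODE = "RAG"
        SEARCH_MODE = "Search"
        BASIC_CHAT_MODE = "Basic"
        SUMMARIZE_MODE = "Summarize"

    default_mode_map = {mode.value: mode for mode in Modes}

    print(default_mode_map.get("Search", Modes.RAG_MODE))   # Modes.SEARCH_MODE
    print(default_mode_map.get("unknown", Modes.RAG_MODE))  # falls back to RAG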

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -30,6 +30,7 @@ llama-index-embeddings-huggingface = {version ="*", optional = true}
 llama-index-embeddings-openai = {version ="*", optional = true}
 llama-index-embeddings-azure-openai = {version ="*", optional = true}
 llama-index-embeddings-gemini = {version ="*", optional = true}
+llama-index-embeddings-mistralai = {version ="*", optional = true}
 llama-index-vector-stores-qdrant = {version ="*", optional = true}
 llama-index-vector-stores-milvus = {version ="*", optional = true}
 llama-index-vector-stores-chroma = {version ="*", optional = true}
@@ -74,6 +75,7 @@ embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
 embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 embeddings-gemini = ["llama-index-embeddings-gemini"]
+embeddings-mistral = ["llama-index-embeddings-mistralai"]
 vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
 vector-stores-chroma = ["llama-index-vector-stores-chroma"]

settings.yaml

Lines changed: 8 additions & 6 deletions
@@ -25,21 +25,23 @@ data:
 ui:
   enabled: true
   path: /
+  # "RAG", "Search", "Basic", or "Summarize"
+  default_mode: "RAG"
   default_chat_system_prompt: >
-    You are a helpful, respectful and honest assistant.
+    You are a helpful, respectful and honest assistant.
     Always answer as helpfully as possible and follow ALL given instructions.
     Do not speculate or make up information.
     Do not reference any given instructions or context.
   default_query_system_prompt: >
-    You can only answer questions about the provided context.
-    If you know the answer but it is not based in the provided context, don't provide
+    You can only answer questions about the provided context.
+    If you know the answer but it is not based in the provided context, don't provide
     the answer, just state the answer is not in the context provided.
   default_summarization_system_prompt: >
-    Provide a comprehensive summary of the provided context information.
+    Provide a comprehensive summary of the provided context information.
     The summary should cover all the key points and main ideas presented in
-    the original text, while also condensing the information into a concise
+    the original text, while also condensing the information into a concise
     and easy-to-understand format. Please ensure that the summary includes
-    relevant details and examples that support the main ideas, while avoiding
+    relevant details and examples that support the main ideas, while avoiding
     any unnecessary information or repetition.
   delete_file_button_enabled: true
   delete_all_files_button_enabled: true
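The paired -/+ prompt lines above appear identical because they differ only in trailing whitespace; the substantive change is the new `default_mode` key, validated by the `UISettings` model shown earlier. A quick check of how the new block parses, using PyYAML directly rather than the project's settings loader (assumed available, illustrative only):

    import yaml  # PyYAML

    snippet = """
    ui:
      enabled: true
      path: /
      # "RAG", "Search", "Basic", or "Summarize"
      default_mode: "RAG"
    """

    ui = yaml.safe_load(snippet)["ui"]
    print(ui["default_mode"])  # RAG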
