Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Postgres for the doc and index store #1706

Merged
merged 14 commits into from
Mar 14, 2024
4 changes: 2 additions & 2 deletions fern/docs/pages/manual/vectordb.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ By default `chroma` will use a disk-based database stored in local_data_path / "

### PGVector

To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `pgvector` and install the `pgvector` extra.
To enable PGVector, set the `vectorstore.database` property in the `settings.yaml` file to `pgvector` and install the `vector-stores-postgres` extra.
dbzoo marked this conversation as resolved.
Show resolved Hide resolved

```bash
poetry install --extras pgvector
poetry install --extras vector-stores-postgres
dbzoo marked this conversation as resolved.
Show resolved Hide resolved
```

PGVector settings can be configured by setting values to the `pgvector` property in the `settings.yaml` file.
Expand Down
52 changes: 36 additions & 16 deletions private_gpt/components/node_store/node_store_component.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging

from injector import inject, singleton
from private_gpt.settings.settings import Settings
from llama_index.core.storage.docstore import BaseDocumentStore, SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.storage.index_store.types import BaseIndexStore
Expand All @@ -16,19 +17,38 @@ class NodeStoreComponent:
doc_store: BaseDocumentStore

@inject
def __init__(self) -> None:
    """Load the locally persisted index and document stores.

    Each store is read from ``local_data_path``; when nothing has been
    persisted there yet, a fresh in-memory store is created instead.
    """
    storage_dir = str(local_data_path)

    try:
        loaded_index_store = SimpleIndexStore.from_persist_dir(
            persist_dir=storage_dir
        )
    except FileNotFoundError:
        logger.debug("Local index store not found, creating a new one")
        loaded_index_store = SimpleIndexStore()
    self.index_store = loaded_index_store

    try:
        loaded_doc_store = SimpleDocumentStore.from_persist_dir(
            persist_dir=storage_dir
        )
    except FileNotFoundError:
        logger.debug("Local document store not found, creating a new one")
        loaded_doc_store = SimpleDocumentStore()
    self.doc_store = loaded_doc_store
def __init__(self, settings: Settings) -> None:
match settings.docstore.database:
case "local":
dbzoo marked this conversation as resolved.
Show resolved Hide resolved
try:
self.index_store = SimpleIndexStore.from_persist_dir(
persist_dir=str(local_data_path)
)
except FileNotFoundError:
logger.debug("Local index store not found, creating a new one")
self.index_store = SimpleIndexStore()

try:
self.doc_store = SimpleDocumentStore.from_persist_dir(
persist_dir=str(local_data_path)
)
except FileNotFoundError:
logger.debug("Local document store not found, creating a new one")
self.doc_store = SimpleDocumentStore()

case "postgres":
from llama_index.core.storage.index_store.postgres_index_store import PostgresIndexStore
dbzoo marked this conversation as resolved.
Show resolved Hide resolved
from llama_index.core.storage.docstore.postgres_docstore import PostgresDocumentStore

if settings.postgres is None:
raise ValueError("Postgres index/doc store settings not found.")

self.index_store = PostgresIndexStore.from_params(**settings.postgres.model_dump(exclude_none=True))
self.doc_store = PostgresDocumentStore.from_params(**settings.postgres.model_dump(exclude_none=True))

case _:
# Should be unreachable
# The settings validator should have caught this
raise ValueError(
f"Database {settings.docstore.database} not supported"
)
18 changes: 12 additions & 6 deletions private_gpt/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ class LLMSettings(BaseModel):
# Selects the vector store backend; `database` must be "chroma", "qdrant" or "pgvector".
class VectorstoreSettings(BaseModel):
database: Literal["chroma", "qdrant", "pgvector"]

# Selects the backend for the document and index stores; `database` must be "local" or "postgres".
class DocstoreSettings(BaseModel):
database: Literal["local", "postgres"]

class LlamaCPPSettings(BaseModel):
llm_hf_repo_id: str
Expand Down Expand Up @@ -203,8 +205,7 @@ class UISettings(BaseModel):
False, description="If the button to delete all files is enabled or not."
)


class PGVectorSettings(BaseModel):
class PostgresSettings(BaseModel):
host: str = Field(
"localhost",
description="The server hosting the Postgres database",
Expand All @@ -225,14 +226,17 @@ class PGVectorSettings(BaseModel):
"postgres",
description="The database to use to connect to the Postgres database",
)
schema_name: str = Field(
"public",
description="The name of the schema in the Postgres database to use",
)


class PGVectorSettings(PostgresSettings):
embed_dim: int = Field(
384,
description="The dimension of the embeddings stored in the Postgres database",
)
schema_name: str = Field(
"public",
description="The name of the schema in the Postgres database where the embeddings are stored",
)
table_name: str = Field(
"embeddings",
description="The name of the table in the Postgres database where the embeddings are stored",
Expand Down Expand Up @@ -305,7 +309,9 @@ class Settings(BaseModel):
openai: OpenAISettings
ollama: OllamaSettings
vectorstore: VectorstoreSettings
docstore: DocstoreSettings
qdrant: QdrantSettings | None = None
postgres: PostgresSettings | None = None
pgvector: PGVectorSettings | None = None


Expand Down
41 changes: 41 additions & 0 deletions settings-ollama-pg.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Using ollama and postgres for the vector, doc and index store
server:
env_name: ${APP_ENV:ollama}

llm:
mode: ollama
max_new_tokens: 512
context_window: 3900

embedding:
mode: ollama

ollama:
llm_model: mistral
embedding_model: nomic-embed-text
api_base: http://localhost:11434

docstore:
database: postgres

vectorstore:
database: pgvector

pgvector:
host: localhost
port: 5432
database: postgres
user: postgres
password: admin
embed_dim: 768
schema_name: private_gpt
table_name: embeddings

postgres:
host: localhost
port: 5432
database: postgres
user: postgres
password: admin
schema_name: private_gpt

11 changes: 11 additions & 0 deletions settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ huggingface:
vectorstore:
database: qdrant

docstore:
dbzoo marked this conversation as resolved.
Show resolved Hide resolved
database: local

qdrant:
path: local_data/private_gpt/qdrant

Expand All @@ -69,6 +72,14 @@ pgvector:
schema_name: private_gpt
table_name: embeddings

postgres:
host: localhost
port: 5432
database: postgres
user: postgres
password: postgres
schema_name: private_gpt

sagemaker:
llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479
Expand Down
Loading