Merge remote-tracking branch 'upstream/main' into add_clickhouse_vectore_store

# Conflicts:
#	poetry.lock
#	pyproject.toml
jaluma committed Jul 8, 2024
2 parents 1775cf6 + b687dc8 commit 48f8aaf
Showing 13 changed files with 2,258 additions and 2,169 deletions.
8 changes: 4 additions & 4 deletions fern/docs/pages/api-reference/sdks.mdx
@@ -8,14 +8,14 @@ The clients are kept up to date automatically, so we encourage you to use the la

<Cards>
<Card
title="Node.js/TypeScript - WIP"
title="TypeScript"
icon="fa-brands fa-node"
href="https://github.com/imartinez/privateGPT-typescript"
href="https://github.com/zylon-ai/privategpt-ts"
/>
<Card
title="Python - Ready!"
title="Python"
icon="fa-brands fa-python"
href="https://github.com/imartinez/pgpt_python"
href="https://github.com/zylon-ai/pgpt-python"
/>
<br />
</Cards>
4,238 changes: 2,141 additions & 2,097 deletions poetry.lock

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions private_gpt/components/embedding/embedding_component.py
@@ -55,8 +55,17 @@ def __init__(self, settings: Settings) -> None:
"OpenAI dependencies not found, install with `poetry install --extras embeddings-openai`"
) from e

openai_settings = settings.openai.api_key
self.embedding_model = OpenAIEmbedding(api_key=openai_settings)
api_base = (
settings.openai.embedding_api_base or settings.openai.api_base
)
api_key = settings.openai.embedding_api_key or settings.openai.api_key
model = settings.openai.embedding_model

self.embedding_model = OpenAIEmbedding(
api_base=api_base,
api_key=api_key,
model=model,
)
case "ollama":
try:
from llama_index.embeddings.ollama import ( # type: ignore
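Note on the hunk above: the OpenAI embedding client now takes its base URL, key and model from new embedding-specific settings, falling back to the shared `openai` values when those are unset. A minimal sketch of that resolution, using a stand-in object rather than the real private_gpt `Settings` (field names are taken from the settings diff further down):

```python
# Sketch only: mirrors the fallback logic added above; `openai` is a stand-in,
# not the real private_gpt settings object.
from types import SimpleNamespace

openai = SimpleNamespace(
    api_base="https://api.openai.com/v1",    # shared OpenAI settings
    api_key="sk-shared-key",
    embedding_api_base=None,                  # new embedding-specific overrides
    embedding_api_key=None,
    embedding_model="text-embedding-ada-002",
)

# Embedding-specific values win; otherwise fall back to the shared ones.
api_base = openai.embedding_api_base or openai.api_base
api_key = openai.embedding_api_key or openai.api_key
model = openai.embedding_model
print(api_base, model)  # https://api.openai.com/v1 text-embedding-ada-002
```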
2 changes: 1 addition & 1 deletion private_gpt/components/llm/custom/sagemaker.py
@@ -218,7 +218,7 @@ def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:

response_body = resp["Body"]
response_str = response_body.read().decode("utf-8")
response_dict = eval(response_str)
response_dict = json.loads(response_str)

return CompletionResponse(
text=response_dict[0]["generated_text"][len(prompt) :], raw=resp
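The one-line change above swaps `eval` for `json.loads` when decoding the SageMaker response body, so the endpoint output is parsed as JSON data instead of being executed as Python. A minimal sketch of the parsing path (the payload is illustrative, and it assumes `json` is already imported in `sagemaker.py`):

```python
import json

# Illustrative body in the shape a SageMaker text-generation endpoint returns:
# a JSON list with one object holding "generated_text" (prompt + completion).
response_str = '[{"generated_text": "Tell me a joke. Why did the chicken..."}]'

# json.loads only parses data; eval() would have executed arbitrary expressions.
response_dict = json.loads(response_str)
prompt = "Tell me a joke."
print(response_dict[0]["generated_text"][len(prompt):])  # completion only
```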
14 changes: 11 additions & 3 deletions private_gpt/components/llm/llm_component.py
@@ -51,7 +51,7 @@ def __init__(self, settings: Settings) -> None:
"Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
) from e

prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
prompt_style = get_prompt_style(settings.llm.prompt_style)
settings_kwargs = {
"tfs_z": settings.llamacpp.tfs_z, # ollama and llama-cpp
"top_k": settings.llamacpp.top_k, # ollama and llama-cpp
@@ -109,15 +109,23 @@ def __init__(self, settings: Settings) -> None:
raise ImportError(
"OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
) from e

prompt_style = get_prompt_style(settings.llm.prompt_style)
openai_settings = settings.openai
self.llm = OpenAILike(
api_base=openai_settings.api_base,
api_key=openai_settings.api_key,
model=openai_settings.model,
is_chat_model=True,
max_tokens=None,
max_tokens=settings.llm.max_new_tokens,
api_version="",
temperature=settings.llm.temperature,
context_window=settings.llm.context_window,
max_new_tokens=settings.llm.max_new_tokens,
messages_to_prompt=prompt_style.messages_to_prompt,
completion_to_prompt=prompt_style.completion_to_prompt,
tokenizer=settings.llm.tokenizer,
timeout=openai_settings.request_timeout,
reuse_client=False,
)
case "ollama":
try:
26 changes: 15 additions & 11 deletions private_gpt/components/llm/prompt_helper.py
@@ -173,18 +173,22 @@ def _completion_to_prompt(self, completion: str) -> str:

class MistralPromptStyle(AbstractPromptStyle):
def _messages_to_prompt(self, messages: Sequence[ChatMessage]) -> str:
prompt = "<s>"
inst_buffer = []
text = ""
for message in messages:
role = message.role
content = message.content or ""
if role.lower() == "system":
message_from_user = f"[INST] {content.strip()} [/INST]"
prompt += message_from_user
elif role.lower() == "user":
prompt += "</s>"
message_from_user = f"[INST] {content.strip()} [/INST]"
prompt += message_from_user
return prompt
if message.role == MessageRole.SYSTEM or message.role == MessageRole.USER:
inst_buffer.append(str(message.content).strip())
elif message.role == MessageRole.ASSISTANT:
text += "<s>[INST] " + "\n".join(inst_buffer) + " [/INST]"
text += " " + str(message.content).strip() + "</s>"
inst_buffer.clear()
else:
raise ValueError(f"Unknown message role {message.role}")

if len(inst_buffer) > 0:
text += "<s>[INST] " + "\n".join(inst_buffer) + " [/INST]"

return text

def _completion_to_prompt(self, completion: str) -> str:
return self._messages_to_prompt(
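For context on the rewrite above: consecutive system and user messages are now buffered into a single `[INST]` block, an assistant message closes the `<s>...</s>` pair, and any trailing buffered turns open one final block. A minimal usage sketch; the import paths are assumptions based on the repository layout and llama_index conventions:

```python
from llama_index.core.llms import ChatMessage, MessageRole

from private_gpt.components.llm.prompt_helper import MistralPromptStyle

style = MistralPromptStyle()
messages = [
    ChatMessage(content="A", role=MessageRole.SYSTEM),
    ChatMessage(content="B", role=MessageRole.USER),
    ChatMessage(content="C", role=MessageRole.ASSISTANT),
    ChatMessage(content="D", role=MessageRole.USER),
]
# System + user turns merge into one [INST] block; the assistant reply closes
# it with </s>; the trailing user turn opens a new block.
print(style.messages_to_prompt(messages))
# -> <s>[INST] A\nB [/INST] C</s><s>[INST] D [/INST]
```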
10 changes: 5 additions & 5 deletions private_gpt/components/vector_store/vector_store_component.py
@@ -4,10 +4,10 @@
from injector import inject, singleton
from llama_index.core.indices.vector_store import VectorIndexRetriever, VectorStoreIndex
from llama_index.core.vector_stores.types import (
BasePydanticVectorStore,
FilterCondition,
MetadataFilter,
MetadataFilters,
VectorStore,
)

from private_gpt.open_ai.extensions.context_filter import ContextFilter
@@ -32,7 +32,7 @@ def _doc_id_metadata_filter(
@singleton
class VectorStoreComponent:
settings: Settings
vector_store: VectorStore
vector_store: BasePydanticVectorStore

@inject
def __init__(self, settings: Settings) -> None:
@@ -54,7 +54,7 @@ def __init__(self, settings: Settings) -> None:
)

self.vector_store = typing.cast(
VectorStore,
BasePydanticVectorStore,
PGVectorStore.from_params(
**settings.postgres.model_dump(exclude_none=True),
table_name="embeddings",
@@ -87,7 +87,7 @@ def __init__(self, settings: Settings) -> None:
) # TODO

self.vector_store = typing.cast(
VectorStore,
BasePydanticVectorStore,
BatchedChromaVectorStore(
chroma_client=chroma_client, chroma_collection=chroma_collection
),
@@ -115,7 +115,7 @@ def __init__(self, settings: Settings) -> None:
**settings.qdrant.model_dump(exclude_none=True)
)
self.vector_store = typing.cast(
VectorStore,
BasePydanticVectorStore,
QdrantVectorStore(
client=client,
collection_name="make_this_parameterizable_per_api_call",
36 changes: 24 additions & 12 deletions private_gpt/settings/settings.py
@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
0.1,
description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
)
prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral` prompt style. It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
)


class VectorstoreSettings(BaseModel):
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
class LlamaCPPSettings(BaseModel):
llm_hf_repo_id: str
llm_hf_model_file: str
prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
"llama2",
description=(
"The prompt style to use for the chat engine. "
"If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
"If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
"If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
"If `mistral` - use the `mistral` prompt style. It should look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
"`llama2` is the historic behaviour. `default` might work better with your custom models."
),
)

tfs_z: float = Field(
1.0,
description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",
@@ -206,6 +205,19 @@ class OpenAISettings(BaseModel):
"gpt-3.5-turbo",
description="OpenAI Model to use. Example: 'gpt-4'.",
)
request_timeout: float = Field(
120.0,
description="Time elapsed until openailike server times out the request. Default is 120s. Format is float. ",
)
embedding_api_base: str = Field(
None,
description="Base URL of OpenAI API. Example: 'https://api.openai.com/v1'.",
)
embedding_api_key: str
embedding_model: str = Field(
"text-embedding-ada-002",
description="OpenAI embedding Model to use. Example: 'text-embedding-3-large'.",
)


class OllamaSettings(BaseModel):
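One practical consequence of the settings change above: `prompt_style` now lives on `LLMSettings` rather than `LlamaCPPSettings`, so configuration files set it under `llm:` instead of `llamacpp:` (the YAML hunks below make the same move). A simplified sketch of the relocated field, using a stand-in model rather than the real settings classes:

```python
# Sketch only: stand-in for the relocated field; the real definition is in
# private_gpt/settings/settings.py as shown in the diff above.
from typing import Literal

from pydantic import BaseModel, Field


class LLMSettingsStub(BaseModel):
    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
        "llama2", description="Prompt style used by the chat engine."
    )


print(LLMSettingsStub().prompt_style)                        # llama2 (default)
print(LLMSettingsStub(prompt_style="mistral").prompt_style)  # mistral
```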
47 changes: 25 additions & 22 deletions pyproject.toml
@@ -7,49 +7,52 @@ authors = ["Zylon <hi@zylon.ai>"]
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
# PrivateGPT
fastapi = { extras = ["all"], version = "^0.110.0" }
fastapi = { extras = ["all"], version = "^0.111.0" }
python-multipart = "^0.0.9"
injector = "^0.21.0"
pyyaml = "^6.0.1"
watchdog = "^4.0.0"
transformers = "^4.38.2"
watchdog = "^4.0.1"
transformers = "^4.42.3"
docx2txt = "^0.8"
cryptography = "^3.1"
# LlamaIndex core libs
llama-index-core = "^0.10.14"
llama-index-readers-file = "^0.1.6"
llama-index-core = "^0.10.52"
llama-index-readers-file = "^0.1.27"
# Optional LlamaIndex integration libs
llama-index-llms-llama-cpp = {version = "^0.1.3", optional = true}
llama-index-llms-openai = {version = "^0.1.6", optional = true}
llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
llama-index-llms-openai = {version = "^0.1.25", optional = true}
llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
llama-index-llms-ollama = {version ="^0.1.2", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.5", optional = true}
llama-index-llms-ollama = {version ="^0.1.5", optional = true}
llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true}
llama-index-embeddings-openai = {version ="^0.1.6", optional = true}
llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true}
llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
llama-index-vector-stores-chroma = {version ="^0.1.4", optional = true}
llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true}
llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.2", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.2", optional = true}
llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
# Postgres
psycopg2-binary = {version ="^2.9.9", optional = true}
asyncpg = {version="^0.29.0", optional = true}

# ClickHouse
clickhouse-connect = {version = "^0.7.8", optional = true}
clickhouse-connect = {version = "^0.7.15", optional = true}

# Optional Sagemaker dependency
boto3 = {version ="^1.34.51", optional = true}
boto3 = {version ="^1.34.139", optional = true}

# Optional Qdrant client
qdrant-client = {version ="^1.9.0", optional = true}

# Optional Reranker dependencies
torch = {version ="^2.1.2", optional = true}
sentence-transformers = {version ="^2.6.1", optional = true}
torch = {version ="^2.3.1", optional = true}
sentence-transformers = {version ="^3.0.1", optional = true}

# Optional UI
gradio = {version ="^4.19.2", optional = true}
gradio = {version ="^4.37.2", optional = true}

[tool.poetry.extras]
ui = ["gradio"]
4 changes: 2 additions & 2 deletions settings-local.yaml
@@ -8,9 +8,9 @@ llm:
max_new_tokens: 512
context_window: 3900
tokenizer: mistralai/Mistral-7B-Instruct-v0.2
prompt_style: "mistral"

llamacpp:
prompt_style: "mistral"
llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf

@@ -24,4 +24,4 @@ vectorstore:
database: qdrant

qdrant:
path: local_data/private_gpt/qdrant
path: local_data/private_gpt/qdrant
4 changes: 4 additions & 0 deletions settings-vllm.yaml
@@ -3,6 +3,9 @@ server:

llm:
mode: openailike
max_new_tokens: 512
tokenizer: mistralai/Mistral-7B-Instruct-v0.2
temperature: 0.1

embedding:
mode: huggingface
@@ -15,3 +18,4 @@ openai:
api_base: http://localhost:8000/v1
api_key: EMPTY
model: facebook/opt-125m
request_timeout: 600.0
5 changes: 3 additions & 2 deletions settings.yaml
@@ -5,7 +5,7 @@ server:
env_name: ${APP_ENV:prod}
port: ${PORT:8001}
cors:
enabled: false
enabled: true
allow_origins: ["*"]
allow_methods: ["*"]
allow_headers: ["*"]
@@ -36,6 +36,7 @@ ui:

llm:
mode: llamacpp
prompt_style: "mistral"
# Should be matching the selected model
max_new_tokens: 512
context_window: 3900
@@ -60,7 +61,6 @@ clickhouse:
database: embeddings

llamacpp:
prompt_style: "mistral"
llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
@@ -102,6 +102,7 @@ sagemaker:
openai:
api_key: ${OPENAI_API_KEY:}
model: gpt-3.5-turbo
embedding_api_key: ${OPENAI_API_KEY:}

ollama:
llm_model: llama2
20 changes: 12 additions & 8 deletions tests/test_prompt_helper.py
@@ -69,17 +69,21 @@ def test_tag_prompt_style_format_with_system_prompt():
def test_mistral_prompt_style_format():
prompt_style = MistralPromptStyle()
messages = [
ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM),
ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
ChatMessage(content="A", role=MessageRole.SYSTEM),
ChatMessage(content="B", role=MessageRole.USER),
]

expected_prompt = (
"<s>[INST] You are an AI assistant. [/INST]</s>"
"[INST] Hello, how are you doing? [/INST]"
)

expected_prompt = "<s>[INST] A\nB [/INST]"
assert prompt_style.messages_to_prompt(messages) == expected_prompt

messages2 = [
ChatMessage(content="A", role=MessageRole.SYSTEM),
ChatMessage(content="B", role=MessageRole.USER),
ChatMessage(content="C", role=MessageRole.ASSISTANT),
ChatMessage(content="D", role=MessageRole.USER),
]
expected_prompt2 = "<s>[INST] A\nB [/INST] C</s><s>[INST] D [/INST]"
assert prompt_style.messages_to_prompt(messages2) == expected_prompt2


def test_chatml_prompt_style_format():
prompt_style = ChatMLPromptStyle()