
Commit e21bf20

feat: prompt_style applied to all LLMs + extra LLM params. (#1835)
* Moved prompt_style to the main LLM settings, since all LLMs from llama_index can use it. Also passed temperature, context window size, max_tokens, and max_new_tokens into the openailike implementation so its settings stay consistent with the other implementations.
* Removed prompt_style from llamacpp entirely.
* Fixed settings-local.yaml to include prompt_style in the LLM settings instead of llamacpp.
1 parent: c1802e7 · commit: e21bf20

File tree

private_gpt/components/llm/llm_component.py
private_gpt/settings/settings.py
settings-local.yaml
settings.yaml

4 files changed: +22 -18 lines

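The rationale for hoisting prompt_style out of llamacpp is that a prompt style only supplies messages_to_prompt / completion_to_prompt callables, which every llama_index LLM wrapper accepts, so nothing about it is llama-cpp specific. A minimal, purely illustrative sketch of that idea (this is not the project's actual get_prompt_style implementation; the helper and function names below are hypothetical):

# Illustrative only: a toy version of the prompt-style lookup.
# private_gpt's real implementation lives in its prompt_helper module and is richer.
from typing import Callable, Literal

PromptStyleName = Literal["default", "llama2", "tag", "mistral", "chatml"]


def _mistral_completion_to_prompt(completion: str) -> str:
    # "mistral" style wraps the user text in <s>[INST] ... [/INST]
    return f"<s>[INST] {completion} [/INST]"


def _tag_completion_to_prompt(completion: str) -> str:
    # "tag" style renders messages as <|role|>: message
    return f"<|user|>: {completion}\n<|assistant|>: "


def get_completion_to_prompt(style: PromptStyleName) -> Callable[[str], str]:
    # Any llama_index LLM (LlamaCPP, OpenAILike, Ollama, ...) can take such a
    # callable via its completion_to_prompt argument, which is why the setting
    # now lives under `llm` rather than `llamacpp`.
    table: dict[str, Callable[[str], str]] = {
        "mistral": _mistral_completion_to_prompt,
        "tag": _tag_completion_to_prompt,
    }
    return table.get(style, lambda completion: completion)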

private_gpt/components/llm/llm_component.py

Lines changed: 8 additions & 3 deletions
@@ -51,7 +51,7 @@ def __init__(self, settings: Settings) -> None:
                        "Local dependencies not found, install with `poetry install --extras llms-llama-cpp`"
                    ) from e

-                prompt_style = get_prompt_style(settings.llamacpp.prompt_style)
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                settings_kwargs = {
                    "tfs_z": settings.llamacpp.tfs_z,  # ollama and llama-cpp
                    "top_k": settings.llamacpp.top_k,  # ollama and llama-cpp
@@ -109,15 +109,20 @@ def __init__(self, settings: Settings) -> None:
                    raise ImportError(
                        "OpenAILike dependencies not found, install with `poetry install --extras llms-openai-like`"
                    ) from e
-
+                prompt_style = get_prompt_style(settings.llm.prompt_style)
                openai_settings = settings.openai
                self.llm = OpenAILike(
                    api_base=openai_settings.api_base,
                    api_key=openai_settings.api_key,
                    model=openai_settings.model,
                    is_chat_model=True,
-                    max_tokens=None,
+                    max_tokens=settings.llm.max_new_tokens,
                    api_version="",
+                    temperature=settings.llm.temperature,
+                    context_window=settings.llm.context_window,
+                    max_new_tokens=settings.llm.max_new_tokens,
+                    messages_to_prompt=prompt_style.messages_to_prompt,
+                    completion_to_prompt=prompt_style.completion_to_prompt,
                )
            case "ollama":
                try:
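For context, the keyword arguments added above are passed straight to llama_index's OpenAILike wrapper. A hedged standalone sketch of the same wiring with literal example values in place of the settings.llm.* lookups (endpoint, key and model name are placeholders; the import path assumes a llama_index release where OpenAILike lives under llama_index.llms.openai_like):

# Example values only; in private_gpt these come from settings.llm / settings.openai.
from llama_index.llms.openai_like import OpenAILike  # import path may differ between llama_index versions

llm = OpenAILike(
    api_base="http://localhost:8000/v1",   # placeholder OpenAI-compatible endpoint
    api_key="EMPTY",                        # placeholder key
    model="mistral-7b-instruct-v0.2",       # placeholder model name
    is_chat_model=True,
    api_version="",
    max_tokens=512,       # settings.llm.max_new_tokens (previously hard-coded to None)
    temperature=0.1,      # settings.llm.temperature
    context_window=3900,  # settings.llm.context_window
    max_new_tokens=512,   # settings.llm.max_new_tokens
    # In the commit these two come from prompt_style; identity functions stand in here.
    messages_to_prompt=lambda messages: "\n".join(str(m) for m in messages),
    completion_to_prompt=lambda completion: completion,
)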

private_gpt/settings/settings.py

Lines changed: 11 additions & 12 deletions
@@ -104,6 +104,17 @@ class LLMSettings(BaseModel):
        0.1,
        description="The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual.",
    )
+    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
+        "llama2",
+        description=(
+            "The prompt style to use for the chat engine. "
+            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
+            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
+            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
+            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
+            "`llama2` is the historic behaviour. `default` might work better with your custom models."
+        ),
+    )


 class VectorstoreSettings(BaseModel):
@@ -117,18 +128,6 @@ class NodeStoreSettings(BaseModel):
 class LlamaCPPSettings(BaseModel):
    llm_hf_repo_id: str
    llm_hf_model_file: str
-    prompt_style: Literal["default", "llama2", "tag", "mistral", "chatml"] = Field(
-        "llama2",
-        description=(
-            "The prompt style to use for the chat engine. "
-            "If `default` - use the default prompt style from the llama_index. It should look like `role: message`.\n"
-            "If `llama2` - use the llama2 prompt style from the llama_index. Based on `<s>`, `[INST]` and `<<SYS>>`.\n"
-            "If `tag` - use the `tag` prompt style. It should look like `<|role|>: message`. \n"
-            "If `mistral` - use the `mistral prompt style. It shoudl look like <s>[INST] {System Prompt} [/INST]</s>[INST] { UserInstructions } [/INST]"
-            "`llama2` is the historic behaviour. `default` might work better with your custom models."
-        ),
-    )
-
    tfs_z: float = Field(
        1.0,
        description="Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.",

settings-local.yaml

Lines changed: 2 additions & 2 deletions
@@ -8,9 +8,9 @@ llm:
  max_new_tokens: 512
  context_window: 3900
  tokenizer: mistralai/Mistral-7B-Instruct-v0.2
+  prompt_style: "mistral"

llamacpp:
-  prompt_style: "mistral"
  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf

@@ -24,4 +24,4 @@ vectorstore:
  database: qdrant

qdrant:
-  path: local_data/private_gpt/qdrant
\ No newline at end of file
+  path: local_data/private_gpt/qdrant
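Anyone maintaining a custom settings profile needs the same move: prompt_style goes under llm: and is dropped from llamacpp:. A small hypothetical sanity check using PyYAML (not part of private_gpt; the filename is an example):

# Hypothetical migration check for a custom profile; requires PyYAML.
import yaml

with open("settings-local.yaml") as f:
    cfg = yaml.safe_load(f)

assert "prompt_style" not in (cfg.get("llamacpp") or {}), "prompt_style should move out of llamacpp"
assert cfg["llm"]["prompt_style"] in {"default", "llama2", "tag", "mistral", "chatml"}
print("settings profile looks migrated")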

settings.yaml

Lines changed: 1 addition & 1 deletion
@@ -36,6 +36,7 @@ ui:

llm:
  mode: llamacpp
+  prompt_style: "mistral"
  # Should be matching the selected model
  max_new_tokens: 512
  context_window: 3900
@@ -53,7 +54,6 @@ rag:
  top_n: 1

llamacpp:
-  prompt_style: "mistral"
  llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
  llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
  tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
