add LLAMA_CPP_N_CTX env variable to control maximum context length
Benjoyo committed Nov 12, 2024
1 parent c1bddde commit 7e4a0f1
Showing 2 changed files with 4 additions and 3 deletions.
4 changes: 2 additions & 2 deletions bpm_ai_inference/llm/llama_cpp/llama_chat.py
@@ -15,7 +15,7 @@
 from bpm_ai_inference.llm.llama_cpp._constants import DEFAULT_MODEL, DEFAULT_TEMPERATURE, DEFAULT_MAX_RETRIES, \
     DEFAULT_QUANT_BALANCED
 from bpm_ai_inference.llm.llama_cpp.util import messages_to_llama_dicts
-from bpm_ai_inference.util import FORCE_OFFLINE_FLAG
+from bpm_ai_inference.util import FORCE_OFFLINE_FLAG, LLAMA_CPP_N_CTX
 from bpm_ai_inference.util.files import find_file
 from bpm_ai_inference.util.hf import hf_home

@@ -47,6 +47,7 @@ def __init__(
         temperature: float = DEFAULT_TEMPERATURE,
         grammar: str = None,
         max_retries: int = DEFAULT_MAX_RETRIES,
+        n_ctx: int = int(os.getenv(LLAMA_CPP_N_CTX, "4096")),
         force_offline: bool = (os.getenv(FORCE_OFFLINE_FLAG, "false").lower() == "true")
     ):
         if not has_llama_cpp_python:
@@ -58,7 +59,6 @@ def __init__(
             retryable_exceptions=[]
         )
         self.grammar = grammar
-        n_ctx = 4096
         if force_offline:
             model_file = find_file(hf_home() + "hub/models--" + model.replace("/", "--"), filename)
         self.llm = Llama(
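
One subtlety worth noting: the new n_ctx default is a default-argument expression, so int(os.getenv(LLAMA_CPP_N_CTX, "4096")) is evaluated once, when llama_chat.py is first imported, not on every call. The environment variable therefore has to be set before the module is imported. A minimal sketch (the class name ChatLlamaCpp is assumed from the module name; it is not visible in this diff):

import os

# Set the variable before importing llama_chat: the default argument
# int(os.getenv(LLAMA_CPP_N_CTX, "4096")) is evaluated at import time.
os.environ["LLAMA_CPP_N_CTX"] = "8192"

from bpm_ai_inference.llm.llama_cpp.llama_chat import ChatLlamaCpp  # assumed class name

llm = ChatLlamaCpp()               # picks up n_ctx=8192 from the environment
small = ChatLlamaCpp(n_ctx=2048)   # an explicit argument still overrides the env default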
3 changes: 2 additions & 1 deletion bpm_ai_inference/util/__init__.py
@@ -1 +1,2 @@
-FORCE_OFFLINE_FLAG = "FORCE_OFFLINE"
\ No newline at end of file
+FORCE_OFFLINE_FLAG = "FORCE_OFFLINE"
+LLAMA_CPP_N_CTX = "LLAMA_CPP_N_CTX"
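
Since the variable name is exported as a constant, callers inside the package can reference it instead of hard-coding the string; a small sketch:

import os
from bpm_ai_inference.util import LLAMA_CPP_N_CTX

os.environ[LLAMA_CPP_N_CTX] = "8192"  # equivalent to os.environ["LLAMA_CPP_N_CTX"] = "8192"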
