Skip to content

Commit

Permalink
BT-11899 check for kv cache reuse (#1099)
Browse files (browse the repository at this point in the history)
  • Loading branch information
bdubayah authored Aug 27, 2024
1 parent aa35e2e commit 7a7e4ab
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "truss"
- version = "0.9.30rc4"
+ version = "0.9.30rc5"
description = "A seamless bridge from model development to model delivery"
license = "MIT"
readme = "README.md"
Expand Down
2 changes: 1 addition & 1 deletion truss/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
"grpcio==1.64.0",
"grpcio-tools==1.64.0",
"transformers==4.43.2",
- "truss==0.9.30rc1",
+ "truss==0.9.30rc3",
"outlines==0.0.46",
"torch==2.4.0",
]
Expand Down
4 changes: 4 additions & 0 deletions truss/templates/trtllm-briton/src/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ def __init__(self, **kwargs):
self._kv_cache_free_gpu_mem_fraction = (
truss_trtllm_build_config.kv_cache_free_gpu_mem_fraction
)
+ self._enable_kv_cache_reuse = (
+ truss_trtllm_build_config.plugin_configuration.use_paged_context_fmha
+ )

self._hf_token = None
try:
Expand All @@ -115,6 +118,7 @@ def load(self):
engine_path: "{self._data_dir.resolve()}"
hf_tokenizer: "{self._tokenizer_repository}"
kv_cache_free_gpu_mem_fraction: {self._kv_cache_free_gpu_mem_fraction}
+ enable_kv_cache_reuse: {"true" if self._enable_kv_cache_reuse else "false"}
fsm_cache_dir: "{FSM_CACHE_DIR}"
"""
config_pbtxt_path = (self._data_dir / "briton_config.pbtxt").resolve()
Expand Down

0 comments on commit 7a7e4ab

Please sign in to comment.