
Commit 9dd6a4f

chore: cache access to tokeniser
1 parent 8bf80a4

File tree

1 file changed

+8 -5 lines changed

server/api/debug/generate.py

Lines changed: 8 additions & 5 deletions
@@ -25,14 +25,15 @@ def generate(self, state: AppState, data: Query) -> ServerSentEvent:
         an endpoint for generating text directly from the LLM model
         """
         chat = state.chat
+        tokeniser = chat.tokeniser
 
-        prompt = chat.tokeniser.apply_chat_template(
+        prompt = tokeniser.apply_chat_template(
             [{'role': 'user', 'content': data.query}],
             tokenize=False,
             add_generation_prompt=True,
         )
 
-        return ServerSentEvent(chat.generate(chat.tokeniser(prompt).tokens()))
+        return ServerSentEvent(chat.generate(tokeniser(prompt).tokens()))
 
     @post('/benchmark', sync_to_thread=True)
     def benchmark(self, state: AppState, data: Query) -> Benchmark:
@@ -42,15 +43,17 @@ def benchmark(self, state: AppState, data: Query) -> Benchmark:
         an endpoint for benchmarking the LLM model
         """
         chat = state.chat
+        tokeniser = chat.tokeniser
+
         message: Message = {'role': 'user', 'content': data.query}
-        prompt = chat.tokeniser.apply_chat_template([message], add_generation_prompt=True, tokenize=False)
-        tokenised_prompt = chat.tokeniser(prompt).tokens()
+        prompt = tokeniser.apply_chat_template([message], add_generation_prompt=True, tokenize=False)
+        tokenised_prompt = tokeniser(prompt).tokens()
 
         start = perf_counter()
         response = ''.join(chat.generate(tokenised_prompt))
         total_time = perf_counter() - start
 
-        output_tokens = chat.tokeniser(response).tokens()
+        output_tokens = tokeniser(response).tokens()
         total_tokens = len(tokenised_prompt) + len(chat) + len(output_tokens)
 
         return Benchmark(
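
The change is the common Python micro-refactor of hoisting a repeated attribute access into a local variable: chat.tokeniser is resolved once per handler and then reused, so each call site is shorter and avoids a repeated attribute lookup. A minimal sketch of the pattern follows; the Chat stand-in and handle function are hypothetical, for illustration only:

class Chat:
    """Hypothetical stand-in for state.chat."""

    def __init__(self, tokeniser):
        self.tokeniser = tokeniser


def handle(chat: Chat, text: str) -> list[str]:
    # One attribute lookup up front, then cheap local access,
    # instead of writing chat.tokeniser at every call site.
    tokeniser = chat.tokeniser
    prompt = f'user: {text}'
    return tokeniser(prompt)


print(handle(Chat(str.split), 'cache the tokeniser'))
# ['user:', 'cache', 'the', 'tokeniser']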
