From cb680ef1b8ffb9d67ec0984b237d77fc53c3e2a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Rame=CC=81?=
Date: Sun, 17 Mar 2024 12:36:40 +0100
Subject: [PATCH] fix(llm): the model token limit is for input+output and we
 did not take this into account

---
 src/features/llm-langchain.ts | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/features/llm-langchain.ts b/src/features/llm-langchain.ts
index a2f3b30..f297f96 100644
--- a/src/features/llm-langchain.ts
+++ b/src/features/llm-langchain.ts
@@ -511,7 +511,23 @@ CONTEXTE :
         // Due to using chained `.bind().withRetry()` above, callbacks and others must be defined there (here they won't be called)
       });
 
-      if (finishReason !== 'stop') {
+      if (finishReason === 'length') {
+        // The model has reached its length limit
+        // The `maxTokens` property of `ChatMistralAI` indicates something important: "The token count of your prompt plus max_tokens cannot exceed the model's context length"
+        // Note: we don't want to use `maxTokens` since it caps the response tokens, and we prefer to let the LLM tell as much as possible about the initiative being computed
+
+        // Just in case, we check that the local limit we configured matches the LLM being used
+        if (tokenUsage !== null) {
+          const usage = tokenUsage as TokenUsage; // TypeScript infers `never` here without casting because the assignment happens inside `callbacks`
+
+          if (usage.totalTokens !== undefined && usage.totalTokens > this.gptInstance.modelTokenLimit) {
+            throw new Error('the maximum model token length we defined locally does not seem to match the real model limit');
+          }
+        }
+
+        // The settings check passed: since we cannot know the total input+output length in advance, we throw an error so the parent can reduce the input length and retry until it fits
+        throw tokensReachTheLimitError;
+      } else if (finishReason !== 'stop') {
         throw new Error(`the generation has not completed fully according to the returned reason: ${finishReason}`);
       }
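
Note for reviewers (not part of the patch): the last added comment expects the parent caller to shrink the input and retry when `tokensReachTheLimitError` is thrown. The sketch below illustrates that intent only; `computeInitiativeWithRetry`, the `computeInitiative` wrapper, the 20% truncation strategy, and the retry bound are hypothetical and not defined in this repository.

// Hypothetical parent-side retry loop reacting to `tokensReachTheLimitError`.
// Assumptions: `computeInitiative` wraps the LangChain call patched above and
// `tokensReachTheLimitError` is the exported error instance it can throw.
async function computeInitiativeWithRetry(content: string, maxAttempts: number = 3): Promise<string> {
  let input = content;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await computeInitiative(input);
    } catch (error) {
      if (error === tokensReachTheLimitError && attempt < maxAttempts) {
        // Assumption: dropping the first 20% of the content is an acceptable way to reduce the prompt size
        input = input.slice(Math.floor(input.length * 0.2));
        continue;
      }

      throw error;
    }
  }

  throw tokensReachTheLimitError;
}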