From cb680ef1b8ffb9d67ec0984b237d77fc53c3e2a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Rame=CC=81?=
Date: Sun, 17 Mar 2024 12:36:40 +0100
Subject: [PATCH] fix(llm): the model token limit is for input+output and we
 did not take this into account

---
 src/features/llm-langchain.ts | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/features/llm-langchain.ts b/src/features/llm-langchain.ts
index a2f3b30..f297f96 100644
--- a/src/features/llm-langchain.ts
+++ b/src/features/llm-langchain.ts
@@ -511,7 +511,23 @@ CONTEXTE :
         // Due to using chained `.bind().withRetry()` above, callbacks and others must be defined there (here they won't be called)
       });
 
-      if (finishReason !== 'stop') {
+      if (finishReason === 'length') {
+        // The model has reached its length limit
+        // The `maxTokens` property of `ChatMistralAI` indicates something important: "The token count of your prompt plus max_tokens cannot exceed the model's context length"
+        // Note: we don't want to use `maxTokens` since it caps the response tokens, and we prefer to let the LLM tell as much as possible about the initiative being computed
+
+        // Just in case, we check that the local limit we configured matches the LLM being used
+        if (tokenUsage !== null) {
+          const usage = tokenUsage as TokenUsage; // TypeScript infers `never` here without casting because the assignment happens inside `callbacks`
+
+          if (usage.totalTokens !== undefined && usage.totalTokens > this.gptInstance.modelTokenLimit) {
+            throw new Error('the maximum model token length we defined locally does not seem to match the real model limit');
+          }
+        }
+
+        // The settings check passed: since we cannot know the total input+output length in advance, we throw an error so the parent can reduce the input length and retry until it fits
+        throw tokensReachTheLimitError;
+      } else if (finishReason !== 'stop') {
         throw new Error(`the generation has not completed fully according to the returned reason: ${finishReason}`);
       }
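
Note for reviewers (not part of the patch): the last added comment expects the parent caller to shrink the input and retry when `tokensReachTheLimitError` is thrown. The sketch below illustrates that intent only; `computeInitiativeWithRetry`, the `computeInitiative` wrapper, the 20% truncation strategy, and the retry bound are hypothetical and not defined in this repository.

// Hypothetical parent-side retry loop reacting to `tokensReachTheLimitError`.
// Assumptions: `computeInitiative` wraps the LangChain call patched above and
// `tokensReachTheLimitError` is the exported error instance it can throw.
async function computeInitiativeWithRetry(content: string, maxAttempts: number = 3): Promise<string> {
  let input = content;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await computeInitiative(input);
    } catch (error) {
      if (error === tokensReachTheLimitError && attempt < maxAttempts) {
        // Assumption: dropping the first 20% of the content is an acceptable way to reduce the prompt size
        input = input.slice(Math.floor(input.length * 0.2));
        continue;
      }

      throw error;
    }
  }

  throw tokensReachTheLimitError;
}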