@@ -9,7 +9,11 @@ import {
   renderRetrievalActionForModel,
   retrievalMetaPrompt,
 } from "@app/lib/api/assistant/actions/retrieval";
-import { getSupportedModelConfig } from "@app/lib/assistant";
+import {
+  getSupportedModelConfig,
+  GPT_4_32K_MODEL_ID,
+  GPT_4_MODEL_CONFIG,
+} from "@app/lib/assistant";
 import { Authenticator } from "@app/lib/auth";
 import { CoreAPI } from "@app/lib/core_api";
 import { redisClient } from "@app/lib/redis";
@@ -328,13 +332,15 @@ export async function* runGeneration(
     return;
   }

-  const contextSize = getSupportedModelConfig(c.model).contextSize;
+  let model = c.model;
+
+  const contextSize = getSupportedModelConfig(model).contextSize;

   const MIN_GENERATION_TOKENS = 2048;

   if (contextSize < MIN_GENERATION_TOKENS) {
     throw new Error(
-      `Model contextSize unexpectedly small for model: ${c.model.providerId} ${c.model.modelId}`
+      `Model contextSize unexpectedly small for model: ${model.providerId} ${model.modelId}`
     );
   }
@@ -343,7 +349,7 @@ export async function* runGeneration(
   // Turn the conversation into a digest that can be presented to the model.
   const modelConversationRes = await renderConversationForModel({
     conversation,
-    model: c.model,
+    model,
     prompt,
     allowedTokenCount: contextSize - MIN_GENERATION_TOKENS,
   });
@@ -356,17 +362,30 @@ export async function* runGeneration(
       messageId: agentMessage.sId,
       error: {
         code: "internal_server_error",
-        message: `Failed tokenization for ${c.model.providerId} ${c.model.modelId}: ${modelConversationRes.error.message}`,
+        message: `Failed tokenization for ${model.providerId} ${model.modelId}: ${modelConversationRes.error.message}`,
       },
     };
     return;
   }

+  // If model is gpt4-32k but tokens used is less than GPT_4_CONTEXT_SIZE-MIN_GENERATION_TOKENS,
+  // then we override the model to gpt4 standard (8k context, cheaper).
+  if (
+    model.modelId === GPT_4_32K_MODEL_ID &&
+    modelConversationRes.value.tokensUsed <
+      GPT_4_MODEL_CONFIG.contextSize - MIN_GENERATION_TOKENS
+  ) {
+    model = {
+      modelId: GPT_4_MODEL_CONFIG.modelId,
+      providerId: GPT_4_MODEL_CONFIG.providerId,
+    };
+  }
+
   const config = cloneBaseConfig(
     DustProdActionRegistry["assistant-v2-generator"].config
   );
-  config.MODEL.provider_id = c.model.providerId;
-  config.MODEL.model_id = c.model.modelId;
+  config.MODEL.provider_id = model.providerId;
+  config.MODEL.model_id = model.modelId;
   config.MODEL.temperature = c.temperature;

   // This is the console.log you want to uncomment to generate inputs for the generator app.
@@ -381,7 +400,7 @@ export async function* runGeneration(
     {
       workspaceId: conversation.owner.sId,
       conversationId: conversation.sId,
-      model: c.model,
+      model: model,
       temperature: c.temperature,
     },
     "[ASSISTANT_TRACE] Generation exection"
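The substantive change in this diff is the downgrade rule in the fourth hunk: when the agent is configured with gpt-4-32k but the rendered conversation already fits in the standard gpt-4 window with room to generate, the cheaper model is used instead. Below is a minimal standalone sketch of that rule. GPT_4_32K_MODEL_ID, GPT_4_MODEL_CONFIG, and MIN_GENERATION_TOKENS mirror the identifiers in the diff, but the GenerationModel shape and the concrete constant values here are illustrative assumptions, not the actual definitions in @app/lib/assistant.

type GenerationModel = { providerId: string; modelId: string };

// Assumed values for illustration; the real constants live in @app/lib/assistant.
const GPT_4_32K_MODEL_ID = "gpt-4-32k";
const GPT_4_MODEL_CONFIG = {
  providerId: "openai",
  modelId: "gpt-4",
  contextSize: 8192, // standard gpt-4 context window
};
const MIN_GENERATION_TOKENS = 2048;

// Pick the model to run generation with: if the conversation digest already
// fits in gpt-4's window while leaving MIN_GENERATION_TOKENS for the
// completion, there is no reason to pay for the 32k variant.
function pickGenerationModel(
  model: GenerationModel,
  tokensUsed: number
): GenerationModel {
  if (
    model.modelId === GPT_4_32K_MODEL_ID &&
    tokensUsed < GPT_4_MODEL_CONFIG.contextSize - MIN_GENERATION_TOKENS
  ) {
    return {
      modelId: GPT_4_MODEL_CONFIG.modelId,
      providerId: GPT_4_MODEL_CONFIG.providerId,
    };
  }
  return model;
}

The check is essentially free at this point in the flow, because renderConversationForModel has already computed tokensUsed while truncating the conversation to the allowed token count.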