Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
cbf52ea
🌍 i18n: Update translation.json with latest translations (#10175)
github-actions[bot] Oct 22, 2025
e3d33fe
📦 chore: update `@librechat/agents` to v2.4.86 (#10216)
danny-avila Oct 22, 2025
d8d5d59
♻️ refactor: Message Cache Clearing Logic into Reusable Helper (#10226)
danny-avila Oct 22, 2025
87d7ee4
🌐 feat: Configurable Domain and Port for Vite Dev Server (#10180)
sbruel Oct 22, 2025
9495520
📦 chore: update `vite` to v6.4.1 and `@playwright/test` to v1.56.1 (#…
danny-avila Oct 22, 2025
05c9195
🛠️ fix: Agent Tools Modal on First-Time Agent Creation (#10234)
sbruel Oct 27, 2025
cbbbde3
🌍 i18n: Update translation.json with latest translations (#10229)
github-actions[bot] Oct 27, 2025
90e610c
🎪 refactor: Allow Last Model Spec Selection without Prioritizing (#10…
danny-avila Oct 27, 2025
13b784a
🧼 fix: Sanitize MCP Server Selection Against Config (#10243)
ruggishop Oct 27, 2025
d46dde4
👫 fix: Update Entra ID group retrieval to use getMemberGroups and add…
maxesse Oct 27, 2025
64df545
🌍 i18n: Update translation.json with latest translations (#10259)
github-actions[bot] Oct 27, 2025
33d6b33
📛 feat: Chat Badges via Model Specs (#10272)
danny-avila Oct 27, 2025
0446d0e
♿ fix: Address Accessibility Issues (#10260)
dustinhealy Oct 27, 2025
7973cb4
🔃 refactor: Clear MCP only on Model Spec Selection without MCP Server…
danny-avila Oct 28, 2025
9e77f83
🎛️ feat: Custom Environment Variable Support to RAG API Helm Chart (#…
PaulusTM Oct 28, 2025
2502098
🌍 i18n: Update translation.json with latest translations (#10274)
github-actions[bot] Oct 28, 2025
0e05ff4
🔄 refactor: OAI Image Edit Proxy, Speech Settings Handling, Import Qu…
danny-avila Oct 28, 2025
a7f4a47
chore: initial work
iamariezflores Oct 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 102 additions & 26 deletions api/app/clients/BaseClient.js
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,47 @@ class BaseClient {
}, '');
}

/**
* Assigns priority scores to messages based on content type and recency
* @param {TMessage[]} messages - Array of messages
* @returns {Array<{message: TMessage, priority: number, index: number}>}
*/
assignMessagePriorities(messages) {
return messages.map((message, index) => {
let priority = 1.0; // Base priority

// Recency bonus (more recent = higher priority)
// Note: messages are ordered oldest to newest, so higher index = more recent
const recencyFactor = index / Math.max(messages.length - 1, 1);
priority += recencyFactor * 0.3;

// Check for tool calls
if (message.content && Array.isArray(message.content)) {
const hasToolCalls = message.content.some((item) => item.type === 'tool_call');
const hasMCPToolCalls = message.content.some(
(item) =>
item.type === 'tool_call' &&
item.tool_call?.name?.includes?.(Constants.mcp_delimiter),
);

if (hasToolCalls) {
priority += 0.5; // Tool calls are important
}

if (hasMCPToolCalls) {
priority += 0.3; // MCP tool calls are extra important
}
}

// User messages slightly higher priority than assistant
if (message.role === 'user') {
priority += 0.2;
}

return { message, priority, index };
});
}

/**
* This method processes an array of messages and returns a context of messages that fit within a specified token limit.
* It iterates over the messages from newest to oldest, adding them to the context until the token limit is reached.
Expand All @@ -395,42 +436,66 @@ class BaseClient {
// start with 3 tokens for the label after all messages have been counted.
let currentTokenCount = 3;
const instructionsTokenCount = instructions?.tokenCount ?? 0;
let remainingContextTokens =
const maxTokensForContext =
(maxContextTokens ?? this.maxContextTokens) - instructionsTokenCount;
const messages = [..._messages];

const context = [];

if (currentTokenCount < remainingContextTokens) {
while (messages.length > 0 && currentTokenCount < remainingContextTokens) {
if (messages.length === 1 && instructions) {
break;
}
const poppedMessage = messages.pop();
const { tokenCount } = poppedMessage;

if (poppedMessage && currentTokenCount + tokenCount <= remainingContextTokens) {
context.push(poppedMessage);
currentTokenCount += tokenCount;
} else {
messages.push(poppedMessage);
break;
}
// Combine messages with instructions if instructions exist
const orderedMessages = instructions
? this.addInstructions(_messages, instructions)
: [..._messages];

// Assign priorities to messages (excluding instructions from priority calculation)
// Map original indices when instructions are present
const messagesToPrioritize = instructions ? orderedMessages.slice(1) : orderedMessages;
const prioritizedMessages = this.assignMessagePriorities(messagesToPrioritize).map((item) => ({
...item,
originalIndex: instructions ? item.index + 1 : item.index, // Adjust index if instructions were added
}));

// Sort by priority (descending) but keep track of original order
const sortedByPriority = [...prioritizedMessages].sort((a, b) => b.priority - a.priority);

const selectedMessages = [];
const selectedOriginalIndices = new Set();

// Select messages based on priority until we hit token limit
for (const { message, originalIndex } of sortedByPriority) {
const tokenCount = message.tokenCount || 0;
if (currentTokenCount + tokenCount <= maxTokensForContext) {
selectedMessages.push({ message, originalIndex });
selectedOriginalIndices.add(originalIndex);
currentTokenCount += tokenCount;
}
}

// Re-sort by original order
selectedMessages.sort((a, b) => a.originalIndex - b.originalIndex);

const context = selectedMessages.map(({ message }) => message);
const messagesToRefine = prioritizedMessages
.filter(({ originalIndex }) => !selectedOriginalIndices.has(originalIndex))
.map(({ message }) => message);

// Add instructions back to context if they exist
if (instructions) {
context.push(_messages[0]);
messages.shift();
context.unshift(instructions);
}

const prunedMemory = messages;
remainingContextTokens -= currentTokenCount;
const remainingContextTokens = maxTokensForContext - currentTokenCount;

logger.debug('[BaseClient] Priority-based context selection:', {
total: _messages.length,
selected: context.length - (instructions ? 1 : 0),
refined: messagesToRefine.length,
tokenCount: currentTokenCount,
maxTokens: maxTokensForContext,
instructionsTokens: instructionsTokenCount,
});

return {
context: context.reverse(),
context,
remainingContextTokens,
messagesToRefine: prunedMemory,
messagesToRefine,
};
}

Expand All @@ -456,14 +521,25 @@ class BaseClient {
}

if (this.clientName === EModelEndpoint.agents) {
const hasMCPTools = this.options?.agent?.tools?.some(tool =>
tool.name?.includes?.(Constants.mcp_delimiter)
);

const { dbMessages, editedIndices } = truncateToolCallOutputs(
orderedMessages,
this.maxContextTokens,
this.getTokenCountForMessage.bind(this),
{
threshold: 0.75,
mcpPriorityBoost: hasMCPTools
}
);

if (editedIndices.length > 0) {
logger.debug('[BaseClient] Truncated tool call outputs:', editedIndices);
logger.debug('[BaseClient] Truncated tool call outputs:', {
indices: editedIndices,
});
for (const index of editedIndices) {
formattedMessages[index].content = dbMessages[index].content;
}
Expand Down
38 changes: 37 additions & 1 deletion api/app/clients/memory/summaryBuffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,45 @@ const summaryBuffer = async ({
previous_summary = '',
prompt = SUMMARY_PROMPT,
signal,
preserveToolCalls = false,
}) => {
if (previous_summary) {
logger.debug('[summaryBuffer]', { previous_summary });
}

let toolCallSummary = '';
if(preserveToolCalls) {
const Constants = require('librechat-data-provider').Constants;
const toolCalls = context
.filter(msg => msg.tool_calls || msg.content?.some?.(c => c.type ===
'tool_call'))
.map(msg => {
const calls = msg.tool_calls ||
msg.content?.filter?.(c => c.type === 'tool_call') || [];

return calls.map(call => {
const isMCP = call.name?.includes?.(Constants.mcp_delimiter);
return {
name: call.name,
isMCP,
input: call.input,
output: call.output ?
(typeof call.output === 'string' ? call.output.slice(0, 200) :
JSON.stringify(call.output).slice(0, 200)) : null
};
});
})
.flat()
.filter(Boolean);

if(toolCalls.length > 0) {
toolCallSummary = '\n\nTool calls in this conversation:\n' +
toolCalls.map(tc =>
`-${tc.name}${tc.isMCP ? ' (MCP)' : ''}: ${tc.output || 'pending'}`
).join('\n');
}
}

const formattedMessages = formatLangChainMessages(context, formatOptions);
const memoryOptions = {
llm,
Expand Down Expand Up @@ -60,7 +94,9 @@ const summaryBuffer = async ({
logger.debug('[summaryBuffer]', { summary: predictSummary });
}

return { role: 'system', content: predictSummary };
const finalSummary = predictSummary + toolCallSummary;

return { role: 'system', content: finalSummary };
};

module.exports = { createSummaryBufferMemory, summaryBuffer };
Loading