Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ describe('CallToolsLLMReducer error isolation', () => {
const [payload] = runEvents.completeToolExecution.mock.calls[0];
expect(payload.status).toBe('error');
expect(payload.errorMessage).toBe('[exit code 42] compiler error: missing semicolon');
expect(payload.errorCode ?? null).toBeNull();
});

it('invokes manage tool via reducer without relying on instance logger field', async () => {
Expand Down
119 changes: 119 additions & 0 deletions packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import { describe, it, expect, vi } from 'vitest';
import { ResponseMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
import { CallToolsLLMReducer } from '../src/llm/reducers/callTools.llm.reducer';
import { createEventsBusStub, createRunEventsStub } from './helpers/runEvents.stub';
import { Signal } from '../src/signal';
import z from 'zod';
import { McpError } from '../src/nodes/mcp/types';
import { LocalMCPServerTool } from '../src/nodes/mcp/localMcpServer.tool';
import type { LocalMCPServerNode } from '../src/nodes/mcp/localMcpServer.node';
import { ShellCommandTool } from '../src/nodes/tools/shell_command/shell_command.tool';

/**
 * Builds a minimal reducer state whose only message is an LLM response
 * carrying a single function call for `toolName`.
 *
 * @param toolName name of the tool the call targets
 * @param callId   call identifier; also used to derive `meta.lastLLMEventId`
 * @param args     raw (JSON string) arguments of the call
 */
const buildState = (toolName: string, callId: string, args: string) => {
  const toolCall = new ToolCallMessage({
    type: 'function_call',
    name: toolName,
    call_id: callId,
    arguments: args,
  } as any);
  const llmResponse = new ResponseMessage({ output: [toolCall.toPlain() as any] } as any);

  const state = {
    messages: [llmResponse],
    meta: { lastLLMEventId: `evt-${callId}` },
    context: { messageIds: [], memory: [] },
  };
  return state as any;
};

/**
 * Creates the invocation context passed to the reducer: fixed thread/run ids,
 * fresh finish/terminate signals and a caller agent stub that reports
 * `'agent-node'` as its node id.
 */
const createCtx = () => {
  const finishSignal = new Signal();
  const terminateSignal = new Signal();
  return {
    threadId: 'thread-span',
    runId: 'run-span',
    finishSignal,
    terminateSignal,
    callerAgent: { getAgentNodeId: () => 'agent-node' },
  };
};

/**
 * Wraps a mocked `callTool` into an object shaped like the MCP server node:
 * only `config.namespace` and `callTool` are provided, the rest is cast away.
 */
const createMcpNode = (callTool: ReturnType<typeof vi.fn>) => {
  const nodeStub = { config: { namespace: 'demo' }, callTool };
  return nodeStub as unknown as LocalMCPServerNode;
};

describe('CallToolsLLMReducer tracing via run events', () => {
  /** Builds the schema-less MCP tool under test on top of a stubbed server node. */
  const makeMcpTool = (callTool: ReturnType<typeof vi.fn>) =>
    new LocalMCPServerTool('codex_apply_patch', 'Patch tool', z.object({}), createMcpNode(callTool));

  /** Asserts that exactly one completion was recorded and returns its payload. */
  const singleCompletion = (runEvents: ReturnType<typeof createRunEventsStub>) => {
    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
    const [recorded] = runEvents.completeToolExecution.mock.calls[0];
    return recorded;
  };

  it('marks MCP exceptions as failed tool executions', async () => {
    const tool = makeMcpTool(
      vi.fn(async () => {
        throw new McpError('upstream failure', { code: 'BAD_INPUT' });
      }),
    );
    const runEvents = createRunEventsStub();
    const eventsBus = createEventsBusStub();
    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });

    await reducer.invoke(buildState(tool.name, 'call-mcp-throw', JSON.stringify({})), createCtx() as any);

    const completion = singleCompletion(runEvents);
    expect(completion.status).toBe('error');
    expect(String(completion.errorMessage ?? '')).toContain('upstream failure');
    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
  });

  it('does not reclassify MCP payloads without isError flags', async () => {
    // The payload only *looks* like a failure; the protocol-level flag says success.
    const content = JSON.stringify({ status: 500, error: 'Search failed' });
    const tool = makeMcpTool(vi.fn(async () => ({ isError: false, content })));
    const runEvents = createRunEventsStub();
    const eventsBus = createEventsBusStub();
    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });

    const outcome = await reducer.invoke(
      buildState(tool.name, 'call-mcp-logical', JSON.stringify({})),
      createCtx() as any,
    );

    const lastMessage = outcome.messages.at(-1) as ToolCallOutputMessage;
    expect(lastMessage).toBeInstanceOf(ToolCallOutputMessage);
    expect(lastMessage.text).toContain('Search failed');

    const completion = singleCompletion(runEvents);
    expect(completion.status).toBe('success');
    expect(completion.errorMessage).toBeNull();
    expect(completion.errorCode ?? null).toBeNull();
  });

  it('keeps shell command tracing flagged on non-zero exit codes', async () => {
    const runEvents = createRunEventsStub();
    const eventsBus = createEventsBusStub();
    const archiveStub = { createSingleFileTar: vi.fn(async () => Buffer.from('')) };
    const prismaClientStub = {
      container: { findUnique: vi.fn(async () => null) },
      containerEvent: { findFirst: vi.fn(async () => null) },
    };
    const prismaStub = { getClient: vi.fn(() => prismaClientStub) };

    // Shell tool whose streaming execution always reports a failing exit code.
    class StubShellCommandTool extends ShellCommandTool {
      constructor() {
        super(archiveStub as any, runEvents as any, eventsBus as any, prismaStub as any);
      }

      override async executeStreaming(): Promise<string> {
        return '[exit code 2] compiler failure';
      }
    }

    const tool = new StubShellCommandTool();
    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });

    const outcome = await reducer.invoke(
      buildState(tool.name, 'call-shell-span', JSON.stringify({ command: 'fail' })),
      createCtx() as any,
    );

    const lastMessage = outcome.messages.at(-1) as ToolCallOutputMessage;
    expect(lastMessage.text).toContain('exit code 2');

    const completion = singleCompletion(runEvents);
    expect(completion.status).toBe('error');
    expect(String(completion.errorMessage ?? '')).toContain('exit code 2');
    expect(completion.errorCode ?? null).toBeNull();
  });
});
64 changes: 64 additions & 0 deletions packages/platform-server/__tests__/mcp.error.mapping.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ describe('CallToolsLLMReducer MCP error mapping', () => {
const [completionPayload] = runEvents.completeToolExecution.mock.calls[0];
expect(completionPayload.status).toBe('error');
expect(completionPayload.errorMessage).toContain('apply_patch failed (code=PATCH_FAIL retriable=false)');
expect(completionPayload.errorCode).toBe('MCP_CALL_ERROR');

const lastMessage = result.messages.at(-1) as ToolCallOutputMessage;
expect(lastMessage).toBeInstanceOf(ToolCallOutputMessage);
Expand All @@ -72,3 +73,66 @@ describe('CallToolsLLMReducer MCP error mapping', () => {
);
});
});

describe('CallToolsLLMReducer MCP payload handling (protocol-only)', () => {
  // Monotonic counter for call ids: keeps every run reproducible, unlike a
  // random suffix (which could even come out empty).
  let callSequence = 0;

  /**
   * Runs the reducer against an MCP tool that answers with `payload`
   * serialized as content and `isError: false`.
   *
   * @param payload JSON-serializable body returned by the mocked MCP call
   * @returns the reducer result plus the single recorded completion payload
   */
  const invokeWithPayload = async (payload: Record<string, unknown>) => {
    const callTool = vi.fn(async () => ({ isError: false, content: JSON.stringify(payload) }));
    const node = createNode(callTool);
    const tool = new LocalMCPServerTool('codex_apply_patch', 'Codex patch', z.object({}), node);
    const runEvents = createRunEventsStub();
    const eventsBus = createEventsBusStub();
    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
    callSequence += 1;
    const state = buildState(tool.name, `call-payload-${callSequence}`, JSON.stringify({}));
    const ctx = createContext();
    const result = await reducer.invoke(state, ctx as any);

    // Fail with a clear message when no completion was recorded, instead of a
    // TypeError on `completion.status` further down in the individual tests.
    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
    const [completion] = runEvents.completeToolExecution.mock.calls[0];
    return { result, completion };
  };

  it('treats HTTP-looking payloads as success when isError is false', async () => {
    const { result, completion } = await invokeWithPayload({ status: 401, error: 'Search failed' });
    expect(completion.status).toBe('success');
    expect(completion.errorCode ?? null).toBeNull();
    expect(completion.errorMessage).toBeNull();

    const last = result.messages.at(-1) as ToolCallOutputMessage;
    expect(last).toBeInstanceOf(ToolCallOutputMessage);
    expect(last.text).toContain('Search failed');
  });

  it('does not infer failures from statusCode when isError is false', async () => {
    const { completion } = await invokeWithPayload({ statusCode: 403, message: 'Forbidden' });
    expect(completion.status).toBe('success');
    expect(completion.errorCode ?? null).toBeNull();
    expect(completion.errorMessage).toBeNull();
  });

  it('does not treat string status with numeric statusCode as failure without isError', async () => {
    const { completion } = await invokeWithPayload({ status: 'error', statusCode: 500, message: 'Internal error' });
    expect(completion.status).toBe('success');
    expect(completion.errorCode ?? null).toBeNull();
    expect(completion.errorMessage).toBeNull();
  });

  it('still returns success for payloads without status metadata', async () => {
    const { result, completion } = await invokeWithPayload({ error: 'domain data' });
    expect(completion.status).toBe('success');
    expect(completion.errorCode ?? null).toBeNull();
    expect(completion.errorMessage).toBeNull();

    const last = result.messages.at(-1) as ToolCallOutputMessage;
    expect(last.text).toContain('domain data');
  });

  it('keeps success for payloads with non-error status codes', async () => {
    const { completion } = await invokeWithPayload({ status: 200, error: 'none' });
    expect(completion.status).toBe('success');
    expect(completion.errorCode ?? null).toBeNull();
  });

  it('treats non-numeric status strings as success without isError flags', async () => {
    const { completion } = await invokeWithPayload({ status: 'error', error: 'Bad' });
    expect(completion.status).toBe('success');
    expect(completion.errorCode ?? null).toBeNull();
  });
});
2 changes: 2 additions & 0 deletions packages/platform-server/src/events/run-events.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ export interface ToolExecutionCompleteArgs {
status: ToolExecStatus;
output?: Prisma.InputJsonValue | null;
errorMessage?: string | null;
errorCode?: string | null;
raw?: Prisma.InputJsonValue | null;
endedAt?: Date;
}
Expand Down Expand Up @@ -1409,6 +1410,7 @@ export class RunEventsService {
status: execStatus === ToolExecStatus.success ? RunEventStatus.success : RunEventStatus.error,
endedAt,
errorMessage: args.errorMessage ?? null,
errorCode: args.errorCode ?? null,
},
});
const durationMs = event.startedAt ? Math.max(0, endedAt.getTime() - event.startedAt.getTime()) : null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import type { ResponseFunctionCallOutputItemList } from 'openai/resources/respon
import { contextItemInputFromMessage } from '../services/context-items.utils';
import { persistContextItems } from '../services/context-items.append';
import { ShellCommandTool } from '../../nodes/tools/shell_command/shell_command.tool';
import { LocalMCPServerTool } from '../../nodes/mcp/localMcpServer.tool';

type ToolCallErrorCode =
| 'BAD_JSON_ARGS'
Expand All @@ -32,12 +33,27 @@ type ToolCallErrorPayload = {
};
type ToolCallStructuredOutput = ToolCallRaw | ToolCallErrorPayload;

/**
 * Type guard for structured tool-call error payloads.
 *
 * Accepts only non-null, non-array objects whose `status` is exactly
 * `'error'` and whose `error_code` is a string.
 */
const isToolCallErrorPayload = (value: unknown): value is ToolCallErrorPayload => {
  const isPlainObject = typeof value === 'object' && value !== null && !Array.isArray(value);
  if (!isPlainObject) return false;

  const payload = value as Partial<ToolCallErrorPayload>;
  if (payload.status !== 'error') return false;
  return typeof payload.error_code === 'string';
};

/**
 * Outcome of a single tool call as tracked inside the reducer.
 *
 * `status` decides whether the execution is recorded as a success or an
 * error; `raw` and `output` are both persisted when the tool execution event
 * is completed.
 */
type ToolCallResult = {
status: 'success' | 'error';
// Unstructured tool result (see ToolCallRaw).
raw: ToolCallRaw;
// Either the raw result or a structured error payload.
output: ToolCallStructuredOutput;
};

/**
 * Failure details captured for a tool call whose result has status `'error'`,
 * later used to fill `errorMessage` / `errorCode` on the completed tool
 * execution event.
 */
type ToolTracingFailure = {
// Call id of the originating tool call message.
toolCallId: string;
// Registered tool name, or the name from the call when no tool was resolved.
toolName: string;
// 'mcp' or 'shell' for the recognized tool classes; otherwise left undefined.
toolSource?: string;
// `error_code` of the structured error payload, when the output is one.
errorCode?: string;
errorMessage: string;
// `retriable` flag of the structured error payload, when the output is one.
retriable?: boolean;
};

/** Type guard: a raw tool result is either a string or an array. */
const isToolCallRaw = (value: unknown): value is ToolCallRaw => {
  if (Array.isArray(value)) return true;
  return typeof value === 'string';
};

Expand Down Expand Up @@ -177,6 +193,15 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
let startedEventId: string | null = null;
let caughtError: unknown | null = null;
let response: ToolCallResult | undefined;
let traceFailure: ToolTracingFailure | null = null;

// Pass-through for every returned result: when the result is an error,
// remember its tracing details in `traceFailure`; the result itself is
// returned untouched.
const finalizeResponse = (result: ToolCallResult): ToolCallResult => {
  const failure = this.buildToolTracingFailure({ response: result, tool, toolCall });
  if (failure !== null) traceFailure = failure;
  return result;
};

const createErrorResponse = (args: {
code: ToolCallErrorCode;
Expand Down Expand Up @@ -212,7 +237,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
message: `Tool ${toolCall.name} is not registered.`,
originalArgs: toolCall.args,
});
return response;
return finalizeResponse(response);
}

let parsedArgs: unknown;
Expand All @@ -233,7 +258,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
originalArgs: toolCall.args,
details,
});
return response;
return finalizeResponse(response);
}

const validation = tool.schema.safeParse(parsedArgs);
Expand All @@ -245,7 +270,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
originalArgs: parsedArgs,
details: issues,
});
return response;
return finalizeResponse(response);
}
const input = validation.data;

Expand Down Expand Up @@ -349,14 +374,14 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
throw new Error('tool_response_missing');
}

return response;
return finalizeResponse(response);
} catch (err) {
caughtError = err;
throw err instanceof Error ? err : new Error(String(err));
} finally {
if (startedEventId) {
try {
await this.finalizeToolExecutionEvent(startedEventId, response, caughtError);
await this.finalizeToolExecutionEvent(startedEventId, response, caughtError, traceFailure);
} catch (finalizeErr: unknown) {
this.logger.warn(
`Failed to finalize tool execution event${this.format({
Expand Down Expand Up @@ -443,13 +468,15 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
eventId: string,
response: ToolCallResult | undefined,
caughtError: unknown | null,
traceFailure: ToolTracingFailure | null,
): Promise<void> {
if (caughtError !== null) {
const errorMessage = caughtError instanceof Error ? caughtError.message : String(caughtError);
await this.runEvents.completeToolExecution({
eventId,
status: ToolExecStatus.error,
errorMessage,
errorCode: traceFailure?.errorCode ?? null,
raw: null,
});
await this.eventsBus.publishEvent(eventId, 'update');
Expand All @@ -459,13 +486,51 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
if (!response) return;

const status = response.status === 'success' ? ToolExecStatus.success : ToolExecStatus.error;
const resolvedErrorMessage =
status === ToolExecStatus.success ? null : traceFailure?.errorMessage ?? this.extractErrorMessage(response);
const resolvedErrorCode =
status === ToolExecStatus.success ? null : traceFailure?.errorCode ?? this.extractErrorCode(response);
await this.runEvents.completeToolExecution({
eventId,
status,
output: this.toJson(response.output ?? response.raw),
raw: this.toJson(response.raw),
errorMessage: status === ToolExecStatus.success ? null : this.extractErrorMessage(response),
errorMessage: resolvedErrorMessage,
errorCode: resolvedErrorCode,
});
await this.eventsBus.publishEvent(eventId, 'update');
}

/**
 * Derives tracing details for a failed tool call.
 *
 * @param params.response result of the tool call
 * @param params.tool     resolved tool, if any
 * @param params.toolCall originating tool call message
 * @returns `null` unless `response.status` is `'error'`; otherwise the failure
 *   record. Message and code come from the structured error payload when the
 *   output is one; the message falls back to `extractErrorMessage` and then to
 *   the generic `'Tool execution failed'`.
 */
private buildToolTracingFailure(params: {
  response: ToolCallResult;
  tool: FunctionTool | undefined;
  toolCall: ToolCallMessage;
}): ToolTracingFailure | null {
  const { response, tool, toolCall } = params;
  if (response.status !== 'error') return null;

  const structured = isToolCallErrorPayload(response.output) ? response.output : null;
  const errorMessage = structured?.message ?? this.extractErrorMessage(response) ?? 'Tool execution failed';

  const failure: ToolTracingFailure = {
    toolCallId: toolCall.callId,
    toolName: tool?.name ?? toolCall.name,
    toolSource: this.resolveToolSource(tool),
    errorCode: structured?.error_code,
    errorMessage,
    retriable: structured?.retriable,
  };
  return failure;
}

/**
 * Reads the `error_code` of a structured error payload.
 *
 * @returns the code, or `null` when there is no response or its output is not
 *   a structured error payload.
 */
private extractErrorCode(response: ToolCallResult | undefined): string | null {
  const output = response?.output;
  return isToolCallErrorPayload(output) ? output.error_code : null;
}

/**
 * Classifies a tool by its class: `'mcp'` for `LocalMCPServerTool`, `'shell'`
 * for `ShellCommandTool`, and `undefined` for anything else (including no tool).
 */
private resolveToolSource(tool: FunctionTool | undefined): string | undefined {
  if (tool instanceof LocalMCPServerTool) return 'mcp';
  return tool instanceof ShellCommandTool ? 'shell' : undefined;
}

}