diff --git a/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts b/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts
index 071b77171..bb141c183 100644
--- a/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts
+++ b/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts
@@ -171,6 +171,7 @@ describe('CallToolsLLMReducer error isolation', () => {
     const [payload] = runEvents.completeToolExecution.mock.calls[0];
     expect(payload.status).toBe('error');
     expect(payload.errorMessage).toBe('[exit code 42] compiler error: missing semicolon');
+    expect(payload.errorCode ?? null).toBeNull(); // exit-code failure: errorCode is expected to be absent or null
   });

   it('invokes manage tool via reducer without relying on instance logger field', async () => {
diff --git a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
new file mode 100644
index 000000000..328bd7c79
--- /dev/null
+++ b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
@@ -0,0 +1,119 @@
+import { describe, it, expect, vi } from 'vitest';
+import { ResponseMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
+import { CallToolsLLMReducer } from '../src/llm/reducers/callTools.llm.reducer';
+import { createEventsBusStub, createRunEventsStub } from './helpers/runEvents.stub';
+import { Signal } from '../src/signal';
+import z from 'zod';
+import { McpError } from '../src/nodes/mcp/types';
+import { LocalMCPServerTool } from '../src/nodes/mcp/localMcpServer.tool';
+import type { LocalMCPServerNode } from '../src/nodes/mcp/localMcpServer.node';
+import { ShellCommandTool } from '../src/nodes/tools/shell_command/shell_command.tool';
+
+const buildState = (toolName: string, callId: string, args: string) => { // builds a state with one response message wrapping a single function_call
+  const call = new ToolCallMessage({ type: 'function_call', name: toolName, call_id: callId, arguments: args } as any);
+  const response = new ResponseMessage({ output: [call.toPlain() as any] } as any);
+  return {
+    messages: [response],
+    meta: { lastLLMEventId: `evt-${callId}` },
+    context: { messageIds: [], memory: [] },
+  } as any;
+};
+
+const createCtx = () => ({ // invocation context passed to reducer.invoke in every test below
+  threadId: 'thread-span',
+  runId: 'run-span',
+  finishSignal: new Signal(),
+  terminateSignal: new Signal(),
+  callerAgent: { getAgentNodeId: () => 'agent-node' },
+});
+
+const createMcpNode = (callTool: ReturnType) => // NOTE(review): ReturnType has no type argument in this view - likely lost in extraction; confirm against the real file
+  ({ // stub exposing only config.namespace and callTool, cast to LocalMCPServerNode
+    config: { namespace: 'demo' },
+    callTool,
+  }) as unknown as LocalMCPServerNode;
+
+describe('CallToolsLLMReducer tracing via run events', () => {
+  it('marks MCP exceptions as failed tool executions', async () => {
+    const callTool = vi.fn(async () => {
+      throw new McpError('upstream failure', { code: 'BAD_INPUT' }); // the MCP call itself rejects, carrying code BAD_INPUT
+    });
+    const node = createMcpNode(callTool);
+    const tool = new LocalMCPServerTool('codex_apply_patch', 'Patch tool', z.object({}), node);
+
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const ctx = createCtx();
+    const state = buildState(tool.name, 'call-mcp-throw', JSON.stringify({}));
+
+    await reducer.invoke(state, ctx as any);
+
+    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
+    const [completion] = runEvents.completeToolExecution.mock.calls[0];
+    expect(completion.status).toBe('error');
+    expect(String(completion.errorMessage ?? '')).toContain('upstream failure');
+    expect(completion.errorCode).toBe('MCP_CALL_ERROR'); // expected code is MCP_CALL_ERROR, not the BAD_INPUT carried by the thrown McpError
+  });
+
+  it('does not reclassify MCP payloads without isError flags', async () => {
+    const payload = JSON.stringify({ status: 500, error: 'Search failed' }); // body resembles an HTTP failure...
+    const callTool = vi.fn(async () => ({ isError: false, content: payload })); // ...but the MCP result reports isError: false
+    const node = createMcpNode(callTool);
+    const tool = new LocalMCPServerTool('codex_apply_patch', 'Patch tool', z.object({}), node);
+
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const ctx = createCtx();
+    const state = buildState(tool.name, 'call-mcp-logical', JSON.stringify({}));
+
+    const result = await reducer.invoke(state, ctx as any);
+    const output = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(output).toBeInstanceOf(ToolCallOutputMessage);
+    expect(output.text).toContain('Search failed');
+
+    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
+    const [completion] = runEvents.completeToolExecution.mock.calls[0];
+    expect(completion.status).toBe('success'); // payload content alone must not turn the execution into an error
+    expect(completion.errorMessage).toBeNull();
+    expect(completion.errorCode ?? null).toBeNull();
+  });
+
+  it('keeps shell command tracing flagged on non-zero exit codes', async () => {
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const archiveStub = { createSingleFileTar: vi.fn(async () => Buffer.from('')) };
+    const prismaStub = {
+      getClient: vi.fn(() => ({
+        container: { findUnique: vi.fn(async () => null) },
+        containerEvent: { findFirst: vi.fn(async () => null) },
+      })),
+    };
+
+    class StubShellCommandTool extends ShellCommandTool { // subclass that replaces executeStreaming with a canned result
+      constructor() {
+        super(archiveStub as any, runEvents as any, eventsBus as any, prismaStub as any);
+      }
+
+      override async executeStreaming(): Promise { // NOTE(review): Promise has no type argument in this view - likely lost in extraction; confirm against the real file
+        return '[exit code 2] compiler failure';
+      }
+    }
+
+    const tool = new StubShellCommandTool();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const ctx = createCtx();
+    const state = buildState(tool.name, 'call-shell-span', JSON.stringify({ command: 'fail' }));
+
+    const result = await reducer.invoke(state, ctx as any);
+    const message = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(message.text).toContain('exit code 2');
+
+    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
+    const [completion] = runEvents.completeToolExecution.mock.calls[0];
+    expect(completion.status).toBe('error'); // the non-zero exit output is expected to be recorded as an error...
+    expect(String(completion.errorMessage ?? '')).toContain('exit code 2');
+    expect(completion.errorCode ?? null).toBeNull(); // ...but without an error code
+  });
+});
diff --git a/packages/platform-server/__tests__/mcp.error.mapping.test.ts b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
index 0386f45e6..cc8266ab4 100644
--- a/packages/platform-server/__tests__/mcp.error.mapping.test.ts
+++ b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
@@ -61,6 +61,7 @@ describe('CallToolsLLMReducer MCP error mapping', () => {
     const [completionPayload] = runEvents.completeToolExecution.mock.calls[0];
     expect(completionPayload.status).toBe('error');
     expect(completionPayload.errorMessage).toContain('apply_patch failed (code=PATCH_FAIL retriable=false)');
+    expect(completionPayload.errorCode).toBe('MCP_CALL_ERROR'); // the completion payload is now also expected to carry the error code

     const lastMessage = result.messages.at(-1) as ToolCallOutputMessage;
     expect(lastMessage).toBeInstanceOf(ToolCallOutputMessage);
@@ -72,3 +73,66 @@ describe('CallToolsLLMReducer MCP error mapping', () => {
     );
   });
 });
+
+describe('CallToolsLLMReducer MCP payload handling (protocol-only)', () => { // every case below returns isError: false from the MCP call and varies only the payload body
+  const invokeWithPayload = async (payload: Record) => { // NOTE(review): Record has no type arguments in this view - likely lost in extraction; confirm against the real file
+    const callTool = vi.fn(async () => ({ isError: false, content: JSON.stringify(payload) }));
+    const node = createNode(callTool); // createNode, buildState and createContext are not part of this hunk - presumably defined earlier in this test file
+    const tool = new LocalMCPServerTool('codex_apply_patch', 'Codex patch', z.object({}), node);
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const state = buildState(tool.name, `call-${Math.random().toString(36).slice(2, 6)}`, JSON.stringify({})); // call id gets a random base-36 suffix
+    const ctx = createContext();
+    const result = await reducer.invoke(state, ctx as any);
+    const completion = runEvents.completeToolExecution.mock.calls[0]?.[0]; // first argument of the first completeToolExecution call; undefined if it was never called
+    return { result, completion };
+  };
+
+  it('treats HTTP-looking payloads as success when isError is false', async () => {
+    const { result, completion } = await invokeWithPayload({ status: 401, error: 'Search failed' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
+
+    const last = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(last).toBeInstanceOf(ToolCallOutputMessage);
+    expect(last.text).toContain('Search failed'); // the payload text is still expected in the tool output message
+  });
+
+  it('does not infer failures from statusCode when isError is false', async () => {
+    const { completion } = await invokeWithPayload({ statusCode: 403, message: 'Forbidden' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
+  });
+
+  it('does not treat string status with numeric statusCode as failure without isError', async () => {
+    const { completion } = await invokeWithPayload({ status: 'error', statusCode: 500, message: 'Internal error' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
+  });
+
+  it('still returns success for payloads without status metadata', async () => {
+    const { result, completion } = await invokeWithPayload({ error: 'domain data' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
+
+    const last = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(last.text).toContain('domain data');
+  });
+
+  it('keeps success for payloads with non-error status codes', async () => {
+    const { completion } = await invokeWithPayload({ status: 200, error: 'none' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+  });
+
+  it('treats non-numeric status strings as success without isError flags', async () => {
+    const { completion } = await invokeWithPayload({ status: 'error', error: 'Bad' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+  });
+});
diff --git a/packages/platform-server/src/events/run-events.service.ts b/packages/platform-server/src/events/run-events.service.ts
index 0c39e639a..f9aa8f5c6 100644
--- a/packages/platform-server/src/events/run-events.service.ts
+++ b/packages/platform-server/src/events/run-events.service.ts
@@ -364,6 +364,7 @@ export interface ToolExecutionCompleteArgs {
   status: ToolExecStatus;
   output?: Prisma.InputJsonValue | null;
   errorMessage?: string | null;
+  errorCode?: string | null; // optional error code for the completed execution
   raw?: Prisma.InputJsonValue | null;
   endedAt?: Date;
 }
@@ -1409,6 +1410,7 @@ export class RunEventsService {
         status: execStatus === ToolExecStatus.success ? RunEventStatus.success : RunEventStatus.error,
         endedAt,
         errorMessage: args.errorMessage ?? null,
+        errorCode: args.errorCode ?? null, // falls back to null when the caller omits errorCode
       },
     });
     const durationMs = event.startedAt ? Math.max(0, endedAt.getTime() - event.startedAt.getTime()) : null;
diff --git a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
index a1db7a936..293ad1f68 100644
--- a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
@@ -10,6 +10,7 @@ import type { ResponseFunctionCallOutputItemList } from 'openai/resources/respon
 import { contextItemInputFromMessage } from '../services/context-items.utils';
 import { persistContextItems } from '../services/context-items.append';
 import { ShellCommandTool } from '../../nodes/tools/shell_command/shell_command.tool';
+import { LocalMCPServerTool } from '../../nodes/mcp/localMcpServer.tool'; // needed for the instanceof check in resolveToolSource

 type ToolCallErrorCode =
   | 'BAD_JSON_ARGS'
@@ -32,12 +33,27 @@ type ToolCallErrorPayload = {
 };
 type ToolCallStructuredOutput = ToolCallRaw | ToolCallErrorPayload;

+const isToolCallErrorPayload = (value: unknown): value is ToolCallErrorPayload => { // type guard for structured error payloads
+  if (!value || typeof value !== 'object' || Array.isArray(value)) return false; // rejects falsy values, non-objects and arrays
+  const candidate = value as Partial; // NOTE(review): Partial has no type argument in this view - likely lost in extraction; confirm against the real file
+  return candidate.status === 'error' && typeof candidate.error_code === 'string'; // requires status error plus a string error_code
+};
+
 type ToolCallResult = {
   status: 'success' | 'error';
   raw: ToolCallRaw;
   output: ToolCallStructuredOutput;
 };

+type ToolTracingFailure = { // failure details produced by buildToolTracingFailure
+  toolCallId: string;
+  toolName: string;
+  toolSource?: string; // mcp or shell as returned by resolveToolSource; undefined for other tools
+  errorCode?: string; // error_code of the structured error payload, when there is one
+  errorMessage: string;
+  retriable?: boolean;
+};
+
 const isToolCallRaw = (value: unknown): value is ToolCallRaw =>
   typeof value === 'string' || Array.isArray(value);

@@ -177,6 +193,15 @@ export class CallToolsLLMReducer extends Reducer {
     let startedEventId: string | null = null;
     let caughtError: unknown | null = null;
     let response: ToolCallResult | undefined;
+    let traceFailure: ToolTracingFailure | null = null; // filled in by finalizeResponse, handed to finalizeToolExecutionEvent in the finally block
+
+    const finalizeResponse = (result: ToolCallResult): ToolCallResult => { // records failure details for error results, then returns the result unchanged
+      const trace = this.buildToolTracingFailure({ response: result, tool, toolCall });
+      if (trace) {
+        traceFailure = trace;
+      }
+      return result;
+    };

     const createErrorResponse = (args: {
       code: ToolCallErrorCode;
@@ -212,7 +237,7 @@ export class CallToolsLLMReducer extends Reducer {
           message: `Tool ${toolCall.name} is not registered.`,
           originalArgs: toolCall.args,
         });
-        return response;
+        return finalizeResponse(response); // unregistered tool: capture failure details before returning
       }

       let parsedArgs: unknown;
@@ -233,7 +258,7 @@ export class CallToolsLLMReducer extends Reducer {
           originalArgs: toolCall.args,
           details,
         });
-        return response;
+        return finalizeResponse(response);
       }

       const validation = tool.schema.safeParse(parsedArgs);
@@ -245,7 +270,7 @@ export class CallToolsLLMReducer extends Reducer {
           originalArgs: parsedArgs,
           details: issues,
         });
-        return response;
+        return finalizeResponse(response);
       }

       const input = validation.data;
@@ -349,14 +374,14 @@ export class CallToolsLLMReducer extends Reducer {
         throw new Error('tool_response_missing');
       }

-      return response;
+      return finalizeResponse(response);
     } catch (err) {
       caughtError = err;
       throw err instanceof Error ? err : new Error(String(err));
     } finally {
       if (startedEventId) {
         try {
-          await this.finalizeToolExecutionEvent(startedEventId, response, caughtError);
+          await this.finalizeToolExecutionEvent(startedEventId, response, caughtError, traceFailure); // pass along any failure details captured by finalizeResponse
         } catch (finalizeErr: unknown) {
           this.logger.warn(
             `Failed to finalize tool execution event${this.format({
@@ -443,6 +468,7 @@ export class CallToolsLLMReducer extends Reducer {
     eventId: string,
     response: ToolCallResult | undefined,
     caughtError: unknown | null,
+    traceFailure: ToolTracingFailure | null, // failure details captured by the caller, or null
   ): Promise {
     if (caughtError !== null) {
       const errorMessage = caughtError instanceof Error ? caughtError.message : String(caughtError);
@@ -450,6 +476,7 @@ export class CallToolsLLMReducer extends Reducer {
         eventId,
         status: ToolExecStatus.error,
         errorMessage,
+        errorCode: traceFailure?.errorCode ?? null, // thrown-error path: null unless failure details with a code were captured
         raw: null,
       });
       await this.eventsBus.publishEvent(eventId, 'update');
@@ -459,13 +486,51 @@ export class CallToolsLLMReducer extends Reducer {
     if (!response) return;

     const status = response.status === 'success' ? ToolExecStatus.success : ToolExecStatus.error;
+    const resolvedErrorMessage = // null on success; otherwise the traced message, falling back to extractErrorMessage
+      status === ToolExecStatus.success ? null : traceFailure?.errorMessage ?? this.extractErrorMessage(response);
+    const resolvedErrorCode = // null on success; otherwise the traced code, falling back to extractErrorCode
+      status === ToolExecStatus.success ? null : traceFailure?.errorCode ?? this.extractErrorCode(response);
     await this.runEvents.completeToolExecution({
       eventId,
       status,
       output: this.toJson(response.output ?? response.raw),
       raw: this.toJson(response.raw),
-      errorMessage: status === ToolExecStatus.success ? null : this.extractErrorMessage(response),
+      errorMessage: resolvedErrorMessage,
+      errorCode: resolvedErrorCode,
     });
     await this.eventsBus.publishEvent(eventId, 'update');
   }
+
+  private buildToolTracingFailure(params: { // derives failure details from an error result; returns null for any other status
+    response: ToolCallResult;
+    tool: FunctionTool | undefined;
+    toolCall: ToolCallMessage;
+  }): ToolTracingFailure | null {
+    if (params.response.status !== 'error') return null;
+    const errorPayload = isToolCallErrorPayload(params.response.output) ? params.response.output : null;
+    const message = errorPayload?.message ?? this.extractErrorMessage(params.response) ?? 'Tool execution failed'; // payload message first, then extractErrorMessage, then a fixed default
+    const errorCode = errorPayload?.error_code; // undefined when the output is not a structured error payload
+    return {
+      toolCallId: params.toolCall.callId,
+      toolName: params.tool?.name ?? params.toolCall.name, // falls back to the name on the tool call when no tool was resolved
+      toolSource: this.resolveToolSource(params.tool),
+      errorCode,
+      errorMessage: message,
+      retriable: errorPayload?.retriable,
+    };
+  }
+
+  private extractErrorCode(response: ToolCallResult | undefined): string | null { // error_code of a structured error output, otherwise null
+    if (!response) return null;
+    const payload = isToolCallErrorPayload(response.output) ? response.output : null;
+    return payload?.error_code ?? null;
+  }
+
+  private resolveToolSource(tool: FunctionTool | undefined): string | undefined { // mcp for LocalMCPServerTool, shell for ShellCommandTool, undefined otherwise
+    if (!tool) return undefined;
+    if (tool instanceof LocalMCPServerTool) return 'mcp';
+    if (tool instanceof ShellCommandTool) return 'shell';
+    return undefined;
+  }
+
 }