From bbc241a35881dbe03bcfffe8f5430fa36f9e1d85 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Tue, 10 Feb 2026 19:00:43 +0000
Subject: [PATCH 1/7] feat(tracing): surface tool error spans

---
 .../callTools.reducer.tracing.test.ts         | 158 ++++++++++++++++++
 packages/platform-server/package.json         |   3 +
 .../src/llm/reducers/callTools.llm.reducer.ts | 110 +++++++++++-
 pnpm-lock.yaml                                |  28 +++-
 4 files changed, 287 insertions(+), 12 deletions(-)
 create mode 100644 packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
diff --git a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
new file mode 100644
index 000000000..df2e2f154
--- /dev/null
+++ b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
@@ -0,0 +1,158 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { ResponseMessage, ToolCallMessage } from '@agyn/llm';
+import { CallToolsLLMReducer } from '../src/llm/reducers/callTools.llm.reducer';
+import { createEventsBusStub, createRunEventsStub } from './helpers/runEvents.stub';
+import { Signal } from '../src/signal';
+import z from 'zod';
+import { McpError } from '../src/nodes/mcp/types';
+import { LocalMCPServerTool } from '../src/nodes/mcp/localMcpServer.tool';
+import type { LocalMCPServerNode } from '../src/nodes/mcp/localMcpServer.node';
+import { ShellCommandTool } from '../src/nodes/tools/shell_command/shell_command.tool';
+import { context, trace, SpanStatusCode } from '@opentelemetry/api';
+import { AsyncLocalStorageContextManager } from '@opentelemetry/context-async-hooks';
+import { BasicTracerProvider, InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
+import type { ReadableSpan } from '@opentelemetry/sdk-trace-base';
+
+const buildState = (toolName: string, callId: string, args: string) => {
+  const call = new ToolCallMessage({ type: 'function_call', name: toolName, call_id: callId, arguments: args } as any);
+  const response = new ResponseMessage({ output: [call.toPlain() as any] } as any);
+  return {
+    messages: [response],
+    meta: { lastLLMEventId: `evt-${callId}` },
+    context: { messageIds: [], memory: [] },
+  } as any;
+};
+
+const createCtx = () => ({
+  threadId: 'thread-span',
+  runId: 'run-span',
+  finishSignal: new Signal(),
+  terminateSignal: new Signal(),
+  callerAgent: { getAgentNodeId: () => 'agent-node' },
+});
+
+const createMcpNode = (callTool: ReturnType<typeof vi.fn>) =>
+  ({
+    config: { namespace: 'demo' },
+    callTool,
+  }) as unknown as LocalMCPServerNode;
+
+describe('CallToolsLLMReducer tracing instrumentation', () => {
+  let exporter: InMemorySpanExporter;
+  let provider: BasicTracerProvider;
+  let contextManager: AsyncLocalStorageContextManager | undefined;
+
+  const runWithSpan = async (fn: () => Promise<unknown>): Promise<ReadableSpan[]> => {
+    const tracer = provider.getTracer('call-tools-tracing');
+    const span = tracer.startSpan('tool-execution');
+    await context.with(trace.setSpan(context.active(), span), async () => {
+      await fn();
+    });
+    span.end();
+    await provider.forceFlush();
+    const spans = exporter.getFinishedSpans();
+    exporter.reset();
+    return spans;
+  };
+
+  beforeEach(() => {
+    exporter = new InMemorySpanExporter();
+    provider = new BasicTracerProvider({ spanProcessors: [new SimpleSpanProcessor(exporter)] });
+    contextManager = new AsyncLocalStorageContextManager().enable();
+    context.setGlobalContextManager(contextManager);
+  });
+
+  afterEach(async () => {
+    await provider.shutdown();
+    exporter.reset();
+    context.disable();
+    contextManager?.disable();
+  });
+
+  it('records MCP exceptions as error spans with metadata', async () => {
+    const tool = {
+      name: 'mcp_demo',
+      description: 'demo tool',
+      schema: z.object({}),
+      async execute() {
+        throw new McpError('upstream failure', { code: 'BAD_INPUT' });
+      },
+    };
+
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const ctx = createCtx();
+    const state = buildState(tool.name, 'call-mcp-throw', JSON.stringify({}));
+
+    const spans = await runWithSpan(() => reducer.invoke(state, ctx as any));
+    expect(spans).toHaveLength(1);
+    const span = spans[0];
+    expect(span.status.code).toBe(SpanStatusCode.ERROR);
+    expect(span.attributes['tool.name']).toBe(tool.name);
+    expect(span.attributes['tool.call_id']).toBe('call-mcp-throw');
+    expect(span.attributes['tool.source']).toBe('mcp');
+    expect(span.attributes['error.type']).toBe('McpError');
+    expect(span.attributes['error.message']).toContain('upstream failure');
+    expect(span.attributes['mcp.error_code']).toBe('BAD_INPUT');
+    expect(span.events.some((event) => event.name === 'exception')).toBe(true);
+  });
+
+  it('marks MCP logical failures as error spans with tool error metadata', async () => {
+    const largeOutput = 'x'.repeat(60000);
+    const callTool = vi.fn(async () => ({ isError: false, content: largeOutput }));
+    const node = createMcpNode(callTool);
+    const tool = new LocalMCPServerTool('codex_apply_patch', 'Patch tool', z.object({}), node);
+
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const ctx = createCtx();
+    const state = buildState(tool.name, 'call-mcp-logical', JSON.stringify({}));
+
+    const spans = await runWithSpan(() => reducer.invoke(state, ctx as any));
+    const span = spans[0];
+    expect(span.status.code).toBe(SpanStatusCode.ERROR);
+    expect(span.attributes['tool.name']).toBe(tool.name);
+    expect(span.attributes['tool.source']).toBe('mcp');
+    expect(span.attributes['tool.error_code']).toBe('TOOL_OUTPUT_TOO_LARGE');
+    expect(span.attributes['tool.retriable']).toBe(false);
+    const errorEvent = span.events.find((event) => event.name === 'tool.error');
+    expect(errorEvent?.attributes?.['tool.error_code']).toBe('TOOL_OUTPUT_TOO_LARGE');
+    expect(String(errorEvent?.attributes?.['tool.error_message'] ?? '')).toContain('longer than 50000');
+  });
+
+  it('keeps shell command error spans flagged on non-zero exit', async () => {
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const archiveStub = { createSingleFileTar: vi.fn(async () => Buffer.from('')) };
+    const prismaStub = {
+      getClient: vi.fn(() => ({
+        container: { findUnique: vi.fn(async () => null) },
+        containerEvent: { findFirst: vi.fn(async () => null) },
+      })),
+    };
+
+    class StubShellCommandTool extends ShellCommandTool {
+      constructor() {
+        super(archiveStub as any, runEvents as any, eventsBus as any, prismaStub as any);
+      }
+
+      override async executeStreaming(): Promise<string> {
+        return '[exit code 2] compiler failure';
+      }
+    }
+
+    const tool = new StubShellCommandTool();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const ctx = createCtx();
+    const state = buildState(tool.name, 'call-shell-span', JSON.stringify({ command: 'fail' }));
+
+    const spans = await runWithSpan(() => reducer.invoke(state, ctx as any));
+    const span = spans[0];
+    expect(span.status.code).toBe(SpanStatusCode.ERROR);
+    expect(span.attributes['tool.source']).toBe('shell');
+    const errorEvent = span.events.find((event) => event.name === 'tool.error');
+    expect(String(errorEvent?.attributes?.['tool.error_message'] ?? '')).toContain('exit code 2');
+  });
+});
diff --git a/packages/platform-server/package.json b/packages/platform-server/package.json
index 4bcc03331..377b1c31d 100644
--- a/packages/platform-server/package.json
+++ b/packages/platform-server/package.json
@@ -21,6 +21,7 @@
     "prisma:studio": "prisma studio"
   },
   "dependencies": {
+    "@opentelemetry/api": "^1.9.0",
     "@agyn/shared": "workspace:*",
     "@agyn/json-schema-to-zod": "workspace:*",
     "@agyn/llm": "workspace:*",
@@ -68,6 +69,8 @@
     "zod": "^4.1.9"
   },
   "devDependencies": {
+    "@opentelemetry/context-async-hooks": "^1.9.0",
+    "@opentelemetry/sdk-trace-base": "^2.1.0",
     "@eslint/js": "^9.13.0",
     "@langchain/langgraph-cli": "0.0.66",
     "@nestjs/testing": "^11.1.8",
diff --git a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
index a1db7a936..68bd50694 100644
--- a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
@@ -1,5 +1,6 @@
 import { LLMContext, LLMContextState, LLMMessage, LLMState } from '../types';
 import { FunctionTool, Reducer, ResponseMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
+import { SpanStatusCode, trace } from '@opentelemetry/api';
 import { Inject, Injectable, Logger, Scope } from '@nestjs/common';
 import { McpError } from '../../nodes/mcp/types';
 import { RunEventsService } from '../../events/run-events.service';
@@ -10,6 +11,8 @@ import type { ResponseFunctionCallOutputItemList } from 'openai/resources/respon
 import { contextItemInputFromMessage } from '../services/context-items.utils';
 import { persistContextItems } from '../services/context-items.append';
 import { ShellCommandTool } from '../../nodes/tools/shell_command/shell_command.tool';
+import { LocalMCPServerTool } from '../../nodes/mcp/localMcpServer.tool';
+import type { SpanAttributes } from '@opentelemetry/api';
 
 type ToolCallErrorCode =
   | 'BAD_JSON_ARGS'
@@ -32,6 +35,12 @@ type ToolCallErrorPayload = {
 };
 type ToolCallStructuredOutput = ToolCallRaw | ToolCallErrorPayload;
 
+const isToolCallErrorPayload = (value: unknown): value is ToolCallErrorPayload => {
+  if (!value || typeof value !== 'object' || Array.isArray(value)) return false;
+  const candidate = value as Partial<ToolCallErrorPayload>;
+  return candidate.status === 'error' && typeof candidate.error_code === 'string';
+};
+
 type ToolCallResult = {
   status: 'success' | 'error';
   raw: ToolCallRaw;
@@ -177,6 +186,17 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     let startedEventId: string | null = null;
     let caughtError: unknown | null = null;
     let response: ToolCallResult | undefined;
+    let toolSourceOverride: string | undefined;
+
+    const finalizeResponse = (result: ToolCallResult): ToolCallResult => {
+      this.annotateToolSpanFailure({
+        response: result,
+        toolCallId: toolCall.callId,
+        toolName: tool?.name ?? toolCall.name,
+        toolSource: toolSourceOverride ?? this.resolveToolSource(tool),
+      });
+      return result;
+    };
 
     const createErrorResponse = (args: {
       code: ToolCallErrorCode;
@@ -212,7 +232,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           message: `Tool ${toolCall.name} is not registered.`,
           originalArgs: toolCall.args,
         });
-        return response;
+        return finalizeResponse(response);
       }
 
       let parsedArgs: unknown;
@@ -233,7 +253,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           originalArgs: toolCall.args,
           details,
         });
-        return response;
+        return finalizeResponse(response);
       }
 
       const validation = tool.schema.safeParse(parsedArgs);
@@ -245,7 +265,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           originalArgs: parsedArgs,
           details: issues,
         });
-        return response;
+        return finalizeResponse(response);
       }
       const input = validation.data;
 
@@ -336,20 +356,31 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
         const message = err instanceof Error && err.message ? err.message : 'Unknown error';
         const details =
           err instanceof Error ? { message: err.message, name: err.name, stack: err.stack } : { error: err };
-        const code = err instanceof McpError ? 'MCP_CALL_ERROR' : 'TOOL_EXECUTION_ERROR';
+        const isMcpError = err instanceof McpError;
+        if (isMcpError) {
+          toolSourceOverride = 'mcp';
+        }
+        const code = isMcpError ? 'MCP_CALL_ERROR' : 'TOOL_EXECUTION_ERROR';
         response = createErrorResponse({
           code,
           message: `Tool ${toolCall.name} execution failed: ${message}`,
           originalArgs: input,
           details,
         });
+        this.annotateToolSpanException({
+          toolCallId: toolCall.callId,
+          toolName: tool?.name ?? toolCall.name,
+          error: err,
+          toolSource: toolSourceOverride ?? this.resolveToolSource(tool),
+          mcpErrorCode: isMcpError ? (err as McpError).code : undefined,
+        });
       }
 
       if (!response) {
         throw new Error('tool_response_missing');
       }
 
-      return response;
+      return finalizeResponse(response);
     } catch (err) {
       caughtError = err;
       throw err instanceof Error ? err : new Error(String(err));
@@ -369,6 +400,75 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     }
   }
 
+  private annotateToolSpanException(params: {
+    toolCallId: string;
+    toolName: string;
+    error: unknown;
+    toolSource?: string;
+    mcpErrorCode?: string;
+  }): void {
+    const span = trace.getActiveSpan();
+    if (!span) return;
+    const message = params.error instanceof Error && params.error.message ? params.error.message : String(params.error);
+    const type = params.error instanceof Error ? params.error.name : typeof params.error;
+    span.setAttribute('tool.name', params.toolName);
+    span.setAttribute('tool.call_id', params.toolCallId);
+    if (params.toolSource) span.setAttribute('tool.source', params.toolSource);
+    if (params.mcpErrorCode) span.setAttribute('mcp.error_code', params.mcpErrorCode);
+    span.setAttribute('error.type', type);
+    span.setAttribute('error.message', message);
+    if (params.error instanceof Error && typeof params.error.stack === 'string') {
+      span.setAttribute('error.stack', params.error.stack);
+    }
+    if (params.error instanceof Error) {
+      span.recordException(params.error);
+    } else {
+      span.recordException({ name: type, message });
+    }
+    span.setStatus({ code: SpanStatusCode.ERROR, message });
+  }
+
+  private annotateToolSpanFailure(params: {
+    response: ToolCallResult;
+    toolCallId: string;
+    toolName: string;
+    toolSource?: string;
+  }): void {
+    if (params.response.status !== 'error') return;
+    const span = trace.getActiveSpan();
+    if (!span) return;
+    span.setAttribute('tool.name', params.toolName);
+    span.setAttribute('tool.call_id', params.toolCallId);
+    if (params.toolSource) span.setAttribute('tool.source', params.toolSource);
+
+    const errorPayload = isToolCallErrorPayload(params.response.output) ? params.response.output : null;
+    if (errorPayload?.error_code) {
+      span.setAttribute('tool.error_code', errorPayload.error_code);
+    }
+    if (typeof errorPayload?.retriable === 'boolean') {
+      span.setAttribute('tool.retriable', errorPayload.retriable);
+    }
+
+    const message = errorPayload?.message ?? this.extractErrorMessage(params.response) ?? 'Tool execution failed';
+    span.setStatus({ code: SpanStatusCode.ERROR, message });
+
+    const eventAttributes: SpanAttributes = { 'tool.error_message': message };
+    if (errorPayload?.error_code) {
+      eventAttributes['tool.error_code'] = errorPayload.error_code;
+    }
+    if (typeof errorPayload?.retriable === 'boolean') {
+      eventAttributes['tool.retriable'] = errorPayload.retriable;
+    }
+    span.addEvent('tool.error', eventAttributes);
+  }
+
+  private resolveToolSource(tool: FunctionTool | undefined): string | undefined {
+    if (!tool) return undefined;
+    if (tool instanceof LocalMCPServerTool) return 'mcp';
+    if (tool instanceof ShellCommandTool) return 'shell';
+    return undefined;
+  }
+
   private buildToolMessagePayload(response: ToolCallResult): ToolCallRaw {
     const payload = response.output ?? response.raw;
     if (response.status === 'success') {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b955edc0d..80c13904c 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -129,6 +129,9 @@ importers:
       '@octokit/rest':
         specifier: ^22.0.0
         version: 22.0.0
+      '@opentelemetry/api':
+        specifier: ^1.9.0
+        version: 1.9.0
       '@prisma/client':
         specifier: ^6.18.0
         version: 6.18.0(prisma@6.18.0(typescript@5.8.3))(typescript@5.8.3)
@@ -226,6 +229,12 @@ importers:
       '@nestjs/testing':
         specifier: ^11.1.8
         version: 11.1.8(@nestjs/common@11.1.7(class-transformer@0.5.1)(class-validator@0.14.2)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.7(@nestjs/common@11.1.7(class-transformer@0.5.1)(class-validator@0.14.2)(reflect-metadata@0.2.2)(rxjs@7.8.2))(reflect-metadata@0.2.2)(rxjs@7.8.2))
+      '@opentelemetry/context-async-hooks':
+        specifier: ^1.9.0
+        version: 1.30.1(@opentelemetry/api@1.9.0)
+      '@opentelemetry/sdk-trace-base':
+        specifier: ^2.1.0
+        version: 2.1.0(@opentelemetry/api@1.9.0)
       '@types/dockerode':
         specifier: ^3.3.44
         version: 3.3.44
@@ -1957,6 +1966,12 @@ packages:
     resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==}
     engines: {node: '>=8.0.0'}
 
+  '@opentelemetry/context-async-hooks@1.30.1':
+    resolution: {integrity: sha512-s5vvxXPVdjqS3kTLKMeBMvop9hbWkwzBpu+mUO2M7sZtlkyDJGwFe33wRKnbaYDo8ExRVBIIdwIGrqpxHuKttA==}
+    engines: {node: '>=14'}
+    peerDependencies:
+      '@opentelemetry/api': '>=1.0.0 <1.10.0'
+
   '@opentelemetry/core@2.0.1':
     resolution: {integrity: sha512-MaZk9SJIDgo1peKevlbhP6+IwIiNPNmswNL4AF0WaQJLbHXjr9SrZMgS12+iqr9ToV4ZVosCcc0f8Rg67LXjxw==}
     engines: {node: ^18.19.0 || >=20.6.0}
@@ -10021,8 +10036,11 @@ snapshots:
       '@opentelemetry/api': 1.9.0
     optional: true
 
-  '@opentelemetry/api@1.9.0':
-    optional: true
+  '@opentelemetry/api@1.9.0': {}
+
+  '@opentelemetry/context-async-hooks@1.30.1(@opentelemetry/api@1.9.0)':
+    dependencies:
+      '@opentelemetry/api': 1.9.0
 
   '@opentelemetry/core@2.0.1(@opentelemetry/api@1.9.0)':
     dependencies:
@@ -10034,7 +10052,6 @@ snapshots:
     dependencies:
       '@opentelemetry/api': 1.9.0
       '@opentelemetry/semantic-conventions': 1.37.0
-    optional: true
 
   '@opentelemetry/exporter-trace-otlp-proto@0.203.0(@opentelemetry/api@1.9.0)':
     dependencies:
@@ -10077,7 +10094,6 @@ snapshots:
       '@opentelemetry/api': 1.9.0
       '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0)
       '@opentelemetry/semantic-conventions': 1.37.0
-    optional: true
 
   '@opentelemetry/sdk-logs@0.203.0(@opentelemetry/api@1.9.0)':
     dependencies:
@@ -10108,10 +10124,8 @@ snapshots:
       '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0)
       '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0)
       '@opentelemetry/semantic-conventions': 1.37.0
-    optional: true
 
-  '@opentelemetry/semantic-conventions@1.37.0':
-    optional: true
+  '@opentelemetry/semantic-conventions@1.37.0': {}
 
   '@pinojs/redact@0.4.0': {}
 

From 936e931b0123898fa12e1ec552ca131ebdcfd2d9 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Tue, 10 Feb 2026 19:18:04 +0000
Subject: [PATCH 2/7] test(tracing): flush spans on failure

---
 .../callTools.reducer.tracing.test.ts         | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
index df2e2f154..32096dd2f 100644
--- a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
+++ b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
@@ -45,13 +45,22 @@ describe('CallToolsLLMReducer tracing instrumentation', () => {
   const runWithSpan = async (fn: () => Promise<unknown>): Promise<ReadableSpan[]> => {
     const tracer = provider.getTracer('call-tools-tracing');
     const span = tracer.startSpan('tool-execution');
-    await context.with(trace.setSpan(context.active(), span), async () => {
-      await fn();
-    });
-    span.end();
-    await provider.forceFlush();
+    let capturedError: unknown;
+    try {
+      await context.with(trace.setSpan(context.active(), span), async () => {
+        await fn();
+      });
+    } catch (err) {
+      capturedError = err;
+    } finally {
+      span.end();
+      await provider.forceFlush();
+    }
     const spans = exporter.getFinishedSpans();
     exporter.reset();
+    if (capturedError) {
+      throw capturedError;
+    }
     return spans;
   };
 

From 1af165adf847a2f46373a2f7f86d0cbebb6139d8 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Wed, 11 Feb 2026 12:42:54 +0000
Subject: [PATCH 3/7] refactor(tracing): rely on run events

---
 .../callTools.reducer.tracing.test.ts         | 119 +++++-------------
 packages/platform-server/package.json         |   3 -
 .../src/llm/reducers/callTools.llm.reducer.ts | 110 +---------------
 pnpm-lock.yaml                                |  28 ++---
 4 files changed, 46 insertions(+), 214 deletions(-)

diff --git a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
index 32096dd2f..8eae25498 100644
--- a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
+++ b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
@@ -1,5 +1,5 @@
-import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
-import { ResponseMessage, ToolCallMessage } from '@agyn/llm';
+import { describe, it, expect, vi } from 'vitest';
+import { ResponseMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
 import { CallToolsLLMReducer } from '../src/llm/reducers/callTools.llm.reducer';
 import { createEventsBusStub, createRunEventsStub } from './helpers/runEvents.stub';
 import { Signal } from '../src/signal';
@@ -8,10 +8,6 @@ import { McpError } from '../src/nodes/mcp/types';
 import { LocalMCPServerTool } from '../src/nodes/mcp/localMcpServer.tool';
 import type { LocalMCPServerNode } from '../src/nodes/mcp/localMcpServer.node';
 import { ShellCommandTool } from '../src/nodes/tools/shell_command/shell_command.tool';
-import { context, trace, SpanStatusCode } from '@opentelemetry/api';
-import { AsyncLocalStorageContextManager } from '@opentelemetry/context-async-hooks';
-import { BasicTracerProvider, InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
-import type { ReadableSpan } from '@opentelemetry/sdk-trace-base';
 
 const buildState = (toolName: string, callId: string, args: string) => {
   const call = new ToolCallMessage({ type: 'function_call', name: toolName, call_id: callId, arguments: args } as any);
@@ -37,56 +33,13 @@ const createMcpNode = (callTool: ReturnType<typeof vi.fn>) =>
     callTool,
   }) as unknown as LocalMCPServerNode;
 
-describe('CallToolsLLMReducer tracing instrumentation', () => {
-  let exporter: InMemorySpanExporter;
-  let provider: BasicTracerProvider;
-  let contextManager: AsyncLocalStorageContextManager | undefined;
-
-  const runWithSpan = async (fn: () => Promise<unknown>): Promise<ReadableSpan[]> => {
-    const tracer = provider.getTracer('call-tools-tracing');
-    const span = tracer.startSpan('tool-execution');
-    let capturedError: unknown;
-    try {
-      await context.with(trace.setSpan(context.active(), span), async () => {
-        await fn();
-      });
-    } catch (err) {
-      capturedError = err;
-    } finally {
-      span.end();
-      await provider.forceFlush();
-    }
-    const spans = exporter.getFinishedSpans();
-    exporter.reset();
-    if (capturedError) {
-      throw capturedError;
-    }
-    return spans;
-  };
-
-  beforeEach(() => {
-    exporter = new InMemorySpanExporter();
-    provider = new BasicTracerProvider({ spanProcessors: [new SimpleSpanProcessor(exporter)] });
-    contextManager = new AsyncLocalStorageContextManager().enable();
-    context.setGlobalContextManager(contextManager);
-  });
-
-  afterEach(async () => {
-    await provider.shutdown();
-    exporter.reset();
-    context.disable();
-    contextManager?.disable();
-  });
-
-  it('records MCP exceptions as error spans with metadata', async () => {
-    const tool = {
-      name: 'mcp_demo',
-      description: 'demo tool',
-      schema: z.object({}),
-      async execute() {
-        throw new McpError('upstream failure', { code: 'BAD_INPUT' });
-      },
-    };
+describe('CallToolsLLMReducer tracing via run events', () => {
+  it('marks MCP exceptions as failed tool executions', async () => {
+    const callTool = vi.fn(async () => {
+      throw new McpError('upstream failure', { code: 'BAD_INPUT' });
+    });
+    const node = createMcpNode(callTool);
+    const tool = new LocalMCPServerTool('codex_apply_patch', 'Patch tool', z.object({}), node);
 
     const runEvents = createRunEventsStub();
     const eventsBus = createEventsBusStub();
@@ -94,20 +47,15 @@ describe('CallToolsLLMReducer tracing instrumentation', () => {
     const ctx = createCtx();
     const state = buildState(tool.name, 'call-mcp-throw', JSON.stringify({}));
 
-    const spans = await runWithSpan(() => reducer.invoke(state, ctx as any));
-    expect(spans).toHaveLength(1);
-    const span = spans[0];
-    expect(span.status.code).toBe(SpanStatusCode.ERROR);
-    expect(span.attributes['tool.name']).toBe(tool.name);
-    expect(span.attributes['tool.call_id']).toBe('call-mcp-throw');
-    expect(span.attributes['tool.source']).toBe('mcp');
-    expect(span.attributes['error.type']).toBe('McpError');
-    expect(span.attributes['error.message']).toContain('upstream failure');
-    expect(span.attributes['mcp.error_code']).toBe('BAD_INPUT');
-    expect(span.events.some((event) => event.name === 'exception')).toBe(true);
+    await reducer.invoke(state, ctx as any);
+
+    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
+    const [completion] = runEvents.completeToolExecution.mock.calls[0];
+    expect(completion.status).toBe('error');
+    expect(String(completion.errorMessage ?? '')).toContain('upstream failure');
   });
 
-  it('marks MCP logical failures as error spans with tool error metadata', async () => {
+  it('marks MCP logical failures as failed tool executions', async () => {
     const largeOutput = 'x'.repeat(60000);
     const callTool = vi.fn(async () => ({ isError: false, content: largeOutput }));
     const node = createMcpNode(callTool);
@@ -119,19 +67,18 @@ describe('CallToolsLLMReducer tracing instrumentation', () => {
     const ctx = createCtx();
     const state = buildState(tool.name, 'call-mcp-logical', JSON.stringify({}));
 
-    const spans = await runWithSpan(() => reducer.invoke(state, ctx as any));
-    const span = spans[0];
-    expect(span.status.code).toBe(SpanStatusCode.ERROR);
-    expect(span.attributes['tool.name']).toBe(tool.name);
-    expect(span.attributes['tool.source']).toBe('mcp');
-    expect(span.attributes['tool.error_code']).toBe('TOOL_OUTPUT_TOO_LARGE');
-    expect(span.attributes['tool.retriable']).toBe(false);
-    const errorEvent = span.events.find((event) => event.name === 'tool.error');
-    expect(errorEvent?.attributes?.['tool.error_code']).toBe('TOOL_OUTPUT_TOO_LARGE');
-    expect(String(errorEvent?.attributes?.['tool.error_message'] ?? '')).toContain('longer than 50000');
+    const result = await reducer.invoke(state, ctx as any);
+    const output = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(output).toBeInstanceOf(ToolCallOutputMessage);
+    expect(output.text).toContain('longer than 50000');
+
+    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
+    const [completion] = runEvents.completeToolExecution.mock.calls[0];
+    expect(completion.status).toBe('error');
+    expect(String(completion.errorMessage ?? '')).toContain('longer than 50000');
   });
 
-  it('keeps shell command error spans flagged on non-zero exit', async () => {
+  it('keeps shell command tracing flagged on non-zero exit codes', async () => {
     const runEvents = createRunEventsStub();
     const eventsBus = createEventsBusStub();
     const archiveStub = { createSingleFileTar: vi.fn(async () => Buffer.from('')) };
@@ -157,11 +104,13 @@ describe('CallToolsLLMReducer tracing instrumentation', () => {
     const ctx = createCtx();
     const state = buildState(tool.name, 'call-shell-span', JSON.stringify({ command: 'fail' }));
 
-    const spans = await runWithSpan(() => reducer.invoke(state, ctx as any));
-    const span = spans[0];
-    expect(span.status.code).toBe(SpanStatusCode.ERROR);
-    expect(span.attributes['tool.source']).toBe('shell');
-    const errorEvent = span.events.find((event) => event.name === 'tool.error');
-    expect(String(errorEvent?.attributes?.['tool.error_message'] ?? '')).toContain('exit code 2');
+    const result = await reducer.invoke(state, ctx as any);
+    const message = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(message.text).toContain('exit code 2');
+
+    expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
+    const [completion] = runEvents.completeToolExecution.mock.calls[0];
+    expect(completion.status).toBe('error');
+    expect(String(completion.errorMessage ?? '')).toContain('exit code 2');
   });
 });
diff --git a/packages/platform-server/package.json b/packages/platform-server/package.json
index 377b1c31d..4bcc03331 100644
--- a/packages/platform-server/package.json
+++ b/packages/platform-server/package.json
@@ -21,7 +21,6 @@
     "prisma:studio": "prisma studio"
   },
   "dependencies": {
-    "@opentelemetry/api": "^1.9.0",
     "@agyn/shared": "workspace:*",
     "@agyn/json-schema-to-zod": "workspace:*",
     "@agyn/llm": "workspace:*",
@@ -69,8 +68,6 @@
     "zod": "^4.1.9"
   },
   "devDependencies": {
-    "@opentelemetry/context-async-hooks": "^1.9.0",
-    "@opentelemetry/sdk-trace-base": "^2.1.0",
     "@eslint/js": "^9.13.0",
     "@langchain/langgraph-cli": "0.0.66",
     "@nestjs/testing": "^11.1.8",
diff --git a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
index 68bd50694..a1db7a936 100644
--- a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
@@ -1,6 +1,5 @@
 import { LLMContext, LLMContextState, LLMMessage, LLMState } from '../types';
 import { FunctionTool, Reducer, ResponseMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
-import { SpanStatusCode, trace } from '@opentelemetry/api';
 import { Inject, Injectable, Logger, Scope } from '@nestjs/common';
 import { McpError } from '../../nodes/mcp/types';
 import { RunEventsService } from '../../events/run-events.service';
@@ -11,8 +10,6 @@ import type { ResponseFunctionCallOutputItemList } from 'openai/resources/respon
 import { contextItemInputFromMessage } from '../services/context-items.utils';
 import { persistContextItems } from '../services/context-items.append';
 import { ShellCommandTool } from '../../nodes/tools/shell_command/shell_command.tool';
-import { LocalMCPServerTool } from '../../nodes/mcp/localMcpServer.tool';
-import type { SpanAttributes } from '@opentelemetry/api';
 
 type ToolCallErrorCode =
   | 'BAD_JSON_ARGS'
@@ -35,12 +32,6 @@ type ToolCallErrorPayload = {
 };
 type ToolCallStructuredOutput = ToolCallRaw | ToolCallErrorPayload;
 
-const isToolCallErrorPayload = (value: unknown): value is ToolCallErrorPayload => {
-  if (!value || typeof value !== 'object' || Array.isArray(value)) return false;
-  const candidate = value as Partial<ToolCallErrorPayload>;
-  return candidate.status === 'error' && typeof candidate.error_code === 'string';
-};
-
 type ToolCallResult = {
   status: 'success' | 'error';
   raw: ToolCallRaw;
@@ -186,17 +177,6 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     let startedEventId: string | null = null;
     let caughtError: unknown | null = null;
     let response: ToolCallResult | undefined;
-    let toolSourceOverride: string | undefined;
-
-    const finalizeResponse = (result: ToolCallResult): ToolCallResult => {
-      this.annotateToolSpanFailure({
-        response: result,
-        toolCallId: toolCall.callId,
-        toolName: tool?.name ?? toolCall.name,
-        toolSource: toolSourceOverride ?? this.resolveToolSource(tool),
-      });
-      return result;
-    };
 
     const createErrorResponse = (args: {
       code: ToolCallErrorCode;
@@ -232,7 +212,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           message: `Tool ${toolCall.name} is not registered.`,
           originalArgs: toolCall.args,
         });
-        return finalizeResponse(response);
+        return response;
       }
 
       let parsedArgs: unknown;
@@ -253,7 +233,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           originalArgs: toolCall.args,
           details,
         });
-        return finalizeResponse(response);
+        return response;
       }
 
       const validation = tool.schema.safeParse(parsedArgs);
@@ -265,7 +245,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           originalArgs: parsedArgs,
           details: issues,
         });
-        return finalizeResponse(response);
+        return response;
       }
       const input = validation.data;
 
@@ -356,31 +336,20 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
         const message = err instanceof Error && err.message ? err.message : 'Unknown error';
         const details =
           err instanceof Error ? { message: err.message, name: err.name, stack: err.stack } : { error: err };
-        const isMcpError = err instanceof McpError;
-        if (isMcpError) {
-          toolSourceOverride = 'mcp';
-        }
-        const code = isMcpError ? 'MCP_CALL_ERROR' : 'TOOL_EXECUTION_ERROR';
+        const code = err instanceof McpError ? 'MCP_CALL_ERROR' : 'TOOL_EXECUTION_ERROR';
         response = createErrorResponse({
           code,
           message: `Tool ${toolCall.name} execution failed: ${message}`,
           originalArgs: input,
           details,
         });
-        this.annotateToolSpanException({
-          toolCallId: toolCall.callId,
-          toolName: tool?.name ?? toolCall.name,
-          error: err,
-          toolSource: toolSourceOverride ?? this.resolveToolSource(tool),
-          mcpErrorCode: isMcpError ? (err as McpError).code : undefined,
-        });
       }
 
       if (!response) {
         throw new Error('tool_response_missing');
       }
 
-      return finalizeResponse(response);
+      return response;
     } catch (err) {
       caughtError = err;
       throw err instanceof Error ? err : new Error(String(err));
@@ -400,75 +369,6 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     }
   }
 
-  private annotateToolSpanException(params: {
-    toolCallId: string;
-    toolName: string;
-    error: unknown;
-    toolSource?: string;
-    mcpErrorCode?: string;
-  }): void {
-    const span = trace.getActiveSpan();
-    if (!span) return;
-    const message = params.error instanceof Error && params.error.message ? params.error.message : String(params.error);
-    const type = params.error instanceof Error ? params.error.name : typeof params.error;
-    span.setAttribute('tool.name', params.toolName);
-    span.setAttribute('tool.call_id', params.toolCallId);
-    if (params.toolSource) span.setAttribute('tool.source', params.toolSource);
-    if (params.mcpErrorCode) span.setAttribute('mcp.error_code', params.mcpErrorCode);
-    span.setAttribute('error.type', type);
-    span.setAttribute('error.message', message);
-    if (params.error instanceof Error && typeof params.error.stack === 'string') {
-      span.setAttribute('error.stack', params.error.stack);
-    }
-    if (params.error instanceof Error) {
-      span.recordException(params.error);
-    } else {
-      span.recordException({ name: type, message });
-    }
-    span.setStatus({ code: SpanStatusCode.ERROR, message });
-  }
-
-  private annotateToolSpanFailure(params: {
-    response: ToolCallResult;
-    toolCallId: string;
-    toolName: string;
-    toolSource?: string;
-  }): void {
-    if (params.response.status !== 'error') return;
-    const span = trace.getActiveSpan();
-    if (!span) return;
-    span.setAttribute('tool.name', params.toolName);
-    span.setAttribute('tool.call_id', params.toolCallId);
-    if (params.toolSource) span.setAttribute('tool.source', params.toolSource);
-
-    const errorPayload = isToolCallErrorPayload(params.response.output) ? params.response.output : null;
-    if (errorPayload?.error_code) {
-      span.setAttribute('tool.error_code', errorPayload.error_code);
-    }
-    if (typeof errorPayload?.retriable === 'boolean') {
-      span.setAttribute('tool.retriable', errorPayload.retriable);
-    }
-
-    const message = errorPayload?.message ?? this.extractErrorMessage(params.response) ?? 'Tool execution failed';
-    span.setStatus({ code: SpanStatusCode.ERROR, message });
-
-    const eventAttributes: SpanAttributes = { 'tool.error_message': message };
-    if (errorPayload?.error_code) {
-      eventAttributes['tool.error_code'] = errorPayload.error_code;
-    }
-    if (typeof errorPayload?.retriable === 'boolean') {
-      eventAttributes['tool.retriable'] = errorPayload.retriable;
-    }
-    span.addEvent('tool.error', eventAttributes);
-  }
-
-  private resolveToolSource(tool: FunctionTool | undefined): string | undefined {
-    if (!tool) return undefined;
-    if (tool instanceof LocalMCPServerTool) return 'mcp';
-    if (tool instanceof ShellCommandTool) return 'shell';
-    return undefined;
-  }
-
   private buildToolMessagePayload(response: ToolCallResult): ToolCallRaw {
     const payload = response.output ?? response.raw;
     if (response.status === 'success') {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 80c13904c..b955edc0d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -129,9 +129,6 @@ importers:
       '@octokit/rest':
         specifier: ^22.0.0
         version: 22.0.0
-      '@opentelemetry/api':
-        specifier: ^1.9.0
-        version: 1.9.0
       '@prisma/client':
         specifier: ^6.18.0
         version: 6.18.0(prisma@6.18.0(typescript@5.8.3))(typescript@5.8.3)
@@ -229,12 +226,6 @@ importers:
       '@nestjs/testing':
         specifier: ^11.1.8
         version: 11.1.8(@nestjs/common@11.1.7(class-transformer@0.5.1)(class-validator@0.14.2)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.7(@nestjs/common@11.1.7(class-transformer@0.5.1)(class-validator@0.14.2)(reflect-metadata@0.2.2)(rxjs@7.8.2))(reflect-metadata@0.2.2)(rxjs@7.8.2))
-      '@opentelemetry/context-async-hooks':
-        specifier: ^1.9.0
-        version: 1.30.1(@opentelemetry/api@1.9.0)
-      '@opentelemetry/sdk-trace-base':
-        specifier: ^2.1.0
-        version: 2.1.0(@opentelemetry/api@1.9.0)
       '@types/dockerode':
         specifier: ^3.3.44
         version: 3.3.44
@@ -1966,12 +1957,6 @@ packages:
     resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==}
     engines: {node: '>=8.0.0'}
 
-  '@opentelemetry/context-async-hooks@1.30.1':
-    resolution: {integrity: sha512-s5vvxXPVdjqS3kTLKMeBMvop9hbWkwzBpu+mUO2M7sZtlkyDJGwFe33wRKnbaYDo8ExRVBIIdwIGrqpxHuKttA==}
-    engines: {node: '>=14'}
-    peerDependencies:
-      '@opentelemetry/api': '>=1.0.0 <1.10.0'
-
   '@opentelemetry/core@2.0.1':
     resolution: {integrity: sha512-MaZk9SJIDgo1peKevlbhP6+IwIiNPNmswNL4AF0WaQJLbHXjr9SrZMgS12+iqr9ToV4ZVosCcc0f8Rg67LXjxw==}
     engines: {node: ^18.19.0 || >=20.6.0}
@@ -10036,11 +10021,8 @@ snapshots:
       '@opentelemetry/api': 1.9.0
     optional: true
 
-  '@opentelemetry/api@1.9.0': {}
-
-  '@opentelemetry/context-async-hooks@1.30.1(@opentelemetry/api@1.9.0)':
-    dependencies:
-      '@opentelemetry/api': 1.9.0
+  '@opentelemetry/api@1.9.0':
+    optional: true
 
   '@opentelemetry/core@2.0.1(@opentelemetry/api@1.9.0)':
     dependencies:
@@ -10052,6 +10034,7 @@ snapshots:
     dependencies:
       '@opentelemetry/api': 1.9.0
       '@opentelemetry/semantic-conventions': 1.37.0
+    optional: true
 
   '@opentelemetry/exporter-trace-otlp-proto@0.203.0(@opentelemetry/api@1.9.0)':
     dependencies:
@@ -10094,6 +10077,7 @@ snapshots:
       '@opentelemetry/api': 1.9.0
       '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0)
       '@opentelemetry/semantic-conventions': 1.37.0
+    optional: true
 
   '@opentelemetry/sdk-logs@0.203.0(@opentelemetry/api@1.9.0)':
     dependencies:
@@ -10124,8 +10108,10 @@ snapshots:
       '@opentelemetry/core': 2.1.0(@opentelemetry/api@1.9.0)
       '@opentelemetry/resources': 2.1.0(@opentelemetry/api@1.9.0)
       '@opentelemetry/semantic-conventions': 1.37.0
+    optional: true
 
-  '@opentelemetry/semantic-conventions@1.37.0': {}
+  '@opentelemetry/semantic-conventions@1.37.0':
+    optional: true
 
   '@pinojs/redact@0.4.0': {}
 

From c777ea4240d04734b473624cccbce31f5f7d82e6 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Wed, 11 Feb 2026 14:44:21 +0000
Subject: [PATCH 4/7] fix(tracing): record tool error metadata

---
 .../callTools.reducer.errorHandling.test.ts   |  1 +
 .../callTools.reducer.tracing.test.ts         |  3 +
 .../__tests__/mcp.error.mapping.test.ts       |  1 +
 .../src/events/run-events.service.ts          |  2 +
 .../src/llm/reducers/callTools.llm.reducer.ts | 76 +++++++++++++++++--
 5 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts b/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts
index 071b77171..bb141c183 100644
--- a/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts
+++ b/packages/platform-server/__tests__/callTools.reducer.errorHandling.test.ts
@@ -171,6 +171,7 @@ describe('CallToolsLLMReducer error isolation', () => {
     const [payload] = runEvents.completeToolExecution.mock.calls[0];
     expect(payload.status).toBe('error');
     expect(payload.errorMessage).toBe('[exit code 42] compiler error: missing semicolon');
+    expect(payload.errorCode ?? null).toBeNull();
   });
 
   it('invokes manage tool via reducer without relying on instance logger field', async () => {
diff --git a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
index 8eae25498..c2c4d4656 100644
--- a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
+++ b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
@@ -53,6 +53,7 @@ describe('CallToolsLLMReducer tracing via run events', () => {
     const [completion] = runEvents.completeToolExecution.mock.calls[0];
     expect(completion.status).toBe('error');
     expect(String(completion.errorMessage ?? '')).toContain('upstream failure');
+    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
   });
 
   it('marks MCP logical failures as failed tool executions', async () => {
@@ -76,6 +77,7 @@ describe('CallToolsLLMReducer tracing via run events', () => {
     const [completion] = runEvents.completeToolExecution.mock.calls[0];
     expect(completion.status).toBe('error');
     expect(String(completion.errorMessage ?? '')).toContain('longer than 50000');
+    expect(completion.errorCode).toBe('TOOL_OUTPUT_TOO_LARGE');
   });
 
   it('keeps shell command tracing flagged on non-zero exit codes', async () => {
@@ -112,5 +114,6 @@ describe('CallToolsLLMReducer tracing via run events', () => {
     const [completion] = runEvents.completeToolExecution.mock.calls[0];
     expect(completion.status).toBe('error');
     expect(String(completion.errorMessage ?? '')).toContain('exit code 2');
+    expect(completion.errorCode ?? null).toBeNull();
   });
 });
diff --git a/packages/platform-server/__tests__/mcp.error.mapping.test.ts b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
index 0386f45e6..9fb7b3e53 100644
--- a/packages/platform-server/__tests__/mcp.error.mapping.test.ts
+++ b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
@@ -61,6 +61,7 @@ describe('CallToolsLLMReducer MCP error mapping', () => {
     const [completionPayload] = runEvents.completeToolExecution.mock.calls[0];
     expect(completionPayload.status).toBe('error');
     expect(completionPayload.errorMessage).toContain('apply_patch failed (code=PATCH_FAIL retriable=false)');
+    expect(completionPayload.errorCode).toBe('MCP_CALL_ERROR');
 
     const lastMessage = result.messages.at(-1) as ToolCallOutputMessage;
     expect(lastMessage).toBeInstanceOf(ToolCallOutputMessage);
diff --git a/packages/platform-server/src/events/run-events.service.ts b/packages/platform-server/src/events/run-events.service.ts
index 0c39e639a..f9aa8f5c6 100644
--- a/packages/platform-server/src/events/run-events.service.ts
+++ b/packages/platform-server/src/events/run-events.service.ts
@@ -364,6 +364,7 @@ export interface ToolExecutionCompleteArgs {
   status: ToolExecStatus;
   output?: Prisma.InputJsonValue | null;
   errorMessage?: string | null;
+  errorCode?: string | null;
   raw?: Prisma.InputJsonValue | null;
   endedAt?: Date;
 }
@@ -1409,6 +1410,7 @@ export class RunEventsService {
         status: execStatus === ToolExecStatus.success ? RunEventStatus.success : RunEventStatus.error,
         endedAt,
         errorMessage: args.errorMessage ?? null,
+        errorCode: args.errorCode ?? null,
       },
     });
     const durationMs = event.startedAt ? Math.max(0, endedAt.getTime() - event.startedAt.getTime()) : null;
diff --git a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
index a1db7a936..5628a55b7 100644
--- a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
@@ -10,6 +10,7 @@ import type { ResponseFunctionCallOutputItemList } from 'openai/resources/respon
 import { contextItemInputFromMessage } from '../services/context-items.utils';
 import { persistContextItems } from '../services/context-items.append';
 import { ShellCommandTool } from '../../nodes/tools/shell_command/shell_command.tool';
+import { LocalMCPServerTool } from '../../nodes/mcp/localMcpServer.tool';
 
 type ToolCallErrorCode =
   | 'BAD_JSON_ARGS'
@@ -32,12 +33,27 @@ type ToolCallErrorPayload = {
 };
 type ToolCallStructuredOutput = ToolCallRaw | ToolCallErrorPayload;
 
+const isToolCallErrorPayload = (value: unknown): value is ToolCallErrorPayload => {
+  if (!value || typeof value !== 'object' || Array.isArray(value)) return false;
+  const candidate = value as Partial<ToolCallErrorPayload>;
+  return candidate.status === 'error' && typeof candidate.error_code === 'string';
+};
+
 type ToolCallResult = {
   status: 'success' | 'error';
   raw: ToolCallRaw;
   output: ToolCallStructuredOutput;
 };
 
+type ToolTracingFailure = {
+  toolCallId: string;
+  toolName: string;
+  toolSource?: string;
+  errorCode?: string;
+  errorMessage: string;
+  retriable?: boolean;
+};
+
 const isToolCallRaw = (value: unknown): value is ToolCallRaw =>
   typeof value === 'string' || Array.isArray(value);
 
@@ -177,6 +193,15 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     let startedEventId: string | null = null;
     let caughtError: unknown | null = null;
     let response: ToolCallResult | undefined;
+    let traceFailure: ToolTracingFailure | null = null;
+
+    const finalizeResponse = (result: ToolCallResult): ToolCallResult => {
+      const trace = this.buildToolTracingFailure({ response: result, tool, toolCall });
+      if (trace) {
+        traceFailure = trace;
+      }
+      return result;
+    };
 
     const createErrorResponse = (args: {
       code: ToolCallErrorCode;
@@ -212,7 +237,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           message: `Tool ${toolCall.name} is not registered.`,
           originalArgs: toolCall.args,
         });
-        return response;
+        return finalizeResponse(response);
       }
 
       let parsedArgs: unknown;
@@ -233,7 +258,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           originalArgs: toolCall.args,
           details,
         });
-        return response;
+        return finalizeResponse(response);
       }
 
       const validation = tool.schema.safeParse(parsedArgs);
@@ -245,7 +270,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
           originalArgs: parsedArgs,
           details: issues,
         });
-        return response;
+        return finalizeResponse(response);
       }
       const input = validation.data;
 
@@ -349,14 +374,14 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
         throw new Error('tool_response_missing');
       }
 
-      return response;
+      return finalizeResponse(response);
     } catch (err) {
       caughtError = err;
       throw err instanceof Error ? err : new Error(String(err));
     } finally {
       if (startedEventId) {
         try {
-          await this.finalizeToolExecutionEvent(startedEventId, response, caughtError);
+          await this.finalizeToolExecutionEvent(startedEventId, response, caughtError, traceFailure);
         } catch (finalizeErr: unknown) {
           this.logger.warn(
             `Failed to finalize tool execution event${this.format({
@@ -443,6 +468,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     eventId: string,
     response: ToolCallResult | undefined,
     caughtError: unknown | null,
+    traceFailure: ToolTracingFailure | null,
   ): Promise<void> {
     if (caughtError !== null) {
       const errorMessage = caughtError instanceof Error ? caughtError.message : String(caughtError);
@@ -450,6 +476,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
         eventId,
         status: ToolExecStatus.error,
         errorMessage,
+        errorCode: traceFailure?.errorCode ?? null,
         raw: null,
       });
       await this.eventsBus.publishEvent(eventId, 'update');
@@ -459,13 +486,50 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     if (!response) return;
 
     const status = response.status === 'success' ? ToolExecStatus.success : ToolExecStatus.error;
+    const resolvedErrorMessage =
+      status === ToolExecStatus.success ? null : traceFailure?.errorMessage ?? this.extractErrorMessage(response);
+    const resolvedErrorCode =
+      status === ToolExecStatus.success ? null : traceFailure?.errorCode ?? this.extractErrorCode(response);
     await this.runEvents.completeToolExecution({
       eventId,
       status,
       output: this.toJson(response.output ?? response.raw),
       raw: this.toJson(response.raw),
-      errorMessage: status === ToolExecStatus.success ? null : this.extractErrorMessage(response),
+      errorMessage: resolvedErrorMessage,
+      errorCode: resolvedErrorCode,
     });
     await this.eventsBus.publishEvent(eventId, 'update');
   }
+
+  private buildToolTracingFailure(params: {
+    response: ToolCallResult;
+    tool: FunctionTool | undefined;
+    toolCall: ToolCallMessage;
+  }): ToolTracingFailure | null {
+    if (params.response.status !== 'error') return null;
+    const errorPayload = isToolCallErrorPayload(params.response.output) ? params.response.output : null;
+    const message = errorPayload?.message ?? this.extractErrorMessage(params.response) ?? 'Tool execution failed';
+    const errorCode = errorPayload?.error_code;
+    return {
+      toolCallId: params.toolCall.callId,
+      toolName: params.tool?.name ?? params.toolCall.name,
+      toolSource: this.resolveToolSource(params.tool),
+      errorCode,
+      errorMessage: message,
+      retriable: errorPayload?.retriable,
+    };
+  }
+
+  private extractErrorCode(response: ToolCallResult | undefined): string | null {
+    if (!response) return null;
+    const payload = isToolCallErrorPayload(response.output) ? response.output : null;
+    return payload?.error_code ?? null;
+  }
+
+  private resolveToolSource(tool: FunctionTool | undefined): string | undefined {
+    if (!tool) return undefined;
+    if (tool instanceof LocalMCPServerTool) return 'mcp';
+    if (tool instanceof ShellCommandTool) return 'shell';
+    return undefined;
+  }
 }

From 9b685825a072214c64dc2a45fa390f07ea4a05c0 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Wed, 11 Feb 2026 17:48:32 +0000
Subject: [PATCH 5/7] fix(mcp): detect logical failure payloads

---
 .../__tests__/mcp.error.mapping.test.ts       |  58 ++++++++++
 .../src/llm/reducers/callTools.llm.reducer.ts | 106 +++++++++++++++++-
 2 files changed, 161 insertions(+), 3 deletions(-)

diff --git a/packages/platform-server/__tests__/mcp.error.mapping.test.ts b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
index 9fb7b3e53..cc1f66dc1 100644
--- a/packages/platform-server/__tests__/mcp.error.mapping.test.ts
+++ b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
@@ -73,3 +73,61 @@ describe('CallToolsLLMReducer MCP error mapping', () => {
     );
   });
 });
+
+describe('CallToolsLLMReducer MCP logical failure heuristic', () => {
+  const invokeWithPayload = async (payload: Record<string, unknown>) => {
+    const callTool = vi.fn(async () => ({ isError: false, content: JSON.stringify(payload) }));
+    const node = createNode(callTool);
+    const tool = new LocalMCPServerTool('codex_apply_patch', 'Codex patch', z.object({}), node);
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    const reducer = new CallToolsLLMReducer(runEvents as any, eventsBus as any).init({ tools: [tool as any] });
+    const state = buildState(tool.name, `call-${Math.random().toString(36).slice(2, 6)}`, JSON.stringify({}));
+    const ctx = createContext();
+    const result = await reducer.invoke(state, ctx as any);
+    const completion = runEvents.completeToolExecution.mock.calls[0]?.[0];
+    return { result, completion };
+  };
+
+  it('classifies payloads with status>=400 and error string as failures', async () => {
+    const { result, completion } = await invokeWithPayload({ status: 401, error: 'Search failed' });
+    expect(completion.status).toBe('error');
+    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
+    expect(String(completion.errorMessage ?? '')).toContain('status=401');
+
+    const last = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(last).toBeInstanceOf(ToolCallOutputMessage);
+    const payload = JSON.parse(last.text);
+    expect(payload.status).toBe('error');
+    expect(payload.error_code).toBe('MCP_CALL_ERROR');
+  });
+
+  it('uses statusCode field as fallback for logical failures', async () => {
+    const { completion } = await invokeWithPayload({ statusCode: 403, message: 'Forbidden' });
+    expect(completion.status).toBe('error');
+    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
+    expect(String(completion.errorMessage ?? '')).toContain('status=403');
+  });
+
+  it('ignores payloads without status metadata', async () => {
+    const { result, completion } = await invokeWithPayload({ error: 'domain data' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
+
+    const last = result.messages.at(-1) as ToolCallOutputMessage;
+    expect(last.text).toContain('domain data');
+  });
+
+  it('ignores payloads with non-error status codes', async () => {
+    const { completion } = await invokeWithPayload({ status: 200, error: 'none' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+  });
+
+  it('ignores non-numeric status strings', async () => {
+    const { completion } = await invokeWithPayload({ status: 'error', error: 'Bad' });
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+  });
+});
diff --git a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
index 5628a55b7..ecc41db8b 100644
--- a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
@@ -54,6 +54,12 @@ type ToolTracingFailure = {
   retriable?: boolean;
 };
 
+type McpLogicalFailure = {
+  status: number;
+  errorText: string;
+  payload: Record<string, unknown>;
+};
+
 const isToolCallRaw = (value: unknown): value is ToolCallRaw =>
   typeof value === 'string' || Array.isArray(value);
 
@@ -193,6 +199,7 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     let startedEventId: string | null = null;
     let caughtError: unknown | null = null;
     let response: ToolCallResult | undefined;
+    let normalizedRaw: ToolCallRaw | undefined;
     let traceFailure: ToolTracingFailure | null = null;
 
     const finalizeResponse = (result: ToolCallResult): ToolCallResult => {
@@ -341,15 +348,28 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
             details: { receivedType: typeof raw },
           });
         } else {
+          normalizedRaw = raw;
           const shouldFlagNonZeroShellExit =
-            tool instanceof ShellCommandTool && isNonZeroShellExitMessage(raw);
+            tool instanceof ShellCommandTool && isNonZeroShellExitMessage(normalizedRaw);
 
           response = {
             status: shouldFlagNonZeroShellExit ? 'error' : 'success',
-            raw,
-            output: raw,
+            raw: normalizedRaw,
+            output: normalizedRaw,
           };
         }
+
+        if (response?.status === 'success' && normalizedRaw !== undefined) {
+          const logicalFailure = this.detectMcpLogicalFailure({ tool, raw: normalizedRaw });
+          if (logicalFailure) {
+            response = createErrorResponse({
+              code: 'MCP_CALL_ERROR',
+              message: `MCP tool returned error payload (status=${logicalFailure.status}): ${logicalFailure.errorText}`,
+              originalArgs: input,
+              details: { payload: logicalFailure.payload },
+            });
+          }
+        }
       } catch (err) {
         this.logger.error(
           `Error occurred while executing tool${this.format({
@@ -532,4 +552,84 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     if (tool instanceof ShellCommandTool) return 'shell';
     return undefined;
   }
+
+  private detectMcpLogicalFailure(params: { tool: FunctionTool | undefined; raw: ToolCallRaw }): McpLogicalFailure | null {
+    if (!(params.tool instanceof LocalMCPServerTool)) return null;
+    if (typeof params.raw !== 'string') return null;
+    const parsed = this.safeParseJsonObject(params.raw);
+    if (!parsed) return null;
+    const status = this.extractNumericStatusCode(parsed);
+    if (status === null || status < 400) return null;
+    const errorText = this.extractMcpErrorMessage(parsed);
+    if (!errorText) return null;
+    return { status, errorText, payload: parsed };
+  }
+
+  private safeParseJsonObject(value: string): Record<string, unknown> | null {
+    const trimmed = value.trim();
+    if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) return null;
+    try {
+      const parsed: unknown = JSON.parse(trimmed);
+      if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+        return null;
+      }
+      return parsed as Record<string, unknown>;
+    } catch {
+      return null;
+    }
+  }
+
+  private extractNumericStatusCode(payload: Record<string, unknown>): number | null {
+    const rawStatus = payload.status ?? payload.statusCode;
+    const numeric = this.toNumber(rawStatus);
+    if (numeric === null || Number.isNaN(numeric)) return null;
+    return numeric;
+  }
+
+  private toNumber(value: unknown): number | null {
+    if (typeof value === 'number' && Number.isFinite(value)) return value;
+    if (typeof value === 'string') {
+      const trimmed = value.trim();
+      if (!trimmed) return null;
+      const parsed = Number.parseInt(trimmed, 10);
+      return Number.isFinite(parsed) ? parsed : null;
+    }
+    return null;
+  }
+
+  private extractMcpErrorMessage(payload: Record<string, unknown>): string | null {
+    const direct = this.normalizeErrorText(payload.error) ?? this.normalizeErrorText(payload.message);
+    if (direct) return direct;
+    if ('detail' in payload) {
+      return this.extractDetailError(payload.detail as unknown);
+    }
+    return null;
+  }
+
+  private extractDetailError(detail: unknown): string | null {
+    if (!detail) return null;
+    if (typeof detail === 'string') return this.normalizeErrorText(detail);
+    if (Array.isArray(detail)) {
+      for (const entry of detail) {
+        const text = this.extractDetailError(entry);
+        if (text) return text;
+      }
+      return null;
+    }
+    if (typeof detail === 'object') {
+      const record = detail as Record<string, unknown>;
+      const nested = this.normalizeErrorText(record.error) ?? this.normalizeErrorText(record.message);
+      if (nested) return nested;
+      if ('detail' in record) {
+        return this.extractDetailError(record.detail as unknown);
+      }
+    }
+    return null;
+  }
+
+  private normalizeErrorText(value: unknown): string | null {
+    if (typeof value !== 'string') return null;
+    const trimmed = value.trim();
+    return trimmed.length > 0 ? trimmed : null;
+  }
 }

From 5f108f2cfc705c4f6fa5702b3d2056ec13f604d3 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Wed, 11 Feb 2026 17:56:39 +0000
Subject: [PATCH 6/7] fix(mcp): fall back to statusCode

---
 .../__tests__/mcp.error.mapping.test.ts           |  7 +++++++
 .../src/llm/reducers/callTools.llm.reducer.ts     | 15 +++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/packages/platform-server/__tests__/mcp.error.mapping.test.ts b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
index cc1f66dc1..be5b5920e 100644
--- a/packages/platform-server/__tests__/mcp.error.mapping.test.ts
+++ b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
@@ -109,6 +109,13 @@ describe('CallToolsLLMReducer MCP logical failure heuristic', () => {
     expect(String(completion.errorMessage ?? '')).toContain('status=403');
   });
 
+  it('falls back to statusCode when status is non-numeric', async () => {
+    const { completion } = await invokeWithPayload({ status: 'error', statusCode: 500, message: 'Internal error' });
+    expect(completion.status).toBe('error');
+    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
+    expect(String(completion.errorMessage ?? '')).toContain('status=500');
+  });
+
   it('ignores payloads without status metadata', async () => {
     const { result, completion } = await invokeWithPayload({ error: 'domain data' });
     expect(completion.status).toBe('success');
diff --git a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
index ecc41db8b..d8aa2270c 100644
--- a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
@@ -580,10 +580,17 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
   }
 
   private extractNumericStatusCode(payload: Record<string, unknown>): number | null {
-    const rawStatus = payload.status ?? payload.statusCode;
-    const numeric = this.toNumber(rawStatus);
-    if (numeric === null || Number.isNaN(numeric)) return null;
-    return numeric;
+    const status = this.toNumber(payload.status);
+    if (status !== null && !Number.isNaN(status)) {
+      return status;
+    }
+
+    const statusCode = this.toNumber(payload.statusCode);
+    if (statusCode !== null && !Number.isNaN(statusCode)) {
+      return statusCode;
+    }
+
+    return null;
   }
 
   private toNumber(value: unknown): number | null {

From e2e27a8adfab5e6468a2fc3411c7995bcc5f6a64 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Thu, 12 Feb 2026 18:46:48 +0000
Subject: [PATCH 7/7] refactor(mcp): remove payload heuristic

---
 .../callTools.reducer.tracing.test.ts         |  14 +--
 .../__tests__/mcp.error.mapping.test.ts       |  36 +++---
 .../src/llm/reducers/callTools.llm.reducer.ts | 112 +-----------------
 3 files changed, 27 insertions(+), 135 deletions(-)

diff --git a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
index c2c4d4656..328bd7c79 100644
--- a/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
+++ b/packages/platform-server/__tests__/callTools.reducer.tracing.test.ts
@@ -56,9 +56,9 @@ describe('CallToolsLLMReducer tracing via run events', () => {
     expect(completion.errorCode).toBe('MCP_CALL_ERROR');
   });
 
-  it('marks MCP logical failures as failed tool executions', async () => {
-    const largeOutput = 'x'.repeat(60000);
-    const callTool = vi.fn(async () => ({ isError: false, content: largeOutput }));
+  it('does not reclassify MCP payloads without isError flags', async () => {
+    const payload = JSON.stringify({ status: 500, error: 'Search failed' });
+    const callTool = vi.fn(async () => ({ isError: false, content: payload }));
     const node = createMcpNode(callTool);
     const tool = new LocalMCPServerTool('codex_apply_patch', 'Patch tool', z.object({}), node);
 
@@ -71,13 +71,13 @@ describe('CallToolsLLMReducer tracing via run events', () => {
     const result = await reducer.invoke(state, ctx as any);
     const output = result.messages.at(-1) as ToolCallOutputMessage;
     expect(output).toBeInstanceOf(ToolCallOutputMessage);
-    expect(output.text).toContain('longer than 50000');
+    expect(output.text).toContain('Search failed');
 
     expect(runEvents.completeToolExecution).toHaveBeenCalledTimes(1);
     const [completion] = runEvents.completeToolExecution.mock.calls[0];
-    expect(completion.status).toBe('error');
-    expect(String(completion.errorMessage ?? '')).toContain('longer than 50000');
-    expect(completion.errorCode).toBe('TOOL_OUTPUT_TOO_LARGE');
+    expect(completion.status).toBe('success');
+    expect(completion.errorMessage).toBeNull();
+    expect(completion.errorCode ?? null).toBeNull();
   });
 
   it('keeps shell command tracing flagged on non-zero exit codes', async () => {
diff --git a/packages/platform-server/__tests__/mcp.error.mapping.test.ts b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
index be5b5920e..cc8266ab4 100644
--- a/packages/platform-server/__tests__/mcp.error.mapping.test.ts
+++ b/packages/platform-server/__tests__/mcp.error.mapping.test.ts
@@ -74,7 +74,7 @@ describe('CallToolsLLMReducer MCP error mapping', () => {
   });
 });
 
-describe('CallToolsLLMReducer MCP logical failure heuristic', () => {
+describe('CallToolsLLMReducer MCP payload handling (protocol-only)', () => {
   const invokeWithPayload = async (payload: Record<string, unknown>) => {
     const callTool = vi.fn(async () => ({ isError: false, content: JSON.stringify(payload) }));
     const node = createNode(callTool);
@@ -89,34 +89,32 @@ describe('CallToolsLLMReducer MCP logical failure heuristic', () => {
     return { result, completion };
   };
 
-  it('classifies payloads with status>=400 and error string as failures', async () => {
+  it('treats HTTP-looking payloads as success when isError is false', async () => {
     const { result, completion } = await invokeWithPayload({ status: 401, error: 'Search failed' });
-    expect(completion.status).toBe('error');
-    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
-    expect(String(completion.errorMessage ?? '')).toContain('status=401');
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
 
     const last = result.messages.at(-1) as ToolCallOutputMessage;
     expect(last).toBeInstanceOf(ToolCallOutputMessage);
-    const payload = JSON.parse(last.text);
-    expect(payload.status).toBe('error');
-    expect(payload.error_code).toBe('MCP_CALL_ERROR');
+    expect(last.text).toContain('Search failed');
   });
 
-  it('uses statusCode field as fallback for logical failures', async () => {
+  it('does not infer failures from statusCode when isError is false', async () => {
     const { completion } = await invokeWithPayload({ statusCode: 403, message: 'Forbidden' });
-    expect(completion.status).toBe('error');
-    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
-    expect(String(completion.errorMessage ?? '')).toContain('status=403');
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
   });
 
-  it('falls back to statusCode when status is non-numeric', async () => {
+  it('does not treat string status with numeric statusCode as failure without isError', async () => {
     const { completion } = await invokeWithPayload({ status: 'error', statusCode: 500, message: 'Internal error' });
-    expect(completion.status).toBe('error');
-    expect(completion.errorCode).toBe('MCP_CALL_ERROR');
-    expect(String(completion.errorMessage ?? '')).toContain('status=500');
+    expect(completion.status).toBe('success');
+    expect(completion.errorCode ?? null).toBeNull();
+    expect(completion.errorMessage).toBeNull();
   });
 
-  it('ignores payloads without status metadata', async () => {
+  it('still returns success for payloads without status metadata', async () => {
     const { result, completion } = await invokeWithPayload({ error: 'domain data' });
     expect(completion.status).toBe('success');
     expect(completion.errorCode ?? null).toBeNull();
@@ -126,13 +124,13 @@ describe('CallToolsLLMReducer MCP logical failure heuristic', () => {
     expect(last.text).toContain('domain data');
   });
 
-  it('ignores payloads with non-error status codes', async () => {
+  it('keeps success for payloads with non-error status codes', async () => {
     const { completion } = await invokeWithPayload({ status: 200, error: 'none' });
     expect(completion.status).toBe('success');
     expect(completion.errorCode ?? null).toBeNull();
   });
 
-  it('ignores non-numeric status strings', async () => {
+  it('treats non-numeric status strings as success without isError flags', async () => {
     const { completion } = await invokeWithPayload({ status: 'error', error: 'Bad' });
     expect(completion.status).toBe('success');
     expect(completion.errorCode ?? null).toBeNull();
diff --git a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
index d8aa2270c..293ad1f68 100644
--- a/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/callTools.llm.reducer.ts
@@ -54,12 +54,6 @@ type ToolTracingFailure = {
   retriable?: boolean;
 };
 
-type McpLogicalFailure = {
-  status: number;
-  errorText: string;
-  payload: Record<string, unknown>;
-};
-
 const isToolCallRaw = (value: unknown): value is ToolCallRaw =>
   typeof value === 'string' || Array.isArray(value);
 
@@ -199,7 +193,6 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     let startedEventId: string | null = null;
     let caughtError: unknown | null = null;
     let response: ToolCallResult | undefined;
-    let normalizedRaw: ToolCallRaw | undefined;
     let traceFailure: ToolTracingFailure | null = null;
 
     const finalizeResponse = (result: ToolCallResult): ToolCallResult => {
@@ -348,28 +341,15 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
             details: { receivedType: typeof raw },
           });
         } else {
-          normalizedRaw = raw;
           const shouldFlagNonZeroShellExit =
-            tool instanceof ShellCommandTool && isNonZeroShellExitMessage(normalizedRaw);
+            tool instanceof ShellCommandTool && isNonZeroShellExitMessage(raw);
 
           response = {
             status: shouldFlagNonZeroShellExit ? 'error' : 'success',
-            raw: normalizedRaw,
-            output: normalizedRaw,
+            raw,
+            output: raw,
           };
         }
-
-        if (response?.status === 'success' && normalizedRaw !== undefined) {
-          const logicalFailure = this.detectMcpLogicalFailure({ tool, raw: normalizedRaw });
-          if (logicalFailure) {
-            response = createErrorResponse({
-              code: 'MCP_CALL_ERROR',
-              message: `MCP tool returned error payload (status=${logicalFailure.status}): ${logicalFailure.errorText}`,
-              originalArgs: input,
-              details: { payload: logicalFailure.payload },
-            });
-          }
-        }
       } catch (err) {
         this.logger.error(
           `Error occurred while executing tool${this.format({
@@ -553,90 +533,4 @@ export class CallToolsLLMReducer extends Reducer<LLMState, LLMContext> {
     return undefined;
   }
 
-  private detectMcpLogicalFailure(params: { tool: FunctionTool | undefined; raw: ToolCallRaw }): McpLogicalFailure | null {
-    if (!(params.tool instanceof LocalMCPServerTool)) return null;
-    if (typeof params.raw !== 'string') return null;
-    const parsed = this.safeParseJsonObject(params.raw);
-    if (!parsed) return null;
-    const status = this.extractNumericStatusCode(parsed);
-    if (status === null || status < 400) return null;
-    const errorText = this.extractMcpErrorMessage(parsed);
-    if (!errorText) return null;
-    return { status, errorText, payload: parsed };
-  }
-
-  private safeParseJsonObject(value: string): Record<string, unknown> | null {
-    const trimmed = value.trim();
-    if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) return null;
-    try {
-      const parsed: unknown = JSON.parse(trimmed);
-      if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
-        return null;
-      }
-      return parsed as Record<string, unknown>;
-    } catch {
-      return null;
-    }
-  }
-
-  private extractNumericStatusCode(payload: Record<string, unknown>): number | null {
-    const status = this.toNumber(payload.status);
-    if (status !== null && !Number.isNaN(status)) {
-      return status;
-    }
-
-    const statusCode = this.toNumber(payload.statusCode);
-    if (statusCode !== null && !Number.isNaN(statusCode)) {
-      return statusCode;
-    }
-
-    return null;
-  }
-
-  private toNumber(value: unknown): number | null {
-    if (typeof value === 'number' && Number.isFinite(value)) return value;
-    if (typeof value === 'string') {
-      const trimmed = value.trim();
-      if (!trimmed) return null;
-      const parsed = Number.parseInt(trimmed, 10);
-      return Number.isFinite(parsed) ? parsed : null;
-    }
-    return null;
-  }
-
-  private extractMcpErrorMessage(payload: Record<string, unknown>): string | null {
-    const direct = this.normalizeErrorText(payload.error) ?? this.normalizeErrorText(payload.message);
-    if (direct) return direct;
-    if ('detail' in payload) {
-      return this.extractDetailError(payload.detail as unknown);
-    }
-    return null;
-  }
-
-  private extractDetailError(detail: unknown): string | null {
-    if (!detail) return null;
-    if (typeof detail === 'string') return this.normalizeErrorText(detail);
-    if (Array.isArray(detail)) {
-      for (const entry of detail) {
-        const text = this.extractDetailError(entry);
-        if (text) return text;
-      }
-      return null;
-    }
-    if (typeof detail === 'object') {
-      const record = detail as Record<string, unknown>;
-      const nested = this.normalizeErrorText(record.error) ?? this.normalizeErrorText(record.message);
-      if (nested) return nested;
-      if ('detail' in record) {
-        return this.extractDetailError(record.detail as unknown);
-      }
-    }
-    return null;
-  }
-
-  private normalizeErrorText(value: unknown): string | null {
-    if (typeof value !== 'string') return null;
-    const trimmed = value.trim();
-    return trimmed.length > 0 ? trimmed : null;
-  }
 }