Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions src/lib/__tests__/agent-interface.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import { runAgent } from '../agent-interface';
import type { WizardOptions } from '../../utils/types';

// Mock dependencies.
// No factory is supplied for these project modules, so Jest substitutes its
// automatic mocks; the tests below overwrite the pieces they need
// (clack's `spinner` and `log`).
jest.mock('../../utils/clack');
jest.mock('../../utils/analytics');
jest.mock('../../utils/debug');

// Mock the SDK module.
// `mockQuery` stands in for the SDK's `query` export; each test sets its return
// value to a generator that plays back a scripted message sequence.
const mockQuery = jest.fn();
jest.mock('@anthropic-ai/claude-agent-sdk', () => ({
// Forward through an arrow function so `mockQuery` is looked up when `query()`
// is actually called rather than when this factory runs.
// NOTE(review): presumably needed because Jest hoists `jest.mock` above the
// `const mockQuery` initialisation - confirm against the Jest docs.
query: (...args: unknown[]) => mockQuery(...args),
}));

// Get mocked clack for spinner.
// The cast only changes the static type so the tests can assign stubbed
// `spinner` / `log` members on the mocked module.
import clack from '../../utils/clack';
const mockClack = clack as jest.Mocked<typeof clack>;

/**
 * Tests for `runAgent`.
 *
 * The SDK's `query` export is replaced by `mockQuery`, which each test points
 * at a generator that plays back a scripted sequence of SDK messages and then
 * throws, so the post-result error handling in `runAgent` can be exercised.
 */
describe('runAgent', () => {
  // Stand-in for the clack spinner handed to runAgent. All three members are
  // functions (a clack spinner exposes `message` as a method, not a string),
  // so any spinner call made while handling messages hits a stub instead of
  // raising an unrelated TypeError.
  let mockSpinner: {
    start: jest.Mock;
    stop: jest.Mock;
    message: jest.Mock;
  };

  const defaultOptions: WizardOptions = {
    debug: false,
    installDir: '/test/dir',
    forceInstall: false,
    default: false,
    signup: false,
    localMcp: false,
    ci: false,
  };

  const defaultAgentConfig = {
    workingDirectory: '/test/dir',
    mcpServers: {},
    model: 'claude-opus-4-5-20251101',
  };

  // Builds the `system/init` message every scripted run starts with.
  // A fresh object is returned per call so tests never share message state.
  const systemInitMessage = () => ({
    type: 'system',
    subtype: 'init',
    model: 'claude-opus-4-5-20251101',
    tools: [],
    mcp_servers: [],
  });

  // Calls runAgent with the shared fixtures. `mockSpinner` is read at call
  // time, so the instance created in `beforeEach` is the one that is used.
  const invokeRunAgent = () =>
    runAgent(
      defaultAgentConfig,
      'test prompt',
      defaultOptions,
      mockSpinner as unknown as ReturnType<typeof clack.spinner>,
      {
        successMessage: 'Test success',
        errorMessage: 'Test error',
      },
    );

  beforeEach(() => {
    jest.clearAllMocks();

    mockSpinner = {
      start: jest.fn(),
      stop: jest.fn(),
      message: jest.fn(),
    };

    mockClack.spinner = jest.fn().mockReturnValue(mockSpinner);
    mockClack.log = {
      step: jest.fn(),
      success: jest.fn(),
      error: jest.fn(),
      warn: jest.fn(),
      warning: jest.fn(),
      info: jest.fn(),
      message: jest.fn(),
    };
  });

  describe('race condition handling', () => {
    it('should return success when agent completes successfully then SDK cleanup fails', async () => {
      // This simulates the race condition:
      // 1. Agent completes with success result
      // 2. signalDone() is called, completing the prompt generator
      // 3. SDK tries to send cleanup command while streaming is active
      // 4. SDK throws an error
      // The fix should recognize we already got a success and return success anyway

      function* mockGeneratorWithCleanupError() {
        yield systemInitMessage();

        yield {
          type: 'result',
          subtype: 'success',
          is_error: false,
          result: 'Agent completed successfully',
        };

        // Simulate the SDK cleanup error that occurs after success
        throw new Error('only prompt commands are supported in streaming mode');
      }

      mockQuery.mockReturnValue(mockGeneratorWithCleanupError());

      const result = await invokeRunAgent();

      // Should return success (empty object), not throw
      expect(result).toEqual({});
      expect(mockSpinner.stop).toHaveBeenCalledWith('Test success');
    });

    it('should still throw when no success result was received before error', async () => {
      // If we never got a success result, errors should propagate normally

      function* mockGeneratorWithOnlyError() {
        yield systemInitMessage();

        // No success result, just an error
        throw new Error('Actual SDK error');
      }

      mockQuery.mockReturnValue(mockGeneratorWithOnlyError());

      await expect(invokeRunAgent()).rejects.toThrow('Actual SDK error');

      expect(mockSpinner.stop).toHaveBeenCalledWith('Test error');
    });

    it('should not treat error results as success', async () => {
      // A result with is_error: true should not count as success
      // Even if subtype is 'success', the is_error flag takes precedence

      function* mockGeneratorWithErrorResult() {
        yield systemInitMessage();

        yield {
          type: 'result',
          subtype: 'success', // subtype can be success but is_error true
          is_error: true,
          result: 'API Error: 500 Internal Server Error',
        };

        throw new Error('Process exited with code 1');
      }

      mockQuery.mockReturnValue(mockGeneratorWithErrorResult());

      const result = await invokeRunAgent();

      // Should return API error, not success
      expect(result.error).toBe('WIZARD_API_ERROR');
      expect(result.message).toContain('API Error');
      // The success message must never be shown for an errored result.
      expect(mockSpinner.stop).not.toHaveBeenCalledWith('Test success');
    });
  });
});
59 changes: 48 additions & 11 deletions src/lib/agent-interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,8 @@ export function initializeAgent(
const gatewayUrl = getLlmGatewayUrlFromHost(config.posthogApiHost);
process.env.ANTHROPIC_BASE_URL = gatewayUrl;
process.env.ANTHROPIC_AUTH_TOKEN = config.posthogApiKey;
// Use CLAUDE_CODE_OAUTH_TOKEN to override any stored /login credentials
process.env.CLAUDE_CODE_OAUTH_TOKEN = config.posthogApiKey;
// Disable experimental betas (like input_examples) that the LLM gateway doesn't support
process.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS = 'true';

Expand Down Expand Up @@ -377,6 +379,8 @@ export async function runAgent(

const startTime = Date.now();
const collectedText: string[] = [];
// Track if we received a successful result (before any cleanup errors)
let receivedSuccessResult = false;

// Workaround for SDK bug: stdin closes before canUseTool responses can be sent.
// The fix is to use an async generator for the prompt that stays open until
Expand All @@ -398,6 +402,31 @@ export async function runAgent(
await resultReceived;
};

// Shared success epilogue for the agent run. Called with no argument on the
// normal path, and with the caught error when a failure arrives after a
// successful result has already been seen.
// Logs the outcome, reports the completion event with the elapsed time,
// stops the spinner with the success message and returns an empty result.
const completeWithSuccess = (
  suppressedError?: Error,
): { error?: AgentErrorType; message?: string } => {
  const elapsedMs = Date.now() - startTime;
  const elapsedSeconds = Math.round(elapsedMs / 1000);

  if (!suppressedError) {
    // Normal completion: nothing was swallowed.
    logToFile(`Agent run completed in ${elapsedSeconds}s`);
  } else {
    // Recovery path: record what is being ignored so it stays traceable.
    logToFile(
      `Ignoring post-completion error, agent completed successfully in ${elapsedSeconds}s`,
    );
    logToFile('Suppressed error:', suppressedError.message);
  }

  // Both paths report the same completion event.
  analytics.capture(WIZARD_INTERACTION_EVENT_NAME, {
    action: 'agent integration completed',
    duration_ms: elapsedMs,
    duration_seconds: elapsedSeconds,
  });

  spinner.stop(successMessage);
  return {};
};

try {
// Tools needed for the wizard:
// - File operations: Read, Write, Edit
Expand Down Expand Up @@ -428,7 +457,11 @@ export async function runAgent(
settingSources: ['project'],
// Explicitly enable required tools including Skill
allowedTools,
env: { ...process.env },
env: {
...process.env,
// Prevent user's Anthropic API key from overriding the wizard's OAuth token
ANTHROPIC_API_KEY: undefined,
},
canUseTool: (toolName: string, input: unknown) => {
logToFile('canUseTool called:', { toolName, input });
const result = wizardCanUseTool(
Expand All @@ -454,11 +487,15 @@ export async function runAgent(
handleSDKMessage(message, options, spinner, collectedText);
// Signal completion when result received
if (message.type === 'result') {
// Track successful results before any potential cleanup errors
// The SDK may emit a second error result during cleanup due to a race condition
if (message.subtype === 'success' && !message.is_error) {
receivedSuccessResult = true;
}
signalDone!();
}
}

const durationMs = Date.now() - startTime;
const outputText = collectedText.join('\n');

// Check for error markers in the agent's output
Expand Down Expand Up @@ -487,19 +524,19 @@ export async function runAgent(
return { error: AgentErrorType.API_ERROR, message: outputText };
}

logToFile(`Agent run completed in ${Math.round(durationMs / 1000)}s`);
analytics.capture(WIZARD_INTERACTION_EVENT_NAME, {
action: 'agent integration completed',
duration_ms: durationMs,
duration_seconds: Math.round(durationMs / 1000),
});

spinner.stop(successMessage);
return {};
return completeWithSuccess();
} catch (error) {
// Signal done to unblock the async generator
signalDone!();

// If we already received a successful result, the error is from SDK cleanup
// This happens due to a race condition: the SDK tries to send a cleanup command
// after the prompt stream closes, but streaming mode is still active.
// See: https://github.com/anthropics/claude-agent-sdk-typescript/issues/41
if (receivedSuccessResult) {
return completeWithSuccess(error as Error);
}

// Check if we collected an API error before the exception was thrown
const outputText = collectedText.join('\n');

Expand Down
Loading