From 3ac0a6eb1e0713976f5000a698badb58d0c0581b Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Wed, 4 Mar 2026 14:19:27 +0000 Subject: [PATCH 1/3] fix: completionPrompt now continues existing session instead of recursive answer() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, completionPrompt spawned a recursive this.answer() call which created a fresh TaskManager and iteration counter, losing all context from the main turn. This caused structured outputs (pr_urls, files_changed) to be overwritten with empty results from the follow-up turn that had no memory of Phase 1's work. Now completionPrompt appends a user message to the existing conversation and runs one more streamText pass with the same tools, TaskManager, and history — just one more message in the same session rather than a new agent execution. Co-Authored-By: Claude Opus 4.6 --- npm/src/agent/ProbeAgent.js | 97 ++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 24 deletions(-) diff --git a/npm/src/agent/ProbeAgent.js b/npm/src/agent/ProbeAgent.js index f9e0659b..e71c6a71 100644 --- a/npm/src/agent/ProbeAgent.js +++ b/npm/src/agent/ProbeAgent.js @@ -3582,23 +3582,24 @@ Follow these instructions carefully: // Continue even if storage fails } - // Completion prompt handling - run a follow-up prompt after attempt_completion for validation/review - // This runs BEFORE mermaid validation and JSON schema validation - // Skip if we're already in a completion prompt follow-up call or if no completion prompt is configured + // Completion prompt handling - inject one more user message into the existing conversation + // This continues the SAME agentic session (same tools, same TaskManager, same history) + // rather than spawning a recursive this.answer() call which would reset state if (completionAttempted && this.completionPrompt && !options._completionPromptProcessed) { if (this.debug) { - console.log('[DEBUG] Running completion prompt for post-completion validation/review...'); + console.log('[DEBUG] Running completion prompt as continuation of current session...'); } try { - // Record completion prompt start in telemetry + const originalResult = finalResult; + if (this.tracer) { this.tracer.recordEvent('completion_prompt.started', { 'completion_prompt.original_result_length': finalResult?.length || 0 }); } - // Create the completion prompt with the current result as context + // Append completion prompt as a user message to the existing conversation const completionPromptMessage = `${this.completionPrompt} Here is the result to review: @@ -3608,32 +3609,80 @@ ${finalResult} After reviewing, provide your final answer using attempt_completion.`; - // Make a follow-up call with the completion prompt - // Pass _completionPromptProcessed to prevent infinite loops - // Save output buffers — the recursive answer() must not destroy DSL output() content - const savedOutputItems = this._outputBuffer ? [...this._outputBuffer.items] : []; - const savedExtractedBlocks = this._extractedRawBlocks ? [...this._extractedRawBlocks] : []; - const completionResult = await this.answer(completionPromptMessage, [], { - ...options, - _completionPromptProcessed: true - }); - // Restore output buffers so the parent call can append them to the final result - if (this._outputBuffer) { - this._outputBuffer.items = savedOutputItems; + currentMessages.push({ role: 'user', content: completionPromptMessage }); + + // Reset completion tracking for the follow-up turn + completionResult = null; + completionAttempted = false; + + // Run one more streamText pass with the same tools and conversation context + // Give a small number of extra iterations for the follow-up + const completionMaxIterations = 5; + const completionStreamOptions = { + model: this.provider ? this.provider(this.model) : this.model, + messages: this.prepareMessagesWithImages(currentMessages), + tools, + stopWhen: stepCountIs(completionMaxIterations), + maxTokens: maxResponseTokens, + temperature: 0.3, + onStepFinish: ({ toolResults, text, finishReason, usage }) => { + if (usage) { + this.tokenCounter.recordUsage(usage); + } + if (options.onStream && text) { + options.onStream(text); + } + if (this.debug) { + console.log(`[DEBUG] Completion prompt step finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`); + } + } + }; + + const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens); + if (providerOpts) { + completionStreamOptions.providerOptions = providerOpts; + } + + const cpResult = await this.streamTextWithRetryAndFallback(completionStreamOptions); + const cpFinalText = await cpResult.text; + const cpUsage = await cpResult.usage; + if (cpUsage) { + this.tokenCounter.recordUsage(cpUsage, cpResult.experimental_providerMetadata); + } + + // Append follow-up messages to conversation history + const cpMessages = await cpResult.response?.messages; + if (cpMessages) { + for (const msg of cpMessages) { + currentMessages.push(msg); + } } - this._extractedRawBlocks = savedExtractedBlocks; - // Update finalResult with the result from the completion prompt - finalResult = completionResult; + // Use new completion result if the agent called attempt_completion again, + // otherwise keep the original result (the follow-up may have just done side-effects) + if (completionResult) { + finalResult = completionResult; + completionAttempted = true; + } else if (cpFinalText && cpFinalText.trim().length > 0) { + finalResult = cpFinalText; + completionAttempted = true; + } else { + // Follow-up produced nothing useful — keep the original + finalResult = originalResult; + completionAttempted = true; + if (this.debug) { + console.log('[DEBUG] Completion prompt returned empty result, keeping original.'); + } + } if (this.debug) { - console.log(`[DEBUG] Completion prompt finished. New result length: ${finalResult?.length || 0}`); + console.log(`[DEBUG] Completion prompt finished. Final result length: ${finalResult?.length || 0}`); } - // Record completion prompt completion in telemetry if (this.tracer) { this.tracer.recordEvent('completion_prompt.completed', { - 'completion_prompt.final_result_length': finalResult?.length || 0 + 'completion_prompt.final_result_length': finalResult?.length || 0, + 'completion_prompt.used_original': finalResult === originalResult }); } } catch (error) { From 4a4aa49d6be85257e8dc6d8fda8f96f2c0a31a99 Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Wed, 4 Mar 2026 14:21:10 +0000 Subject: [PATCH 2/3] fix: completion prompt footer instructs AI to preserve original if valid Tell the AI to respond with its previous answer as-is if everything checks out, and only modify + re-respond with the full answer if something actually needs fixing. Co-Authored-By: Claude Opus 4.6 --- npm/src/agent/ProbeAgent.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/npm/src/agent/ProbeAgent.js b/npm/src/agent/ProbeAgent.js index e71c6a71..98163f64 100644 --- a/npm/src/agent/ProbeAgent.js +++ b/npm/src/agent/ProbeAgent.js @@ -3607,7 +3607,7 @@ Here is the result to review: ${finalResult} -After reviewing, provide your final answer using attempt_completion.`; +Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is using attempt_completion. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix) using attempt_completion.`; currentMessages.push({ role: 'user', content: completionPromptMessage }); From 0a50d65e76756f9bf196e6849856495482de655e Mon Sep 17 00:00:00 2001 From: Leonid Bugaev Date: Wed, 4 Mar 2026 14:27:57 +0000 Subject: [PATCH 3/3] test: add behavioral tests for completion prompt session continuity Tests verify: - streamText called twice (not recursive answer()) when completionPrompt is set - Original result preserved when completion prompt returns empty - Completion prompt skipped when _completionPromptProcessed flag is set - Original result preserved when completion prompt throws - Updated result used when completion prompt calls attempt_completion again - No completion prompt when none is configured - Updated message format test for new footer text Co-Authored-By: Claude Opus 4.6 --- npm/tests/unit/completion-prompt.test.js | 248 ++++++++++++++++++++++- 1 file changed, 247 insertions(+), 1 deletion(-) diff --git a/npm/tests/unit/completion-prompt.test.js b/npm/tests/unit/completion-prompt.test.js index e24ae04d..a1128343 100644 --- a/npm/tests/unit/completion-prompt.test.js +++ b/npm/tests/unit/completion-prompt.test.js @@ -154,13 +154,15 @@ Here is the result to review: ${finalResult} -After reviewing, provide your final answer using attempt_completion.`; +Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is using attempt_completion. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix) using attempt_completion.`; expect(formattedMessage).toContain(completionPrompt); expect(formattedMessage).toContain(finalResult); expect(formattedMessage).toContain(''); expect(formattedMessage).toContain(''); expect(formattedMessage).toContain('attempt_completion'); + expect(formattedMessage).toContain('Double-check your response'); + expect(formattedMessage).toContain('respond with your previous answer exactly as-is'); }); }); @@ -377,3 +379,247 @@ describe('completionPrompt isolation', () => { expect(baseAgent.completionPrompt).toBe('Original prompt'); }); }); + +describe('completionPrompt session continuity behavior', () => { + // Helper to create a mock streamText result + function createMockStreamResult(text, messages = []) { + return { + text: Promise.resolve(text), + usage: Promise.resolve({ promptTokens: 10, completionTokens: 5 }), + response: { messages: Promise.resolve(messages) }, + experimental_providerMetadata: undefined, + steps: Promise.resolve([]), + }; + } + + // Helper to set up agent with mocked internals so answer() reaches streamText + function createMockedAgent(options = {}) { + const agent = new ProbeAgent({ + completionPrompt: options.completionPrompt || 'Check your work', + path: process.cwd(), + model: 'test-model', + ...options, + }); + + // Mock getSystemMessage to avoid filesystem access + jest.spyOn(agent, 'getSystemMessage').mockResolvedValue('You are a test agent.'); + + // Mock prepareMessagesWithImages to pass through + jest.spyOn(agent, 'prepareMessagesWithImages').mockImplementation(msgs => msgs); + + // Mock _buildThinkingProviderOptions + jest.spyOn(agent, '_buildThinkingProviderOptions').mockReturnValue(null); + + // Ensure provider is null so model string is used directly + agent.provider = null; + + // Mock hooks + agent.hooks = { emit: jest.fn().mockResolvedValue(undefined) }; + + // Mock storage adapter + agent.storageAdapter = { saveMessage: jest.fn().mockResolvedValue(undefined) }; + + return agent; + } + + test('should call streamText twice (not recursive answer) when completionPrompt is set', async () => { + const agent = createMockedAgent(); + + const streamCalls = []; + let streamCallCount = 0; + let onCompleteFn = null; + + // Capture the onComplete callback from _buildNativeTools + const origBuild = agent._buildNativeTools.bind(agent); + jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => { + onCompleteFn = onComplete; + return origBuild(opts, onComplete, ctx); + }); + + jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async (opts) => { + streamCallCount++; + streamCalls.push({ + callNumber: streamCallCount, + messages: [...(opts.messages || [])], + }); + + if (streamCallCount === 1) { + // Simulate attempt_completion being called during main turn + if (onCompleteFn) onCompleteFn('{"summary":"Done","pr_urls":["https://github.com/test/1"]}'); + return createMockStreamResult('', [{ role: 'assistant', content: 'done' }]); + } + // Completion prompt follow-up + return createMockStreamResult('Looks good', [{ role: 'assistant', content: 'verified' }]); + }); + + const answerSpy = jest.spyOn(agent, 'answer'); + const result = await agent.answer('Implement feature'); + + // answer() called exactly once (no recursive call) + expect(answerSpy).toHaveBeenCalledTimes(1); + + // streamText called twice: main loop + completion prompt follow-up + expect(streamCallCount).toBe(2); + + // Second call should have more messages (completion prompt user message appended) + expect(streamCalls[1].messages.length).toBeGreaterThan(streamCalls[0].messages.length); + + // Verify the appended user message contains the completion prompt and result + const lastMsg = streamCalls[1].messages[streamCalls[1].messages.length - 1]; + expect(lastMsg.role).toBe('user'); + expect(lastMsg.content).toContain('Check your work'); + expect(lastMsg.content).toContain(''); + expect(lastMsg.content).toContain('pr_urls'); + expect(lastMsg.content).toContain('Double-check your response'); + + jest.restoreAllMocks(); + }); + + test('should preserve original result when completion prompt returns empty', async () => { + const agent = createMockedAgent(); + + let streamCallCount = 0; + let onCompleteFn = null; + + const origBuild = agent._buildNativeTools.bind(agent); + jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => { + onCompleteFn = onComplete; + return origBuild(opts, onComplete, ctx); + }); + + jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => { + streamCallCount++; + if (streamCallCount === 1) { + if (onCompleteFn) onCompleteFn('Original result with PR URLs'); + return createMockStreamResult('', []); + } + // Completion prompt returns empty text, no attempt_completion called + return createMockStreamResult('', []); + }); + + const result = await agent.answer('Do the task'); + + // Original result should be preserved + expect(result).toBe('Original result with PR URLs'); + expect(streamCallCount).toBe(2); + + jest.restoreAllMocks(); + }); + + test('should not run completion prompt when _completionPromptProcessed is set', async () => { + const agent = createMockedAgent(); + + let streamCallCount = 0; + let onCompleteFn = null; + + const origBuild = agent._buildNativeTools.bind(agent); + jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => { + onCompleteFn = onComplete; + return origBuild(opts, onComplete, ctx); + }); + + jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => { + streamCallCount++; + if (onCompleteFn) onCompleteFn('Result'); + return createMockStreamResult('', []); + }); + + await agent.answer('Do the task', [], { _completionPromptProcessed: true }); + + // Only 1 streamText call — completion prompt should be skipped + expect(streamCallCount).toBe(1); + + jest.restoreAllMocks(); + }); + + test('should keep original result when completion prompt throws', async () => { + const agent = createMockedAgent(); + + let streamCallCount = 0; + let onCompleteFn = null; + + const origBuild = agent._buildNativeTools.bind(agent); + jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => { + onCompleteFn = onComplete; + return origBuild(opts, onComplete, ctx); + }); + + jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => { + streamCallCount++; + if (streamCallCount === 1) { + if (onCompleteFn) onCompleteFn('Original good result'); + return createMockStreamResult('', []); + } + throw new Error('API error during completion prompt'); + }); + + const consoleSpy = jest.spyOn(console, 'error').mockImplementation(() => {}); + + const result = await agent.answer('Do the task'); + + // Original result preserved despite completion prompt error + expect(result).toBe('Original good result'); + expect(streamCallCount).toBe(2); + + consoleSpy.mockRestore(); + jest.restoreAllMocks(); + }); + + test('should use updated result when completion prompt calls attempt_completion', async () => { + const agent = createMockedAgent(); + + let streamCallCount = 0; + let onCompleteFn = null; + + const origBuild = agent._buildNativeTools.bind(agent); + jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => { + onCompleteFn = onComplete; + return origBuild(opts, onComplete, ctx); + }); + + jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => { + streamCallCount++; + if (streamCallCount === 1) { + // Main turn: incomplete result + if (onCompleteFn) onCompleteFn('Incomplete - no PR yet'); + return createMockStreamResult('', []); + } + // Completion prompt follow-up: agent creates the PR and calls attempt_completion again + if (onCompleteFn) onCompleteFn('Complete - PR created at https://github.com/test/pr/1'); + return createMockStreamResult('', []); + }); + + const result = await agent.answer('Do the task'); + + // Updated result from completion prompt should be used + expect(result).toBe('Complete - PR created at https://github.com/test/pr/1'); + + jest.restoreAllMocks(); + }); + + test('should not run completion prompt when no completionPrompt is configured', async () => { + const agent = createMockedAgent({ completionPrompt: '' }); // Empty = null + + let streamCallCount = 0; + let onCompleteFn = null; + + const origBuild = agent._buildNativeTools.bind(agent); + jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => { + onCompleteFn = onComplete; + return origBuild(opts, onComplete, ctx); + }); + + jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => { + streamCallCount++; + if (onCompleteFn) onCompleteFn('Done'); + return createMockStreamResult('', []); + }); + + await agent.answer('Do the task'); + + // Only 1 streamText call — no completion prompt + expect(streamCallCount).toBe(1); + + jest.restoreAllMocks(); + }); +});