From 3ac0a6eb1e0713976f5000a698badb58d0c0581b Mon Sep 17 00:00:00 2001
From: Leonid Bugaev <leonsbox@gmail.com>
Date: Wed, 4 Mar 2026 14:19:27 +0000
Subject: [PATCH 1/3] fix: completionPrompt now continues existing session
 instead of recursive answer()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, completionPrompt spawned a recursive this.answer() call, which created
a fresh TaskManager and iteration counter and lost all context from the main turn.
As a result, structured outputs (pr_urls, files_changed) were overwritten with
empty results from a follow-up turn that had no memory of the main turn's work.

Now completionPrompt appends a user message to the existing conversation and runs
one more streamText pass with the same tools, TaskManager, and history — just one
more message in the same session rather than a new agent execution.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 npm/src/agent/ProbeAgent.js | 97 ++++++++++++++++++++++++++++---------
 1 file changed, 73 insertions(+), 24 deletions(-)

diff --git a/npm/src/agent/ProbeAgent.js b/npm/src/agent/ProbeAgent.js
index f9e0659b..e71c6a71 100644
--- a/npm/src/agent/ProbeAgent.js
+++ b/npm/src/agent/ProbeAgent.js
@@ -3582,23 +3582,24 @@ Follow these instructions carefully:
         // Continue even if storage fails
       }
 
-      // Completion prompt handling - run a follow-up prompt after attempt_completion for validation/review
-      // This runs BEFORE mermaid validation and JSON schema validation
-      // Skip if we're already in a completion prompt follow-up call or if no completion prompt is configured
+      // Completion prompt handling - inject one more user message into the existing conversation
+      // This continues the SAME agentic session (same tools, same TaskManager, same history)
+      // rather than spawning a recursive this.answer() call which would reset state
       if (completionAttempted && this.completionPrompt && !options._completionPromptProcessed) {
         if (this.debug) {
-          console.log('[DEBUG] Running completion prompt for post-completion validation/review...');
+          console.log('[DEBUG] Running completion prompt as continuation of current session...');
         }
 
         try {
-          // Record completion prompt start in telemetry
+          const originalResult = finalResult;
+
           if (this.tracer) {
             this.tracer.recordEvent('completion_prompt.started', {
               'completion_prompt.original_result_length': finalResult?.length || 0
             });
           }
 
-          // Create the completion prompt with the current result as context
+          // Append completion prompt as a user message to the existing conversation
           const completionPromptMessage = `${this.completionPrompt}
 
 Here is the result to review:
@@ -3608,32 +3609,80 @@ ${finalResult}
 
 After reviewing, provide your final answer using attempt_completion.`;
 
-          // Make a follow-up call with the completion prompt
-          // Pass _completionPromptProcessed to prevent infinite loops
-          // Save output buffers — the recursive answer() must not destroy DSL output() content
-          const savedOutputItems = this._outputBuffer ? [...this._outputBuffer.items] : [];
-          const savedExtractedBlocks = this._extractedRawBlocks ? [...this._extractedRawBlocks] : [];
-          const completionResult = await this.answer(completionPromptMessage, [], {
-            ...options,
-            _completionPromptProcessed: true
-          });
-          // Restore output buffers so the parent call can append them to the final result
-          if (this._outputBuffer) {
-            this._outputBuffer.items = savedOutputItems;
+          currentMessages.push({ role: 'user', content: completionPromptMessage });
+
+          // Reset completion tracking for the follow-up turn
+          completionResult = null;
+          completionAttempted = false;
+
+          // Run one more streamText pass with the same tools and conversation context
+          // Give a small number of extra iterations for the follow-up
+          const completionMaxIterations = 5;
+          const completionStreamOptions = {
+            model: this.provider ? this.provider(this.model) : this.model,
+            messages: this.prepareMessagesWithImages(currentMessages),
+            tools,
+            stopWhen: stepCountIs(completionMaxIterations),
+            maxTokens: maxResponseTokens,
+            temperature: 0.3,
+            onStepFinish: ({ toolResults, text, finishReason, usage }) => {
+              if (usage) {
+                this.tokenCounter.recordUsage(usage);
+              }
+              if (options.onStream && text) {
+                options.onStream(text);
+              }
+              if (this.debug) {
+                console.log(`[DEBUG] Completion prompt step finished (reason: ${finishReason}, tools: ${toolResults?.length || 0})`);
+              }
+            }
+          };
+
+          const providerOpts = this._buildThinkingProviderOptions(maxResponseTokens);
+          if (providerOpts) {
+            completionStreamOptions.providerOptions = providerOpts;
+          }
+
+          const cpResult = await this.streamTextWithRetryAndFallback(completionStreamOptions);
+          const cpFinalText = await cpResult.text;
+          const cpUsage = await cpResult.usage;
+          if (cpUsage) {
+            this.tokenCounter.recordUsage(cpUsage, cpResult.experimental_providerMetadata);
+          }
+
+          // Append follow-up messages to conversation history
+          const cpMessages = await cpResult.response?.messages;
+          if (cpMessages) {
+            for (const msg of cpMessages) {
+              currentMessages.push(msg);
+            }
           }
-          this._extractedRawBlocks = savedExtractedBlocks;
 
-          // Update finalResult with the result from the completion prompt
-          finalResult = completionResult;
+          // Use new completion result if the agent called attempt_completion again,
+          // otherwise keep the original result (the follow-up may have just done side-effects)
+          if (completionResult) {
+            finalResult = completionResult;
+            completionAttempted = true;
+          } else if (cpFinalText && cpFinalText.trim().length > 0) {
+            finalResult = cpFinalText;
+            completionAttempted = true;
+          } else {
+            // Follow-up produced nothing useful — keep the original
+            finalResult = originalResult;
+            completionAttempted = true;
+            if (this.debug) {
+              console.log('[DEBUG] Completion prompt returned empty result, keeping original.');
+            }
+          }
 
           if (this.debug) {
-            console.log(`[DEBUG] Completion prompt finished. New result length: ${finalResult?.length || 0}`);
+            console.log(`[DEBUG] Completion prompt finished. Final result length: ${finalResult?.length || 0}`);
           }
 
-          // Record completion prompt completion in telemetry
           if (this.tracer) {
             this.tracer.recordEvent('completion_prompt.completed', {
-              'completion_prompt.final_result_length': finalResult?.length || 0
+              'completion_prompt.final_result_length': finalResult?.length || 0,
+              'completion_prompt.used_original': finalResult === originalResult
             });
           }
         } catch (error) {

From 4a4aa49d6be85257e8dc6d8fda8f96f2c0a31a99 Mon Sep 17 00:00:00 2001
From: Leonid Bugaev <leonsbox@gmail.com>
Date: Wed, 4 Mar 2026 14:21:10 +0000
Subject: [PATCH 2/3] fix: completion prompt footer instructs AI to preserve
 original if valid

Tell the AI to respond with its previous answer as-is if everything
checks out, and to re-send the complete, updated answer only if
something actually needs fixing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 npm/src/agent/ProbeAgent.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/npm/src/agent/ProbeAgent.js b/npm/src/agent/ProbeAgent.js
index e71c6a71..98163f64 100644
--- a/npm/src/agent/ProbeAgent.js
+++ b/npm/src/agent/ProbeAgent.js
@@ -3607,7 +3607,7 @@ Here is the result to review:
 ${finalResult}
 </result>
 
-After reviewing, provide your final answer using attempt_completion.`;
+Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is using attempt_completion. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix) using attempt_completion.`;
 
           currentMessages.push({ role: 'user', content: completionPromptMessage });
 

From 0a50d65e76756f9bf196e6849856495482de655e Mon Sep 17 00:00:00 2001
From: Leonid Bugaev <leonsbox@gmail.com>
Date: Wed, 4 Mar 2026 14:27:57 +0000
Subject: [PATCH 3/3] test: add behavioral tests for completion prompt session
 continuity

Tests verify:
- streamText called twice (not recursive answer()) when completionPrompt is set
- Original result preserved when completion prompt returns empty
- Completion prompt skipped when _completionPromptProcessed flag is set
- Original result preserved when completion prompt throws
- Updated result used when completion prompt calls attempt_completion again
- No completion prompt when none is configured

Also updates the existing message format test to match the new footer text.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 npm/tests/unit/completion-prompt.test.js | 248 ++++++++++++++++++++++-
 1 file changed, 247 insertions(+), 1 deletion(-)

diff --git a/npm/tests/unit/completion-prompt.test.js b/npm/tests/unit/completion-prompt.test.js
index e24ae04d..a1128343 100644
--- a/npm/tests/unit/completion-prompt.test.js
+++ b/npm/tests/unit/completion-prompt.test.js
@@ -154,13 +154,15 @@ Here is the result to review:
 ${finalResult}
 </result>
 
-After reviewing, provide your final answer using attempt_completion.`;
+Double-check your response based on the criteria above. If everything looks good, respond with your previous answer exactly as-is using attempt_completion. If something needs to be fixed or is missing, do it now, then respond with the COMPLETE updated answer (everything you did in total, not just the fix) using attempt_completion.`;
 
     expect(formattedMessage).toContain(completionPrompt);
     expect(formattedMessage).toContain(finalResult);
     expect(formattedMessage).toContain('<result>');
     expect(formattedMessage).toContain('</result>');
     expect(formattedMessage).toContain('attempt_completion');
+    expect(formattedMessage).toContain('Double-check your response');
+    expect(formattedMessage).toContain('respond with your previous answer exactly as-is');
   });
 });
 
@@ -377,3 +379,247 @@ describe('completionPrompt isolation', () => {
     expect(baseAgent.completionPrompt).toBe('Original prompt');
   });
 });
+
+describe('completionPrompt session continuity behavior', () => {
+  // Helper: builds a fake streamText result whose `text`, `usage`, `response.messages` and `steps` are already-resolved promises.
+  function createMockStreamResult(text, messages = []) {
+    return {
+      text: Promise.resolve(text),
+      usage: Promise.resolve({ promptTokens: 10, completionTokens: 5 }), // fixed token counts; no test in this describe block asserts on them
+      response: { messages: Promise.resolve(messages) }, // NOTE(review): `response` is a plain object here, only `messages` is a promise — confirm this matches the real result shape
+      experimental_providerMetadata: undefined,
+      steps: Promise.resolve([]),
+    };
+  }
+
+  // Helper: builds a ProbeAgent (default completionPrompt 'Check your work') with internals stubbed so answer() reaches streamText
+  function createMockedAgent(options = {}) {
+    const agent = new ProbeAgent({
+      completionPrompt: options.completionPrompt || 'Check your work',
+      path: process.cwd(),
+      model: 'test-model',
+      ...options, // spread last: an explicitly passed key (even completionPrompt: '') overrides the defaults above
+    });
+
+    // Mock getSystemMessage to avoid filesystem access
+    jest.spyOn(agent, 'getSystemMessage').mockResolvedValue('You are a test agent.');
+
+    // Mock prepareMessagesWithImages to return its input array unchanged
+    jest.spyOn(agent, 'prepareMessagesWithImages').mockImplementation(msgs => msgs);
+
+    // Stub _buildThinkingProviderOptions to return null, i.e. no thinking-specific provider options
+    jest.spyOn(agent, '_buildThinkingProviderOptions').mockReturnValue(null);
+
+    // Ensure provider is null so model string is used directly
+    agent.provider = null;
+
+    // Replace hooks with a stub whose emit() resolves to undefined
+    agent.hooks = { emit: jest.fn().mockResolvedValue(undefined) };
+
+    // Replace the storage adapter with a stub whose saveMessage() resolves to undefined
+    agent.storageAdapter = { saveMessage: jest.fn().mockResolvedValue(undefined) };
+
+    return agent;
+  }
+
+  test('should call streamText twice (not recursive answer) when completionPrompt is set', async () => {
+    const agent = createMockedAgent();
+
+    const streamCalls = [];
+    let streamCallCount = 0;
+    let onCompleteFn = null;
+
+    // Wrap _buildNativeTools to capture the onComplete callback it receives; the stream mock below calls it to simulate attempt_completion
+    const origBuild = agent._buildNativeTools.bind(agent);
+    jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => {
+      onCompleteFn = onComplete;
+      return origBuild(opts, onComplete, ctx);
+    });
+
+    jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async (opts) => {
+      streamCallCount++;
+      streamCalls.push({
+        callNumber: streamCallCount,
+        messages: [...(opts.messages || [])], // snapshot copy: prepareMessagesWithImages is mocked as a pass-through, so opts.messages may be the live conversation array
+      });
+
+      if (streamCallCount === 1) {
+        // Simulate attempt_completion being called during main turn
+        if (onCompleteFn) onCompleteFn('{"summary":"Done","pr_urls":["https://github.com/test/1"]}');
+        return createMockStreamResult('', [{ role: 'assistant', content: 'done' }]);
+      }
+      // Completion prompt follow-up: returns plain text and does not call onComplete
+      return createMockStreamResult('Looks good', [{ role: 'assistant', content: 'verified' }]);
+    });
+
+    const answerSpy = jest.spyOn(agent, 'answer');
+    const result = await agent.answer('Implement feature'); // return value is not asserted in this test
+
+    // answer() called exactly once (no recursive call)
+    expect(answerSpy).toHaveBeenCalledTimes(1);
+
+    // streamText called twice: main loop + completion prompt follow-up
+    expect(streamCallCount).toBe(2);
+
+    // Second call should have more messages (completion prompt user message appended)
+    expect(streamCalls[1].messages.length).toBeGreaterThan(streamCalls[0].messages.length);
+
+    // Verify the last message of the second call is the user message carrying the completion prompt, the wrapped result and the new footer
+    const lastMsg = streamCalls[1].messages[streamCalls[1].messages.length - 1];
+    expect(lastMsg.role).toBe('user');
+    expect(lastMsg.content).toContain('Check your work');
+    expect(lastMsg.content).toContain('<result>');
+    expect(lastMsg.content).toContain('pr_urls');
+    expect(lastMsg.content).toContain('Double-check your response');
+
+    jest.restoreAllMocks(); // not reached if an assertion above throws
+  });
+
+  test('should preserve original result when completion prompt returns empty', async () => {
+    const agent = createMockedAgent();
+
+    let streamCallCount = 0;
+    let onCompleteFn = null;
+
+    const origBuild = agent._buildNativeTools.bind(agent); // keep the real builder so the tools are still constructed
+    jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => {
+      onCompleteFn = onComplete; // captured so the stream mock can simulate attempt_completion
+      return origBuild(opts, onComplete, ctx);
+    });
+
+    jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => {
+      streamCallCount++;
+      if (streamCallCount === 1) {
+        if (onCompleteFn) onCompleteFn('Original result with PR URLs'); // main turn completes with this result
+        return createMockStreamResult('', []);
+      }
+      // Completion prompt returns empty text, no attempt_completion called
+      return createMockStreamResult('', []);
+    });
+
+    const result = await agent.answer('Do the task');
+
+    // The main-turn result is returned unchanged, and the follow-up pass still ran (2 stream calls)
+    expect(result).toBe('Original result with PR URLs');
+    expect(streamCallCount).toBe(2);
+
+    jest.restoreAllMocks(); // not reached if an assertion above throws
+  });
+
+  test('should not run completion prompt when _completionPromptProcessed is set', async () => {
+    const agent = createMockedAgent();
+
+    let streamCallCount = 0;
+    let onCompleteFn = null;
+
+    const origBuild = agent._buildNativeTools.bind(agent); // keep the real builder so the tools are still constructed
+    jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => {
+      onCompleteFn = onComplete; // captured so the stream mock can simulate attempt_completion
+      return origBuild(opts, onComplete, ctx);
+    });
+
+    jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => {
+      streamCallCount++;
+      if (onCompleteFn) onCompleteFn('Result'); // every stream call reports a completion
+      return createMockStreamResult('', []);
+    });
+
+    await agent.answer('Do the task', [], { _completionPromptProcessed: true });
+
+    // Only 1 streamText call — the flag suppresses the follow-up even though completionPrompt is configured
+    expect(streamCallCount).toBe(1);
+
+    jest.restoreAllMocks(); // not reached if an assertion above throws
+  });
+
+  test('should keep original result when completion prompt throws', async () => {
+    const agent = createMockedAgent();
+
+    let streamCallCount = 0;
+    let onCompleteFn = null;
+
+    const origBuild = agent._buildNativeTools.bind(agent); // keep the real builder so the tools are still constructed
+    jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => {
+      onCompleteFn = onComplete; // captured so the stream mock can simulate attempt_completion
+      return origBuild(opts, onComplete, ctx);
+    });
+
+    jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => {
+      streamCallCount++;
+      if (streamCallCount === 1) {
+        if (onCompleteFn) onCompleteFn('Original good result'); // main turn completes normally
+        return createMockStreamResult('', []);
+      }
+      throw new Error('API error during completion prompt'); // second call (the follow-up pass) fails
+    });
+
+    const consoleSpy = jest.spyOn(console, 'error').mockImplementation(() => {}); // silence console.error (presumably the failure path logs there — confirm)
+
+    const result = await agent.answer('Do the task');
+
+    // Original result preserved despite completion prompt error
+    expect(result).toBe('Original good result');
+    expect(streamCallCount).toBe(2);
+
+    consoleSpy.mockRestore(); // not reached if an assertion above throws
+    jest.restoreAllMocks();
+  });
+
+  test('should use updated result when completion prompt calls attempt_completion', async () => {
+    const agent = createMockedAgent();
+
+    let streamCallCount = 0;
+    let onCompleteFn = null;
+
+    const origBuild = agent._buildNativeTools.bind(agent); // keep the real builder so the tools are still constructed
+    jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => {
+      onCompleteFn = onComplete; // captured so the stream mock can simulate attempt_completion
+      return origBuild(opts, onComplete, ctx);
+    });
+
+    jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => {
+      streamCallCount++;
+      if (streamCallCount === 1) {
+        // Main turn: incomplete result
+        if (onCompleteFn) onCompleteFn('Incomplete - no PR yet');
+        return createMockStreamResult('', []);
+      }
+      // Completion prompt follow-up: simulates the agent calling attempt_completion again with an updated result
+      if (onCompleteFn) onCompleteFn('Complete - PR created at https://github.com/test/pr/1');
+      return createMockStreamResult('', []);
+    });
+
+    const result = await agent.answer('Do the task');
+
+    // Updated result from completion prompt should be used
+    expect(result).toBe('Complete - PR created at https://github.com/test/pr/1');
+
+    jest.restoreAllMocks(); // not reached if an assertion above throws
+  });
+
+  test('should not run completion prompt when no completionPrompt is configured', async () => {
+    const agent = createMockedAgent({ completionPrompt: '' }); // '' reaches the constructor via ...options; presumably treated as "no prompt configured" — confirm
+
+    let streamCallCount = 0;
+    let onCompleteFn = null;
+
+    const origBuild = agent._buildNativeTools.bind(agent); // keep the real builder so the tools are still constructed
+    jest.spyOn(agent, '_buildNativeTools').mockImplementation((opts, onComplete, ctx) => {
+      onCompleteFn = onComplete; // captured so the stream mock can simulate attempt_completion
+      return origBuild(opts, onComplete, ctx);
+    });
+
+    jest.spyOn(agent, 'streamTextWithRetryAndFallback').mockImplementation(async () => {
+      streamCallCount++;
+      if (onCompleteFn) onCompleteFn('Done'); // every stream call reports a completion
+      return createMockStreamResult('', []);
+    });
+
+    await agent.answer('Do the task');
+
+    // Only 1 streamText call — no completion prompt
+    expect(streamCallCount).toBe(1);
+
+    jest.restoreAllMocks(); // not reached if an assertion above throws
+  });
+});