From 41aee7d1e692fdb5b058e242662ffe60f1ca3a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Kie=C3=9Fling?= Date: Sat, 7 Feb 2026 11:47:39 +0100 Subject: [PATCH] Restore tool-attempt tracking in executeSingleTool() The overridden executeSingleTool() was not calling parent, which bypassed the toolAttempts counter and ToolMaxTriesException safety net. This allowed an agent to call the same tool infinitely when the context window overflowed and the conversation history was lost. Re-add attempt tracking and max-tries enforcement before tool execution. ToolMaxTriesException is thrown (not caught) so the agent loop terminates. Ref: https://github.com/dx-tooling/sitebuilder-webapp/issues/75 Co-authored-by: Cursor --- src/Agent/BaseCodingAgent.php | 22 ++++++++ tests/Unit/Agent/BaseCodingAgentTest.php | 65 ++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/src/Agent/BaseCodingAgent.php b/src/Agent/BaseCodingAgent.php index bcb9aca..d2bac2e 100644 --- a/src/Agent/BaseCodingAgent.php +++ b/src/Agent/BaseCodingAgent.php @@ -6,6 +6,7 @@ use EtfsCodingAgent\Service\WorkspaceToolingServiceInterface; use NeuronAI\Agent; +use NeuronAI\Exceptions\ToolMaxTriesException; use NeuronAI\Observability\Events\AgentError; use NeuronAI\Observability\Events\ToolCalled; use NeuronAI\Observability\Events\ToolCalling; @@ -282,9 +283,30 @@ protected function tools(): array /** * Override to catch tool execution errors and return them as results instead of crashing. * This allows the agent to learn from its mistakes and retry with correct parameters. + * + * Also tracks tool invocation attempts and enforces a maximum retry limit + * (via ToolMaxTriesException) to prevent infinite tool-call loops — e.g. when + * context-window trimming causes the agent to lose its conversation state. + * + * @see https://github.com/dx-tooling/sitebuilder-webapp/issues/75 + * + * @throws ToolMaxTriesException */ protected function executeSingleTool(ToolInterface $tool): void { + $this->toolAttempts[$tool->getName()] = ($this->toolAttempts[$tool->getName()] ?? 0) + 1; + + $maxTries = $tool->getMaxTries() ?? $this->toolMaxTries; + + if ($this->toolAttempts[$tool->getName()] > $maxTries) { + $exception = new ToolMaxTriesException( + "Tool {$tool->getName()} has been attempted too many times: {$maxTries} attempts." + ); + $this->notify('error', new AgentError($exception)); + + throw $exception; + } + $this->notify('tool-calling', new ToolCalling($tool)); try { diff --git a/tests/Unit/Agent/BaseCodingAgentTest.php b/tests/Unit/Agent/BaseCodingAgentTest.php index 999f042..4199eb2 100644 --- a/tests/Unit/Agent/BaseCodingAgentTest.php +++ b/tests/Unit/Agent/BaseCodingAgentTest.php @@ -4,6 +4,7 @@ use EtfsCodingAgent\Agent\BaseCodingAgent; use EtfsCodingAgent\Service\WorkspaceToolingServiceInterface; +use NeuronAI\Exceptions\ToolMaxTriesException; use NeuronAI\Tools\PropertyType; use NeuronAI\Tools\Tool; use NeuronAI\Tools\ToolProperty; @@ -127,3 +128,67 @@ public function runShellCommand(string $workingDirectory, string $command): stri expect($result)->toContain('required'); expect($result)->toContain('optional'); }); + +it('throws ToolMaxTriesException when a tool exceeds max attempts', function () { + $mockFacade = createMockFacade(); + $agent = new BaseCodingAgent($mockFacade); + + // Default toolMaxTries is 5 + $tool = Tool::make('test_tool', 'A test tool') + ->setCallable(fn (): string => 'ok'); + + $reflection = new ReflectionMethod($agent, 'executeSingleTool'); + + // Execute 5 times — should succeed + for ($i = 0; $i < 5; $i++) { + $reflection->invoke($agent, $tool); + } + + expect($tool->getResult())->toBe('ok'); + + // 6th attempt should throw + $reflection->invoke($agent, $tool); +})->throws(ToolMaxTriesException::class, 'too many times'); + +it('tracks tool attempts per tool name independently', function () { + $mockFacade = createMockFacade(); + $agent = new BaseCodingAgent($mockFacade); + + $toolA = Tool::make('tool_a', 'Tool A')->setCallable(fn (): string => 'a'); + $toolB = Tool::make('tool_b', 'Tool B')->setCallable(fn (): string => 'b'); + + $reflection = new ReflectionMethod($agent, 'executeSingleTool'); + + // Execute tool_a 4 times (under limit of 5) + for ($i = 0; $i < 4; $i++) { + $reflection->invoke($agent, $toolA); + } + + // Execute tool_b 4 times (under limit of 5) — should not be affected by tool_a's count + for ($i = 0; $i < 4; $i++) { + $reflection->invoke($agent, $toolB); + } + + expect($toolA->getResult())->toBe('a'); + expect($toolB->getResult())->toBe('b'); +}); + +it('catches tool execution errors and sets them as result without affecting attempt tracking', function () { + $mockFacade = createMockFacade(); + $agent = new BaseCodingAgent($mockFacade); + + $callCount = 0; + $tool = Tool::make('flaky_tool', 'A flaky tool') + ->setCallable(function () use (&$callCount): string { + $callCount++; + throw new RuntimeException('Flaky error'); + }); + + $reflection = new ReflectionMethod($agent, 'executeSingleTool'); + + // Execute — should catch the error and set it as result + $reflection->invoke($agent, $tool); + + expect($tool->getResult())->toContain('Flaky error'); + expect($callCount)->toBe(1); +});