diff --git a/src/Agent/BaseCodingAgent.php b/src/Agent/BaseCodingAgent.php index bcb9aca..d2bac2e 100644 --- a/src/Agent/BaseCodingAgent.php +++ b/src/Agent/BaseCodingAgent.php @@ -6,6 +6,7 @@ use EtfsCodingAgent\Service\WorkspaceToolingServiceInterface; use NeuronAI\Agent; +use NeuronAI\Exceptions\ToolMaxTriesException; use NeuronAI\Observability\Events\AgentError; use NeuronAI\Observability\Events\ToolCalled; use NeuronAI\Observability\Events\ToolCalling; @@ -282,9 +283,30 @@ protected function tools(): array /** * Override to catch tool execution errors and return them as results instead of crashing. * This allows the agent to learn from its mistakes and retry with correct parameters. + * + * Also tracks tool invocation attempts and enforces a maximum retry limit + * (via ToolMaxTriesException) to prevent infinite tool-call loops — e.g. when + * context-window trimming causes the agent to lose its conversation state. + * + * @see https://github.com/dx-tooling/sitebuilder-webapp/issues/75 + * + * @throws ToolMaxTriesException */ protected function executeSingleTool(ToolInterface $tool): void { + $this->toolAttempts[$tool->getName()] = ($this->toolAttempts[$tool->getName()] ?? 0) + 1; + + $maxTries = $tool->getMaxTries() ?? $this->toolMaxTries; + + if ($this->toolAttempts[$tool->getName()] > $maxTries) { + $exception = new ToolMaxTriesException( + "Tool {$tool->getName()} has been attempted too many times: {$maxTries} attempts." + ); + $this->notify('error', new AgentError($exception)); + + throw $exception; + } + $this->notify('tool-calling', new ToolCalling($tool)); try { diff --git a/tests/Unit/Agent/BaseCodingAgentTest.php b/tests/Unit/Agent/BaseCodingAgentTest.php index 999f042..4199eb2 100644 --- a/tests/Unit/Agent/BaseCodingAgentTest.php +++ b/tests/Unit/Agent/BaseCodingAgentTest.php @@ -4,6 +4,7 @@ use EtfsCodingAgent\Agent\BaseCodingAgent; use EtfsCodingAgent\Service\WorkspaceToolingServiceInterface; +use NeuronAI\Exceptions\ToolMaxTriesException; use NeuronAI\Tools\PropertyType; use NeuronAI\Tools\Tool; use NeuronAI\Tools\ToolProperty; @@ -127,3 +128,67 @@ public function runShellCommand(string $workingDirectory, string $command): stri expect($result)->toContain('required'); expect($result)->toContain('optional'); }); + +it('throws ToolMaxTriesException when a tool exceeds max attempts', function () { + $mockFacade = createMockFacade(); + $agent = new BaseCodingAgent($mockFacade); + + // Default toolMaxTries is 5 + $tool = Tool::make('test_tool', 'A test tool') + ->setCallable(fn (): string => 'ok'); + + $reflection = new ReflectionMethod($agent, 'executeSingleTool'); + + // Execute 5 times — should succeed + for ($i = 0; $i < 5; $i++) { + $reflection->invoke($agent, $tool); + } + + expect($tool->getResult())->toBe('ok'); + + // 6th attempt should throw + $reflection->invoke($agent, $tool); +})->throws(ToolMaxTriesException::class, 'too many times'); + +it('tracks tool attempts per tool name independently', function () { + $mockFacade = createMockFacade(); + $agent = new BaseCodingAgent($mockFacade); + + $toolA = Tool::make('tool_a', 'Tool A')->setCallable(fn (): string => 'a'); + $toolB = Tool::make('tool_b', 'Tool B')->setCallable(fn (): string => 'b'); + + $reflection = new ReflectionMethod($agent, 'executeSingleTool'); + + // Execute tool_a 4 times (under limit of 5) + for ($i = 0; $i < 4; $i++) { + $reflection->invoke($agent, $toolA); + } + + // Execute tool_b 4 times (under limit of 5) — should not be affected by tool_a's count + for ($i = 0; $i < 4; $i++) { + $reflection->invoke($agent, $toolB); + } + + expect($toolA->getResult())->toBe('a'); + expect($toolB->getResult())->toBe('b'); +}); + +it('catches tool execution errors and sets them as result without affecting attempt tracking', function () { + $mockFacade = createMockFacade(); + $agent = new BaseCodingAgent($mockFacade); + + $callCount = 0; + $tool = Tool::make('flaky_tool', 'A flaky tool') + ->setCallable(function () use (&$callCount): string { + $callCount++; + throw new RuntimeException('Flaky error'); + }); + + $reflection = new ReflectionMethod($agent, 'executeSingleTool'); + + // Execute — should catch the error and set it as result + $reflection->invoke($agent, $tool); + + expect($tool->getResult())->toContain('Flaky error'); + expect($callCount)->toBe(1); +});