def _clean_raw_output(answer: str) -> str:
    """Strip internal ReAct format markers from a raw LLM response.

    Intended for the fallback path where the response could not be parsed
    and the raw text would otherwise be shown to the user with markers such
    as ``Thought:``, ``Action:``, ``Action Input:`` or ``Observation:``.

    Cleaning happens in two steps:

    1. If the response contains ``Final Answer:``, the text after the *last*
       occurrence is returned, provided it is not blank.
    2. Otherwise (or when that final answer is blank) every line starting
       with an internal marker is dropped, together with the blank lines
       that directly follow it. When the final answer was blank, only the
       text *before* the last ``Final Answer:`` marker is considered.

    Args:
        answer: The raw response from the LLM.

    Returns:
        The cleaned text. If cleaning would leave nothing, the original
        ``answer`` is returned unchanged so that a non-empty response never
        turns into an empty output.
    """
    marker_prefixes = ("Thought:", "Action:", "Action Input:", "Observation:")

    # rpartition splits on the last marker; `found` is "" when it is absent.
    before_final, found, final_answer = answer.rpartition("Final Answer:")
    if found:
        final_answer = final_answer.strip()
        if final_answer:
            return final_answer
        # A blank final answer must not produce an empty output: fall back
        # to cleaning whatever preceded the marker.
        text = before_final
    else:
        text = answer

    kept_lines = []
    after_marker = False
    for line in text.split("\n"):
        stripped = line.strip()
        if stripped.startswith(marker_prefixes):
            after_marker = True
            continue
        # Blank lines directly following a marker line belong to it.
        if after_marker and not stripped:
            continue
        after_marker = False
        kept_lines.append(line)

    return "\n".join(kept_lines).strip() or answer
"""Tests for agent utility functions."""

import pytest

from crewai.agents.parser import AgentFinish
from crewai.utilities.agent_utils import _clean_raw_output, format_answer


class TestCleanRawOutput:
    """Behaviour of the _clean_raw_output helper."""

    def test_extracts_final_answer_when_present(self):
        """Only the text following Final Answer: is returned."""
        raw = """Thought: I need to process this request.
Action: search
Action Input: {"query": "test"}
Observation: search results here
Thought: Now I have the answer.
Final Answer: The search returned positive results."""

        assert _clean_raw_output(raw) == "The search returned positive results."

    def test_removes_thought_prefix(self):
        """Lines starting with Thought: do not survive cleaning."""
        raw = """Thought: I'm thinking about the problem.
This is the actual content.
More content here."""

        cleaned = _clean_raw_output(raw)
        assert "Thought:" not in cleaned
        assert "This is the actual content." in cleaned

    def test_removes_action_lines(self):
        """Both Action: and Action Input: lines are dropped."""
        raw = """Some content here.
Action: tool_name
Action Input: {"param": "value"}
More content after."""

        cleaned = _clean_raw_output(raw)
        for marker in ("Action:", "Action Input:"):
            assert marker not in cleaned
        assert "Some content here." in cleaned

    def test_removes_observation_lines(self):
        """Observation: lines are dropped while surrounding text is kept."""
        raw = """Content before.
Observation: tool output here
Content after observation."""

        cleaned = _clean_raw_output(raw)
        assert "Observation:" not in cleaned
        assert "Content before." in cleaned

    def test_returns_original_if_no_content_left(self):
        """A response consisting solely of markers comes back untouched."""
        raw = """Thought: Only thought here
Action: some_action"""

        # Nothing is left after cleaning, so the helper falls back to the input.
        assert _clean_raw_output(raw) == raw

    def test_handles_plain_text(self):
        """Marker-free text passes through unchanged."""
        raw = "This is a simple response without any markers."
        assert _clean_raw_output(raw) == raw

    def test_handles_multiline_final_answer(self):
        """Every line of a multi-line Final Answer is retained."""
        raw = """Thought: Processing...
Final Answer: This is line one.
This is line two.
And line three."""

        cleaned = _clean_raw_output(raw)
        expected_lines = (
            "This is line one.",
            "This is line two.",
            "And line three.",
        )
        for expected in expected_lines:
            assert expected in cleaned


class TestFormatAnswer:
    """Behaviour of format_answer, mainly its parse-failure fallback."""

    def test_returns_agent_finish_on_parse_failure(self):
        """An unparseable response still yields an AgentFinish."""
        # Deliberately not a valid ReAct response.
        raw = """Thought: Some thought here
This is not a valid format."""

        finish = format_answer(raw)
        assert isinstance(finish, AgentFinish)
        assert finish.thought == "Failed to parse LLM response"

    def test_cleans_output_on_parse_failure(self):
        """The fallback output carries no internal markers."""
        raw = """Thought: I need to respond.
Action: invalid_action
The actual response content here."""

        finish = format_answer(raw)
        assert isinstance(finish, AgentFinish)
        # Internal markers must be absent from the user-facing output.
        for marker in ("Thought:", "Action:"):
            assert marker not in finish.output

    def test_preserves_original_text(self):
        """The text field keeps the raw, uncleaned response."""
        raw = """Thought: Some thought.
Action: tool
The response."""

        finish = format_answer(raw)
        assert isinstance(finish, AgentFinish)
        # The raw response stays available for debugging.
        assert finish.text == raw

    def test_valid_final_answer_format(self):
        """A well-formed Final Answer response is parsed normally."""
        raw = """Thought: I have the answer.
Final Answer: This is the correct response."""

        finish = format_answer(raw)
        assert isinstance(finish, AgentFinish)
        assert finish.output == "This is the correct response."