Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion lib/crewai/src/crewai/utilities/agent_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,53 @@ def format_message_for_llm(
return {"role": role, "content": prompt}


def _clean_raw_output(answer: str) -> str:
"""Clean raw LLM output by removing internal ReAct format markers.

When parsing fails, the raw answer may contain internal format markers
like 'Thought:', 'Action:', 'Action Input:' that should not appear in
the final user-facing output.

Args:
answer: The raw response from the LLM

Returns:
Cleaned output with internal format markers removed
"""
# Check if answer contains "Final Answer:" and extract that part
if "Final Answer:" in answer:
# Extract everything after "Final Answer:"
parts = answer.split("Final Answer:")
if len(parts) > 1:
return parts[-1].strip()

# Remove Thought: prefix if present at the start
lines = answer.split("\n")
cleaned_lines = []
skip_until_content = False

for line in lines:
stripped = line.strip()
# Skip lines that are internal format markers
if stripped.startswith("Thought:"):
skip_until_content = True
continue
if stripped.startswith("Action:") or stripped.startswith("Action Input:"):
skip_until_content = True
continue
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Numbered action markers not cleaned from output

The _clean_raw_output function uses exact prefix checks like startswith("Action:") and startswith("Action Input:"), but the parser accepts numbered variants via regex patterns that include \d* (e.g., Action\s*\d*\s*:). This means markers like Action 1:, Action1:, or Action 1 Input: are recognized by the parser but won't be cleaned when parsing fails. If an LLM outputs a numbered action marker and parsing fails, that internal format marker will appear in the user-facing output field.

Fix in Cursor Fix in Web

if stripped.startswith("Observation:"):
skip_until_content = True
continue

if skip_until_content and stripped:
skip_until_content = False
if not skip_until_content:
cleaned_lines.append(line)

result = "\n".join(cleaned_lines).strip()
return result if result else answer


def format_answer(answer: str) -> AgentAction | AgentFinish:
"""Format a response from the LLM into an AgentAction or AgentFinish.

Expand All @@ -210,9 +257,11 @@ def format_answer(answer: str) -> AgentAction | AgentFinish:
try:
return parse(answer)
except Exception:
# Clean the output to remove internal format markers
cleaned_output = _clean_raw_output(answer)
return AgentFinish(
thought="Failed to parse LLM response",
output=answer,
output=cleaned_output,
text=answer,
)

Expand Down
127 changes: 127 additions & 0 deletions lib/crewai/tests/utilities/test_agent_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Tests for agent utility functions."""

import pytest

from crewai.agents.parser import AgentFinish
from crewai.utilities.agent_utils import _clean_raw_output, format_answer


class TestCleanRawOutput:
    """Tests for _clean_raw_output function.

    Each test pins the exact cleaned string rather than checking substrings,
    so surviving content, line order and blank-line handling are all verified.
    """

    def test_extracts_final_answer_when_present(self):
        """Test that Final Answer content is properly extracted."""
        answer = """Thought: I need to process this request.
Action: search
Action Input: {"query": "test"}
Observation: search results here
Thought: Now I have the answer.
Final Answer: The search returned positive results."""

        result = _clean_raw_output(answer)
        assert result == "The search returned positive results."

    def test_removes_thought_prefix(self):
        """Test that Thought: prefix lines are removed."""
        answer = """Thought: I'm thinking about the problem.
This is the actual content.
More content here."""

        result = _clean_raw_output(answer)
        # Only the marker line goes; both content lines survive in order.
        assert result == "This is the actual content.\nMore content here."

    def test_removes_action_lines(self):
        """Test that Action: and Action Input: lines are removed."""
        answer = """Some content here.
Action: tool_name
Action Input: {"param": "value"}
More content after."""

        result = _clean_raw_output(answer)
        # Content on both sides of the removed markers must be kept.
        assert result == "Some content here.\nMore content after."

    def test_removes_observation_lines(self):
        """Test that Observation: lines are removed."""
        answer = """Content before.
Observation: tool output here
Content after observation."""

        result = _clean_raw_output(answer)
        assert result == "Content before.\nContent after observation."

    def test_returns_original_if_no_content_left(self):
        """Test that original is returned if cleaning removes everything."""
        answer = """Thought: Only thought here
Action: some_action"""

        result = _clean_raw_output(answer)
        # When cleaning results in empty content, return original
        assert result == answer

    def test_handles_plain_text(self):
        """Test that plain text without markers is returned as-is."""
        answer = "This is a simple response without any markers."
        result = _clean_raw_output(answer)
        assert result == answer

    def test_handles_multiline_final_answer(self):
        """Test that multiline Final Answer is properly extracted."""
        answer = """Thought: Processing...
Final Answer: This is line one.
This is line two.
And line three."""

        result = _clean_raw_output(answer)
        # The whole tail after "Final Answer:" is kept, newlines included.
        assert result == "This is line one.\nThis is line two.\nAnd line three."


class TestFormatAnswer:
    """Checks for format_answer on both unparseable and well-formed text."""

    def test_returns_agent_finish_on_parse_failure(self):
        """A response in an invalid format comes back as an AgentFinish."""
        # Deliberately not a valid ReAct format, so parsing is expected to fail.
        raw = "Thought: Some thought here\nThis is not a valid format."

        outcome = format_answer(raw)

        assert isinstance(outcome, AgentFinish)
        assert outcome.thought == "Failed to parse LLM response"

    def test_cleans_output_on_parse_failure(self):
        """Internal markers are absent from the output after a failed parse."""
        raw = (
            "Thought: I need to respond.\n"
            "Action: invalid_action\n"
            "The actual response content here."
        )

        outcome = format_answer(raw)

        assert isinstance(outcome, AgentFinish)
        # Neither internal marker may leak into the user-facing output.
        for marker in ("Thought:", "Action:"):
            assert marker not in outcome.output

    def test_preserves_original_text(self):
        """The untouched LLM response stays available on the text field."""
        raw = "Thought: Some thought.\nAction: tool\nThe response."

        outcome = format_answer(raw)

        assert isinstance(outcome, AgentFinish)
        # text carries the raw response, unlike the cleaned output field.
        assert outcome.text == raw

    def test_valid_final_answer_format(self):
        """A well-formed Final Answer response is parsed normally."""
        raw = (
            "Thought: I have the answer.\n"
            "Final Answer: This is the correct response."
        )

        outcome = format_answer(raw)

        assert isinstance(outcome, AgentFinish)
        assert outcome.output == "This is the correct response."