33 changes: 32 additions & 1 deletion src/backend/crew_ai/crew.py
@@ -252,6 +252,37 @@ def run_crew(query: str, model_provider: str, model_name: str, library_type: str
logger.info(f"🏁 Crew execution finished - delegation cycle complete")
logger.info(f"📊 Final LLM Stats: {formatting_monitor.get_stats()}")

# Capture per-agent token metrics for cost attribution
logger.info("📊 Capturing per-agent token metrics...")
per_agent_metrics = {}
agents_list = [
    ("step_planner", step_planner_agent),
    ("element_identifier", element_identifier_agent),
    ("code_assembler", code_assembler_agent),
    ("code_validator", code_validator_agent),
]

for agent_name, agent in agents_list:
    try:
        # Get cumulative token usage from the agent's LLM
        usage = agent.llm.get_token_usage_summary()
        per_agent_metrics[agent_name] = {
            'total_tokens': usage.total_tokens,
            'prompt_tokens': usage.prompt_tokens,
            'completion_tokens': usage.completion_tokens,
            'successful_requests': usage.successful_requests,
        }
        logger.info(f" • {agent_name}: {usage.total_tokens} tokens "
                    f"(prompt: {usage.prompt_tokens}, completion: {usage.completion_tokens})")
    except Exception as e:
        logger.warning(f"⚠️ Could not get token usage for {agent_name}: {e}")
        per_agent_metrics[agent_name] = {
            'total_tokens': 0,
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'successful_requests': 0,
        }
# NOTE: Pattern learning is NOT done here!
# Learning should only happen AFTER test execution succeeds (test_status == "passed")
# This ensures we only learn from validated, working code.
@@ -261,7 +292,7 @@ def run_crew(query: str, model_provider: str, model_name: str, library_type: str
if optimization_metrics:
    logger.info("📊 Optimization metrics collected")

return result, crew, optimization_metrics
return result, crew, optimization_metrics, per_agent_metrics

except Exception as e:
error_msg = str(e)
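For reviewers, a minimal standalone sketch of how a caller might consume the new fourth return value from `run_crew`. The dict shape mirrors the keys built in the capture loop above; `summarize_agent_usage` and the sample payload are illustrative assumptions, not code from this PR.

```python
# Sketch only: assumes run_crew now returns (result, crew, optimization_metrics, per_agent_metrics)
# as added in this diff; summarize_agent_usage and the sample numbers are illustrative.

def summarize_agent_usage(per_agent_metrics: dict) -> dict:
    """Aggregate the per-agent token dicts produced by run_crew into overall totals."""
    summary = {"total_tokens": 0, "prompt_tokens": 0, "completion_tokens": 0}
    for metrics in per_agent_metrics.values():
        for key in summary:
            summary[key] += metrics.get(key, 0)
    return summary


if __name__ == "__main__":
    # Example payload in the shape built by the capture loop above.
    example = {
        "step_planner": {"total_tokens": 1200, "prompt_tokens": 900,
                         "completion_tokens": 300, "successful_requests": 2},
        "code_validator": {"total_tokens": 0, "prompt_tokens": 0,
                           "completion_tokens": 0, "successful_requests": 0},
    }
    print(summarize_agent_usage(example))
```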
24 changes: 23 additions & 1 deletion src/backend/services/workflow_service.py
@@ -78,7 +78,7 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model

# Run CrewAI workflow (this takes most of the time - 10-15 seconds)
# User sees progress messages above while this runs
validation_output, crew_with_results, optimization_metrics = run_crew(
validation_output, crew_with_results, optimization_metrics, per_agent_metrics = run_crew(
natural_language_query, model_provider, model_name, library_type=None, workflow_id=workflow_id)

# Stage 3: Generating (50-75%)
@@ -283,6 +283,13 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model

logging.info(f"📊 Raw CrewAI usage metrics: {usage_metrics_dict}")

# Log per-agent token breakdown
if per_agent_metrics:
    logging.info(f"📊 Per-agent token breakdown:")
    for agent_name, metrics in per_agent_metrics.items():
        logging.info(f" • {agent_name}: {metrics['total_tokens']} tokens "
                     f"(prompt: {metrics['prompt_tokens']}, completion: {metrics['completion_tokens']})")

except Exception as e:
logging.warning(f"⚠️ Could not extract CrewAI usage metrics: {e}")
# Fallback to empty metrics
@@ -313,6 +320,18 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model
avg_llm_calls = browser_llm_calls / total_elements if total_elements > 0 else 0
avg_cost = browser_cost / total_elements if total_elements > 0 else 0

# Prepare token_usage dict from per_agent_metrics
token_usage = {
    "step_planner": per_agent_metrics.get("step_planner", {}).get("total_tokens", 0),
    "element_identifier": per_agent_metrics.get("element_identifier", {}).get("total_tokens", 0),
    "code_assembler": per_agent_metrics.get("code_assembler", {}).get("total_tokens", 0),
    "code_validator": per_agent_metrics.get("code_validator", {}).get("total_tokens", 0),
    "total": sum(
        per_agent_metrics.get(agent, {}).get("total_tokens", 0)
        for agent in ["step_planner", "element_identifier", "code_assembler", "code_validator"]
    )
}

unified_metrics = WorkflowMetrics(
    workflow_id=workflow_id,
    timestamp=datetime.now(),
@@ -345,6 +364,9 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model
    custom_actions_enabled=browser_metrics.get('custom_actions_enabled', False),
    custom_action_usage_count=browser_metrics.get('custom_action_usage_count', 0),
    session_id=browser_metrics.get('session_id'),

    # Per-agent token tracking
    token_usage=token_usage,
)

# 4. Record unified metrics
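As a usage note, a minimal sketch of how the new `token_usage` field could feed cost attribution downstream. The `COST_PER_1K_TOKENS` rate and the `attribute_cost` helper are hypothetical placeholders, not code from this change; only the dict keys come from the diff above.

```python
# Sketch only: token_usage has the keys built in workflow_service.py above.
# COST_PER_1K_TOKENS is a made-up placeholder, not a real provider rate.
COST_PER_1K_TOKENS = 0.002

AGENT_KEYS = ["step_planner", "element_identifier", "code_assembler", "code_validator"]


def attribute_cost(token_usage: dict) -> dict:
    """Approximate per-agent cost from the token_usage breakdown."""
    return {
        agent: round(token_usage.get(agent, 0) / 1000 * COST_PER_1K_TOKENS, 6)
        for agent in AGENT_KEYS
    }


if __name__ == "__main__":
    usage = {"step_planner": 1200, "element_identifier": 5400,
             "code_assembler": 2100, "code_validator": 800, "total": 9500}
    print(attribute_cost(usage))
```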