diff --git a/src/backend/crew_ai/crew.py b/src/backend/crew_ai/crew.py
index 702c83d..489f328 100644
--- a/src/backend/crew_ai/crew.py
+++ b/src/backend/crew_ai/crew.py
@@ -252,6 +252,37 @@ def run_crew(query: str, model_provider: str, model_name: str, library_type: str
         logger.info(f"🏁 Crew execution finished - delegation cycle complete")
         logger.info(f"📊 Final LLM Stats: {formatting_monitor.get_stats()}")
 
+        # Capture per-agent token metrics for cost attribution
+        logger.info("📊 Capturing per-agent token metrics...")
+        per_agent_metrics = {}
+        agents_list = [
+            ("step_planner", step_planner_agent),
+            ("element_identifier", element_identifier_agent),
+            ("code_assembler", code_assembler_agent),
+            ("code_validator", code_validator_agent),
+        ]
+
+        for agent_name, agent in agents_list:
+            try:
+                # Get cumulative token usage from the agent's LLM
+                usage = agent.llm.get_token_usage_summary()
+                per_agent_metrics[agent_name] = {
+                    'total_tokens': usage.total_tokens,
+                    'prompt_tokens': usage.prompt_tokens,
+                    'completion_tokens': usage.completion_tokens,
+                    'successful_requests': usage.successful_requests,
+                }
+                logger.info(f" • {agent_name}: {usage.total_tokens} tokens "
+                            f"(prompt: {usage.prompt_tokens}, completion: {usage.completion_tokens})")
+            except Exception as e:
+                logger.warning(f"⚠️ Could not get token usage for {agent_name}: {e}")
+                per_agent_metrics[agent_name] = {
+                    'total_tokens': 0,
+                    'prompt_tokens': 0,
+                    'completion_tokens': 0,
+                    'successful_requests': 0,
+                }
+
         # NOTE: Pattern learning is NOT done here!
         # Learning should only happen AFTER test execution succeeds (test_status == "passed")
         # This ensures we only learn from validated, working code.
@@ -261,7 +292,7 @@ def run_crew(query: str, model_provider: str, model_name: str, library_type: str
         if optimization_metrics:
             logger.info("📊 Optimization metrics collected")
 
-        return result, crew, optimization_metrics
+        return result, crew, optimization_metrics, per_agent_metrics
 
     except Exception as e:
         error_msg = str(e)
diff --git a/src/backend/services/workflow_service.py b/src/backend/services/workflow_service.py
index b18b963..81cad76 100644
--- a/src/backend/services/workflow_service.py
+++ b/src/backend/services/workflow_service.py
@@ -78,7 +78,7 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model
 
         # Run CrewAI workflow (this takes most of the time - 10-15 seconds)
         # User sees progress messages above while this runs
-        validation_output, crew_with_results, optimization_metrics = run_crew(
+        validation_output, crew_with_results, optimization_metrics, per_agent_metrics = run_crew(
             natural_language_query, model_provider, model_name, library_type=None, workflow_id=workflow_id)
 
         # Stage 3: Generating (50-75%)
@@ -283,6 +283,13 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model
 
             logging.info(f"📊 Raw CrewAI usage metrics: {usage_metrics_dict}")
 
+            # Log per-agent token breakdown
+            if per_agent_metrics:
+                logging.info(f"📊 Per-agent token breakdown:")
+                for agent_name, metrics in per_agent_metrics.items():
+                    logging.info(f" • {agent_name}: {metrics['total_tokens']} tokens "
+                                 f"(prompt: {metrics['prompt_tokens']}, completion: {metrics['completion_tokens']})")
+
         except Exception as e:
             logging.warning(f"⚠️ Could not extract CrewAI usage metrics: {e}")
             # Fallback to empty metrics
@@ -313,6 +320,18 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model
         avg_llm_calls = browser_llm_calls / total_elements if total_elements > 0 else 0
         avg_cost = browser_cost / total_elements if total_elements > 0 else 0
 
+        # Prepare token_usage dict from per_agent_metrics
+        token_usage = {
+            "step_planner": per_agent_metrics.get("step_planner", {}).get("total_tokens", 0),
+            "element_identifier": per_agent_metrics.get("element_identifier", {}).get("total_tokens", 0),
+            "code_assembler": per_agent_metrics.get("code_assembler", {}).get("total_tokens", 0),
+            "code_validator": per_agent_metrics.get("code_validator", {}).get("total_tokens", 0),
+            "total": sum(
+                per_agent_metrics.get(agent, {}).get("total_tokens", 0)
+                for agent in ["step_planner", "element_identifier", "code_assembler", "code_validator"]
+            )
+        }
+
         unified_metrics = WorkflowMetrics(
             workflow_id=workflow_id,
             timestamp=datetime.now(),
@@ -345,6 +364,9 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model
             custom_actions_enabled=browser_metrics.get('custom_actions_enabled', False),
             custom_action_usage_count=browser_metrics.get('custom_action_usage_count', 0),
             session_id=browser_metrics.get('session_id'),
+
+            # Per-agent token tracking
+            token_usage=token_usage,
         )
 
         # 4. Record unified metrics
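
Not part of the patch: a minimal sketch of how the per_agent_metrics dict returned by run_crew could be folded into the cost attribution the first hunk mentions. The attribute_costs helper and the PRICING_PER_1K numbers are illustrative assumptions, not values from this repo.

# Hypothetical downstream helper, assuming per_agent_metrics has the shape built in crew.py.
PRICING_PER_1K = {  # USD per 1K tokens; placeholder figures only
    "prompt": 0.0005,
    "completion": 0.0015,
}

def attribute_costs(per_agent_metrics: dict) -> dict:
    """Approximate per-agent cost from prompt/completion token counts."""
    costs = {}
    for agent_name, m in per_agent_metrics.items():
        prompt_cost = m.get("prompt_tokens", 0) / 1000 * PRICING_PER_1K["prompt"]
        completion_cost = m.get("completion_tokens", 0) / 1000 * PRICING_PER_1K["completion"]
        costs[agent_name] = round(prompt_cost + completion_cost, 6)
    costs["total"] = round(sum(costs.values()), 6)
    return costs

if __name__ == "__main__":
    # Example input mirroring the dict built per agent in crew.py
    example = {
        "step_planner": {"prompt_tokens": 1200, "completion_tokens": 300, "total_tokens": 1500, "successful_requests": 2},
        "code_validator": {"prompt_tokens": 800, "completion_tokens": 150, "total_tokens": 950, "successful_requests": 1},
    }
    print(attribute_costs(example))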