-
Notifications
You must be signed in to change notification settings - Fork 187
Open
Description
Describe the bug
The UI displays an ❌ FAIL marker next to valid tool calls even when only the final response score is a failure.
To Reproduce
Example Agent
from google.adk.agents import Agent
def commerce_system(request: str) -> list[dict[str, str | int]]:
"""Responds to requests for 'promotions' data"""
if request.lower() == "promotions":
return [{"name": "Free delivery", "min_price": 50}]
else:
return []
# Instruction text handed to the agent unchanged via `instruction=`.
prompt = """You are a shopping assistant that provides advice about **promotions**"""

# Agent used in the reproduction; commerce_system is its only tool.
root_agent = Agent(
    tools=[commerce_system],
    instruction=prompt,
    model="gemini-2.5-flash",
    name="root_agent",
)
Example evalset
{
"eval_set_id": "ui",
"eval_cases": [
{
"eval_id": "fails",
"conversation": [
{
"user_content": {
"parts": [
{
"text": "what promotions are there?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"text": "Jeff"
}
],
"role": "model"
},
"intermediate_data": {
"invocation_events": [
{
"author": "root_agent",
"content": {
"parts": [
{
"function_call": {
"args": {
"request": "PROMOTIONS"
},
"name": "commerce_system"
}
}
],
"role": "model"
}
},
{
"author": "root_agent",
"content": {
"parts": [
{
"function_response": {
"name": "commerce_system",
"response": {
"result": [
{
"name": "Free delivery",
"min_price": 50
}
]
}
}
}
],
"role": "user"
}
}
]
},
"creation_timestamp": 1760829983.020511
}
],
"session_input": {
"app_name": "app",
"user_id": "user"
},
"creation_timestamp": 1760830006.5009038
}
],
"creation_timestamp": 1760829999.966551
}
Expected behavior
After executing the evaluations, you should see the session replayed with ✅ markers indicating success, and a comparison of the reference vs. predicted output only for the steps that are marked as failures.
Actual behavior
If the final response does not match but the tool trajectory passes, the eval case still loads; however, the tool calls are also given ❌ indicators, despite being valid calls.

Desktop (please complete the following information):
ADK version (pip show google-adk): all recent versions, at least
Additional Information
Eval session
{
"eval_set_result_id": "app_ui_1761243015.4547887",
"eval_set_result_name": "app_ui_1761243015.4547887",
"eval_set_id": "ui",
"eval_case_results": [
{
"eval_set_file": "ui",
"eval_set_id": "ui",
"eval_id": "fails",
"final_eval_status": 2,
"eval_metric_results": null,
"overall_eval_metric_results": [
{
"metric_name": "tool_trajectory_avg_score",
"threshold": 1,
"judge_model_options": null,
"criterion": null,
"score": 1,
"eval_status": 1,
"details": {
"rubric_scores": null
}
},
{
"metric_name": "response_match_score",
"threshold": 0.7,
"judge_model_options": null,
"criterion": null,
"score": 0,
"eval_status": 2,
"details": {
"rubric_scores": null
}
}
],
"eval_metric_result_per_invocation": [
{
"actual_invocation": {
"invocation_id": "e-bbbe4ef2-5354-418c-a474-fb7030073c1a",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": "what promotions are there?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": "There is a free delivery promotion for orders over $50. \n"
}
],
"role": "model"
},
"intermediate_data": {
"invocation_events": [
{
"author": "root_agent",
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": "REDACTED",
"function_call": {
"id": "adk-c10cd01d-4e22-4c83-a2f1-f4162879cef4",
"args": {
"request": "PROMOTIONS"
},
"name": "commerce_system"
},
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": null
}
],
"role": "model"
}
},
{
"author": "root_agent",
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": {
"will_continue": null,
"scheduling": null,
"parts": null,
"id": "adk-c10cd01d-4e22-4c83-a2f1-f4162879cef4",
"name": "commerce_system",
"response": {
"result": [
{
"name": "Free delivery",
"min_price": 50
}
]
}
},
"text": null
}
],
"role": "user"
}
}
]
},
"creation_timestamp": 1761243013.450981,
"rubrics": null,
"app_details": {
"agent_details": {
"root_agent": {
"name": "root_agent",
"instructions": "You are a shopping assistant that provides advice about **PRODUCTS** and **PROMOTIONS**\n\nYou are an agent. Your internal name is \"root_agent\".",
"tool_declarations": [
{
"function_declarations": [
{
"behavior": null,
"description": "Accepts requests for either 'PRODUCTS' or 'PROMOTIONS' data",
"name": "commerce_system",
"parameters": {
"additional_properties": null,
"defs": null,
"ref": null,
"any_of": null,
"default": null,
"description": null,
"enum": null,
"example": null,
"format": null,
"items": null,
"max_items": null,
"max_length": null,
"max_properties": null,
"maximum": null,
"min_items": null,
"min_length": null,
"min_properties": null,
"minimum": null,
"nullable": null,
"pattern": null,
"properties": {
"request": {
"additional_properties": null,
"defs": null,
"ref": null,
"any_of": null,
"default": null,
"description": null,
"enum": null,
"example": null,
"format": null,
"items": null,
"max_items": null,
"max_length": null,
"max_properties": null,
"maximum": null,
"min_items": null,
"min_length": null,
"min_properties": null,
"minimum": null,
"nullable": null,
"pattern": null,
"properties": null,
"property_ordering": null,
"required": null,
"title": null,
"type": "STRING"
}
},
"property_ordering": null,
"required": [
"request"
],
"title": null,
"type": "OBJECT"
},
"parameters_json_schema": null,
"response": {
"additional_properties": null,
"defs": null,
"ref": null,
"any_of": null,
"default": null,
"description": null,
"enum": null,
"example": null,
"format": null,
"items": {
"additional_properties": null,
"defs": null,
"ref": null,
"any_of": null,
"default": null,
"description": null,
"enum": null,
"example": null,
"format": null,
"items": null,
"max_items": null,
"max_length": null,
"max_properties": null,
"maximum": null,
"min_items": null,
"min_length": null,
"min_properties": null,
"minimum": null,
"nullable": null,
"pattern": null,
"properties": null,
"property_ordering": null,
"required": null,
"title": null,
"type": "OBJECT"
},
"max_items": null,
"max_length": null,
"max_properties": null,
"maximum": null,
"min_items": null,
"min_length": null,
"min_properties": null,
"minimum": null,
"nullable": null,
"pattern": null,
"properties": null,
"property_ordering": null,
"required": null,
"title": null,
"type": "ARRAY"
},
"response_json_schema": null
}
],
"retrieval": null,
"google_search": null,
"google_search_retrieval": null,
"enterprise_web_search": null,
"google_maps": null,
"url_context": null,
"computer_use": null,
"code_execution": null
}
]
}
}
}
},
"expected_invocation": {
"invocation_id": "",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": "what promotions are there?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": "Jeff"
}
],
"role": "model"
},
"intermediate_data": {
"invocation_events": [
{
"author": "root_agent",
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": {
"id": null,
"args": {
"request": "PROMOTIONS"
},
"name": "commerce_system"
},
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": null
}
],
"role": "model"
}
},
{
"author": "root_agent",
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": {
"will_continue": null,
"scheduling": null,
"parts": null,
"id": null,
"name": "commerce_system",
"response": {
"result": [
{
"name": "Free delivery",
"min_price": 50
}
]
}
},
"text": null
}
],
"role": "user"
}
}
]
},
"creation_timestamp": 1760829983.020511,
"rubrics": null,
"app_details": null
},
"eval_metric_results": [
{
"metric_name": "tool_trajectory_avg_score",
"threshold": 1,
"judge_model_options": null,
"criterion": null,
"score": 1,
"eval_status": 1,
"details": {
"rubric_scores": null
}
},
{
"metric_name": "response_match_score",
"threshold": 0.7,
"judge_model_options": null,
"criterion": null,
"score": 0,
"eval_status": 2,
"details": {
"rubric_scores": null
}
}
]
}
],
"session_id": "___eval___session___a8951bfd-0b77-4018-97b4-0f747e6b97ee",
"session_details": {
"id": "___eval___session___a8951bfd-0b77-4018-97b4-0f747e6b97ee",
"app_name": "app",
"user_id": "user",
"state": {
"__llm_request_key__": "aa1de94b-9076-43dd-b3a1-52c6a918a68b"
},
"events": [
{
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": "what promotions are there?"
}
],
"role": "user"
},
"grounding_metadata": null,
"partial": null,
"turn_complete": null,
"finish_reason": null,
"error_code": null,
"error_message": null,
"interrupted": null,
"custom_metadata": null,
"usage_metadata": null,
"live_session_resumption_update": null,
"input_transcription": null,
"output_transcription": null,
"avg_logprobs": null,
"logprobs_result": null,
"cache_metadata": null,
"citation_metadata": null,
"invocation_id": "e-bbbe4ef2-5354-418c-a474-fb7030073c1a",
"author": "user",
"actions": {
"skip_summarization": null,
"state_delta": {},
"artifact_delta": {},
"transfer_to_agent": null,
"escalate": null,
"requested_auth_configs": {},
"requested_tool_confirmations": {},
"compaction": null,
"end_of_agent": null,
"agent_state": null,
"rewind_before_invocation_id": null
},
"long_running_tool_ids": null,
"branch": null,
"id": "05985875-0c0d-45ff-8708-d8e42874dd94",
"timestamp": 1761243001.489255
},
{
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": "REDACTED",
"function_call": {
"id": "adk-c10cd01d-4e22-4c83-a2f1-f4162879cef4",
"args": {
"request": "PROMOTIONS"
},
"name": "commerce_system"
},
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": null
}
],
"role": "model"
},
"grounding_metadata": null,
"partial": null,
"turn_complete": null,
"finish_reason": "STOP",
"error_code": null,
"error_message": null,
"interrupted": null,
"custom_metadata": {
"__llm_request_key__": "1a374b32-a241-4b0e-b1d2-4589dfa1c68e"
},
"usage_metadata": {
"cache_tokens_details": null,
"cached_content_token_count": null,
"candidates_token_count": 6,
"candidates_tokens_details": [
{
"modality": "TEXT",
"token_count": 6
}
],
"prompt_token_count": 59,
"prompt_tokens_details": [
{
"modality": "TEXT",
"token_count": 59
}
],
"thoughts_token_count": 109,
"tool_use_prompt_token_count": null,
"tool_use_prompt_tokens_details": null,
"total_token_count": 174,
"traffic_type": "ON_DEMAND"
},
"live_session_resumption_update": null,
"input_transcription": null,
"output_transcription": null,
"avg_logprobs": -0.6765135129292806,
"logprobs_result": null,
"cache_metadata": null,
"citation_metadata": null,
"invocation_id": "e-bbbe4ef2-5354-418c-a474-fb7030073c1a",
"author": "root_agent",
"actions": {
"skip_summarization": null,
"state_delta": {
"__llm_request_key__": "1a374b32-a241-4b0e-b1d2-4589dfa1c68e"
},
"artifact_delta": {},
"transfer_to_agent": null,
"escalate": null,
"requested_auth_configs": {},
"requested_tool_confirmations": {},
"compaction": null,
"end_of_agent": null,
"agent_state": null,
"rewind_before_invocation_id": null
},
"long_running_tool_ids": [],
"branch": null,
"id": "01b86f88-ad9d-4c2d-b8dc-60e2c30f381f",
"timestamp": 1761243001.490536
},
{
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": {
"will_continue": null,
"scheduling": null,
"parts": null,
"id": "adk-c10cd01d-4e22-4c83-a2f1-f4162879cef4",
"name": "commerce_system",
"response": {
"result": [
{
"name": "Free delivery",
"min_price": 50
}
]
}
},
"text": null
}
],
"role": "user"
},
"grounding_metadata": null,
"partial": null,
"turn_complete": null,
"finish_reason": null,
"error_code": null,
"error_message": null,
"interrupted": null,
"custom_metadata": null,
"usage_metadata": null,
"live_session_resumption_update": null,
"input_transcription": null,
"output_transcription": null,
"avg_logprobs": null,
"logprobs_result": null,
"cache_metadata": null,
"citation_metadata": null,
"invocation_id": "e-bbbe4ef2-5354-418c-a474-fb7030073c1a",
"author": "root_agent",
"actions": {
"skip_summarization": null,
"state_delta": {},
"artifact_delta": {},
"transfer_to_agent": null,
"escalate": null,
"requested_auth_configs": {},
"requested_tool_confirmations": {},
"compaction": null,
"end_of_agent": null,
"agent_state": null,
"rewind_before_invocation_id": null
},
"long_running_tool_ids": null,
"branch": null,
"id": "a548939b-d47f-419a-ba22-13fa989044e6",
"timestamp": 1761243013.451392
},
{
"content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"inline_data": null,
"file_data": null,
"thought_signature": null,
"function_call": null,
"code_execution_result": null,
"executable_code": null,
"function_response": null,
"text": "There is a free delivery promotion for orders over $50. \n"
}
],
"role": "model"
},
"grounding_metadata": null,
"partial": null,
"turn_complete": null,
"finish_reason": "STOP",
"error_code": null,
"error_message": null,
"interrupted": null,
"custom_metadata": {
"__llm_request_key__": "aa1de94b-9076-43dd-b3a1-52c6a918a68b"
},
"usage_metadata": {
"cache_tokens_details": null,
"cached_content_token_count": null,
"candidates_token_count": 15,
"candidates_tokens_details": [
{
"modality": "TEXT",
"token_count": 15
}
],
"prompt_token_count": 76,
"prompt_tokens_details": [
{
"modality": "TEXT",
"token_count": 183
}
],
"thoughts_token_count": null,
"tool_use_prompt_token_count": null,
"tool_use_prompt_tokens_details": null,
"total_token_count": 91,
"traffic_type": "ON_DEMAND"
},
"live_session_resumption_update": null,
"input_transcription": null,
"output_transcription": null,
"avg_logprobs": -0.20742103258768718,
"logprobs_result": null,
"cache_metadata": null,
"citation_metadata": null,
"invocation_id": "e-bbbe4ef2-5354-418c-a474-fb7030073c1a",
"author": "root_agent",
"actions": {
"skip_summarization": null,
"state_delta": {
"__llm_request_key__": "aa1de94b-9076-43dd-b3a1-52c6a918a68b"
},
"artifact_delta": {},
"transfer_to_agent": null,
"escalate": null,
"requested_auth_configs": {},
"requested_tool_confirmations": {},
"compaction": null,
"end_of_agent": null,
"agent_state": null,
"rewind_before_invocation_id": null
},
"long_running_tool_ids": null,
"branch": null,
"id": "45da8750-0d58-4670-a56e-763a2b515055",
"timestamp": 1761243013.452236
}
],
"last_update_time": 1761243013.452236
},
"user_id": "user"
}
],
"creation_timestamp": 1761243015.4547887
}
Metadata
Metadata
Assignees
Labels
No labels