Skip to content

Commit 2e4457c

Browse files
fix(openai-agents): also emit spans for MCP tool calls done by the LLM (#4875)
### Description We cannot directly intercept MCP Tool calls, as they are done remotely by the LLM and not in the Agent itself. However, we see when such a tool call took place, so we can emit a zero-length span with the tool call specifics. It will start at the same time as the parent span. Closes https://linear.app/getsentry/issue/TET-1192/openai-agents-hosted-mcp-calls-cannot-be-wrapped-in-an-execute-tool <!-- CURSOR_SUMMARY --> --- > [!NOTE] > Emit execute_tool spans for MCP tool calls detected in agent results, with tool metadata, input/output (PII-gated), and error status. > > - **Tracing/Spans (openai_agents)**: > - Add `utils._create_mcp_execute_tool_spans` to emit `OP.GEN_AI_EXECUTE_TOOL` spans for MCP tool calls (`McpCall`) found in `result.output`. > - Sets `GEN_AI_TOOL_TYPE=mcp`, `GEN_AI_TOOL_NAME`, propagates input/output when PII allowed, and marks `SPANSTATUS.ERROR` on error. > - Spans start at the parent span's start time (zero-length representation of remote call). > - Wire into `spans/ai_client.update_ai_client_span` to create these tool spans after setting usage/input/output data. > - Update imports to include `SPANSTATUS` and `OP`. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 96df8c1. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> --------- Co-authored-by: Ivana Kellyer <ivana.kellyer@sentry.io>
1 parent b838765 commit 2e4457c

File tree

3 files changed

+329
-1
lines changed

3 files changed

+329
-1
lines changed

sentry_sdk/integrations/openai_agents/spans/ai_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
     _set_input_data,
     _set_output_data,
     _set_usage_data,
+    _create_mcp_execute_tool_spans,
 )

 from typing import TYPE_CHECKING
@@ -38,3 +39,4 @@ def update_ai_client_span(span, agent, get_response_kwargs, result):
     _set_usage_data(span, result.usage)
     _set_input_data(span, get_response_kwargs)
     _set_output_data(span, result)
+    _create_mcp_execute_tool_spans(span, result)

sentry_sdk/integrations/openai_agents/utils.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
 import sentry_sdk
 from sentry_sdk.ai.utils import set_data_normalized
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
 from sentry_sdk.integrations import DidNotEnable
 from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.tracing_utils import set_span_errored
@@ -156,3 +156,27 @@ def _set_output_data(span, result):
         set_data_normalized(
             span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
         )
+
+
+def _create_mcp_execute_tool_spans(span, result):
+    # type: (sentry_sdk.tracing.Span, agents.Result) -> None
+    for output in result.output:
+        if output.__class__.__name__ == "McpCall":
+            with sentry_sdk.start_span(
+                op=OP.GEN_AI_EXECUTE_TOOL,
+                description=f"execute_tool {output.name}",
+                start_timestamp=span.start_timestamp,
+            ) as execute_tool_span:
+                set_data_normalized(execute_tool_span, SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
+                set_data_normalized(
+                    execute_tool_span, SPANDATA.GEN_AI_TOOL_NAME, output.name
+                )
+                if should_send_default_pii():
+                    execute_tool_span.set_data(
+                        SPANDATA.GEN_AI_TOOL_INPUT, output.arguments
+                    )
+                    execute_tool_span.set_data(
+                        SPANDATA.GEN_AI_TOOL_OUTPUT, output.output
+                    )
+                if output.error:
+                    execute_tool_span.set_status(SPANSTATUS.ERROR)

tests/integrations/openai_agents/test_openai_agents.py

Lines changed: 302 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
ModelSettings,
1616
)
1717
from agents.items import (
18+
McpCall,
1819
ResponseOutputMessage,
1920
ResponseOutputText,
2021
ResponseFunctionToolCall,
@@ -683,6 +684,307 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
683684
assert transaction["contexts"]["trace"]["status"] == "error"
684685

685686

687+
@pytest.mark.asyncio
688+
async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
689+
"""
690+
Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
691+
"""
692+
693+
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
694+
with patch(
695+
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
696+
) as mock_get_response:
697+
# Create a McpCall object
698+
mcp_call = McpCall(
699+
id="mcp_call_123",
700+
name="test_mcp_tool",
701+
arguments='{"query": "search term"}',
702+
output="MCP tool executed successfully",
703+
error=None,
704+
type="mcp_call",
705+
server_label="test_server",
706+
)
707+
708+
# Create a ModelResponse with an McpCall in the output
709+
mcp_response = ModelResponse(
710+
output=[mcp_call],
711+
usage=Usage(
712+
requests=1,
713+
input_tokens=10,
714+
output_tokens=5,
715+
total_tokens=15,
716+
),
717+
response_id="resp_mcp_123",
718+
)
719+
720+
# Final response after MCP tool execution
721+
final_response = ModelResponse(
722+
output=[
723+
ResponseOutputMessage(
724+
id="msg_final",
725+
type="message",
726+
status="completed",
727+
content=[
728+
ResponseOutputText(
729+
text="Task completed using MCP tool",
730+
type="output_text",
731+
annotations=[],
732+
)
733+
],
734+
role="assistant",
735+
)
736+
],
737+
usage=Usage(
738+
requests=1,
739+
input_tokens=15,
740+
output_tokens=10,
741+
total_tokens=25,
742+
),
743+
response_id="resp_final_123",
744+
)
745+
746+
mock_get_response.side_effect = [mcp_response, final_response]
747+
748+
sentry_init(
749+
integrations=[OpenAIAgentsIntegration()],
750+
traces_sample_rate=1.0,
751+
send_default_pii=True,
752+
)
753+
754+
events = capture_events()
755+
756+
await agents.Runner.run(
757+
test_agent,
758+
"Please use MCP tool",
759+
run_config=test_run_config,
760+
)
761+
762+
(transaction,) = events
763+
spans = transaction["spans"]
764+
765+
# Find the MCP execute_tool span
766+
mcp_tool_span = None
767+
for span in spans:
768+
if (
769+
span.get("description") == "execute_tool test_mcp_tool"
770+
and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
771+
):
772+
mcp_tool_span = span
773+
break
774+
775+
# Verify the MCP tool span was created
776+
assert mcp_tool_span is not None, "MCP execute_tool span was not created"
777+
assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
778+
assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
779+
assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
780+
assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}'
781+
assert (
782+
mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully"
783+
)
784+
785+
# Verify no error status since error was None
786+
assert mcp_tool_span.get("tags", {}).get("status") != "error"
787+
788+
789+
@pytest.mark.asyncio
790+
async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent):
791+
"""
792+
Test that MCP tool calls with errors are tracked with error status.
793+
"""
794+
795+
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
796+
with patch(
797+
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
798+
) as mock_get_response:
799+
# Create a McpCall object with an error
800+
mcp_call_with_error = McpCall(
801+
id="mcp_call_error_123",
802+
name="failing_mcp_tool",
803+
arguments='{"query": "test"}',
804+
output=None,
805+
error="MCP tool execution failed",
806+
type="mcp_call",
807+
server_label="test_server",
808+
)
809+
810+
# Create a ModelResponse with a failing McpCall
811+
mcp_response = ModelResponse(
812+
output=[mcp_call_with_error],
813+
usage=Usage(
814+
requests=1,
815+
input_tokens=10,
816+
output_tokens=5,
817+
total_tokens=15,
818+
),
819+
response_id="resp_mcp_error_123",
820+
)
821+
822+
# Final response after error
823+
final_response = ModelResponse(
824+
output=[
825+
ResponseOutputMessage(
826+
id="msg_final",
827+
type="message",
828+
status="completed",
829+
content=[
830+
ResponseOutputText(
831+
text="The MCP tool encountered an error",
832+
type="output_text",
833+
annotations=[],
834+
)
835+
],
836+
role="assistant",
837+
)
838+
],
839+
usage=Usage(
840+
requests=1,
841+
input_tokens=15,
842+
output_tokens=10,
843+
total_tokens=25,
844+
),
845+
response_id="resp_final_error_123",
846+
)
847+
848+
mock_get_response.side_effect = [mcp_response, final_response]
849+
850+
sentry_init(
851+
integrations=[OpenAIAgentsIntegration()],
852+
traces_sample_rate=1.0,
853+
send_default_pii=True,
854+
)
855+
856+
events = capture_events()
857+
858+
await agents.Runner.run(
859+
test_agent,
860+
"Please use failing MCP tool",
861+
run_config=test_run_config,
862+
)
863+
864+
(transaction,) = events
865+
spans = transaction["spans"]
866+
867+
# Find the MCP execute_tool span with error
868+
mcp_tool_span = None
869+
for span in spans:
870+
if (
871+
span.get("description") == "execute_tool failing_mcp_tool"
872+
and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
873+
):
874+
mcp_tool_span = span
875+
break
876+
877+
# Verify the MCP tool span was created with error status
878+
assert mcp_tool_span is not None, "MCP execute_tool span was not created"
879+
assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool"
880+
assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
881+
assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool"
882+
assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}'
883+
assert mcp_tool_span["data"]["gen_ai.tool.output"] is None
884+
885+
# Verify error status was set
886+
assert mcp_tool_span["tags"]["status"] == "error"
887+
888+
889+
@pytest.mark.asyncio
890+
async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent):
891+
"""
892+
Test that MCP tool input/output are not included when send_default_pii is False.
893+
"""
894+
895+
with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
896+
with patch(
897+
"agents.models.openai_responses.OpenAIResponsesModel.get_response"
898+
) as mock_get_response:
899+
# Create a McpCall object
900+
mcp_call = McpCall(
901+
id="mcp_call_pii_123",
902+
name="test_mcp_tool",
903+
arguments='{"query": "sensitive data"}',
904+
output="Result with sensitive info",
905+
error=None,
906+
type="mcp_call",
907+
server_label="test_server",
908+
)
909+
910+
# Create a ModelResponse with an McpCall
911+
mcp_response = ModelResponse(
912+
output=[mcp_call],
913+
usage=Usage(
914+
requests=1,
915+
input_tokens=10,
916+
output_tokens=5,
917+
total_tokens=15,
918+
),
919+
response_id="resp_mcp_123",
920+
)
921+
922+
# Final response
923+
final_response = ModelResponse(
924+
output=[
925+
ResponseOutputMessage(
926+
id="msg_final",
927+
type="message",
928+
status="completed",
929+
content=[
930+
ResponseOutputText(
931+
text="Task completed",
932+
type="output_text",
933+
annotations=[],
934+
)
935+
],
936+
role="assistant",
937+
)
938+
],
939+
usage=Usage(
940+
requests=1,
941+
input_tokens=15,
942+
output_tokens=10,
943+
total_tokens=25,
944+
),
945+
response_id="resp_final_123",
946+
)
947+
948+
mock_get_response.side_effect = [mcp_response, final_response]
949+
950+
sentry_init(
951+
integrations=[OpenAIAgentsIntegration()],
952+
traces_sample_rate=1.0,
953+
send_default_pii=False, # PII disabled
954+
)
955+
956+
events = capture_events()
957+
958+
await agents.Runner.run(
959+
test_agent,
960+
"Please use MCP tool",
961+
run_config=test_run_config,
962+
)
963+
964+
(transaction,) = events
965+
spans = transaction["spans"]
966+
967+
# Find the MCP execute_tool span
968+
mcp_tool_span = None
969+
for span in spans:
970+
if (
971+
span.get("description") == "execute_tool test_mcp_tool"
972+
and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
973+
):
974+
mcp_tool_span = span
975+
break
976+
977+
# Verify the MCP tool span was created but without input/output
978+
assert mcp_tool_span is not None, "MCP execute_tool span was not created"
979+
assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
980+
assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
981+
assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
982+
983+
# Verify input and output are not included when send_default_pii is False
984+
assert "gen_ai.tool.input" not in mcp_tool_span["data"]
985+
assert "gen_ai.tool.output" not in mcp_tool_span["data"]
986+
987+
686988
@pytest.mark.asyncio
687989
async def test_multiple_agents_asyncio(
688990
sentry_init, capture_events, test_agent, mock_model_response

0 commit comments

Comments
 (0)