15 | 15 |     ModelSettings,
16 | 16 | )
17 | 17 | from agents.items import (
   | 18 | +    McpCall,
18 | 19 |     ResponseOutputMessage,
19 | 20 |     ResponseOutputText,
20 | 21 |     ResponseFunctionToolCall,
@@ -683,6 +684,307 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
683 | 684 |     assert transaction["contexts"]["trace"]["status"] == "error"
684 | 685 |
685 | 686 |
    | 687 | +@pytest.mark.asyncio
    | 688 | +async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
    | 689 | +    """
    | 690 | +    Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
    | 691 | +    """
    | 692 | +
    | 693 | +    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
    | 694 | +        with patch(
    | 695 | +            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
    | 696 | +        ) as mock_get_response:
    | 697 | +            # Create a McpCall object
    | 698 | +            mcp_call = McpCall(
    | 699 | +                id="mcp_call_123",
    | 700 | +                name="test_mcp_tool",
    | 701 | +                arguments='{"query": "search term"}',
    | 702 | +                output="MCP tool executed successfully",
    | 703 | +                error=None,
    | 704 | +                type="mcp_call",
    | 705 | +                server_label="test_server",
    | 706 | +            )
    | 707 | +
    | 708 | +            # Create a ModelResponse with an McpCall in the output
    | 709 | +            mcp_response = ModelResponse(
    | 710 | +                output=[mcp_call],
    | 711 | +                usage=Usage(
    | 712 | +                    requests=1,
    | 713 | +                    input_tokens=10,
    | 714 | +                    output_tokens=5,
    | 715 | +                    total_tokens=15,
    | 716 | +                ),
    | 717 | +                response_id="resp_mcp_123",
    | 718 | +            )
    | 719 | +
    | 720 | +            # Final response after MCP tool execution
    | 721 | +            final_response = ModelResponse(
    | 722 | +                output=[
    | 723 | +                    ResponseOutputMessage(
    | 724 | +                        id="msg_final",
    | 725 | +                        type="message",
    | 726 | +                        status="completed",
    | 727 | +                        content=[
    | 728 | +                            ResponseOutputText(
    | 729 | +                                text="Task completed using MCP tool",
    | 730 | +                                type="output_text",
    | 731 | +                                annotations=[],
    | 732 | +                            )
    | 733 | +                        ],
    | 734 | +                        role="assistant",
    | 735 | +                    )
    | 736 | +                ],
    | 737 | +                usage=Usage(
    | 738 | +                    requests=1,
    | 739 | +                    input_tokens=15,
    | 740 | +                    output_tokens=10,
    | 741 | +                    total_tokens=25,
    | 742 | +                ),
    | 743 | +                response_id="resp_final_123",
    | 744 | +            )
    | 745 | +
    | 746 | +            mock_get_response.side_effect = [mcp_response, final_response]
    | 747 | +
    | 748 | +            sentry_init(
    | 749 | +                integrations=[OpenAIAgentsIntegration()],
    | 750 | +                traces_sample_rate=1.0,
    | 751 | +                send_default_pii=True,
    | 752 | +            )
    | 753 | +
    | 754 | +            events = capture_events()
    | 755 | +
    | 756 | +            await agents.Runner.run(
    | 757 | +                test_agent,
    | 758 | +                "Please use MCP tool",
    | 759 | +                run_config=test_run_config,
    | 760 | +            )
    | 761 | +
    | 762 | +            (transaction,) = events
    | 763 | +            spans = transaction["spans"]
    | 764 | +
    | 765 | +            # Find the MCP execute_tool span
    | 766 | +            mcp_tool_span = None
    | 767 | +            for span in spans:
    | 768 | +                if (
    | 769 | +                    span.get("description") == "execute_tool test_mcp_tool"
    | 770 | +                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
    | 771 | +                ):
    | 772 | +                    mcp_tool_span = span
    | 773 | +                    break
    | 774 | +
    | 775 | +            # Verify the MCP tool span was created
    | 776 | +            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
    | 777 | +            assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
    | 778 | +            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
    | 779 | +            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
    | 780 | +            assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}'
    | 781 | +            assert (
    | 782 | +                mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully"
    | 783 | +            )
    | 784 | +
    | 785 | +            # Verify no error status since error was None
    | 786 | +            assert mcp_tool_span.get("tags", {}).get("status") != "error"
    | 787 | +
    | 788 | +
    | 789 | +@pytest.mark.asyncio
    | 790 | +async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent):
    | 791 | +    """
    | 792 | +    Test that MCP tool calls with errors are tracked with error status.
    | 793 | +    """
    | 794 | +
    | 795 | +    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
    | 796 | +        with patch(
    | 797 | +            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
    | 798 | +        ) as mock_get_response:
    | 799 | +            # Create a McpCall object with an error
    | 800 | +            mcp_call_with_error = McpCall(
    | 801 | +                id="mcp_call_error_123",
    | 802 | +                name="failing_mcp_tool",
    | 803 | +                arguments='{"query": "test"}',
    | 804 | +                output=None,
    | 805 | +                error="MCP tool execution failed",
    | 806 | +                type="mcp_call",
    | 807 | +                server_label="test_server",
    | 808 | +            )
    | 809 | +
    | 810 | +            # Create a ModelResponse with a failing McpCall
    | 811 | +            mcp_response = ModelResponse(
    | 812 | +                output=[mcp_call_with_error],
    | 813 | +                usage=Usage(
    | 814 | +                    requests=1,
    | 815 | +                    input_tokens=10,
    | 816 | +                    output_tokens=5,
    | 817 | +                    total_tokens=15,
    | 818 | +                ),
    | 819 | +                response_id="resp_mcp_error_123",
    | 820 | +            )
    | 821 | +
    | 822 | +            # Final response after error
    | 823 | +            final_response = ModelResponse(
    | 824 | +                output=[
    | 825 | +                    ResponseOutputMessage(
    | 826 | +                        id="msg_final",
    | 827 | +                        type="message",
    | 828 | +                        status="completed",
    | 829 | +                        content=[
    | 830 | +                            ResponseOutputText(
    | 831 | +                                text="The MCP tool encountered an error",
    | 832 | +                                type="output_text",
    | 833 | +                                annotations=[],
    | 834 | +                            )
    | 835 | +                        ],
    | 836 | +                        role="assistant",
    | 837 | +                    )
    | 838 | +                ],
    | 839 | +                usage=Usage(
    | 840 | +                    requests=1,
    | 841 | +                    input_tokens=15,
    | 842 | +                    output_tokens=10,
    | 843 | +                    total_tokens=25,
    | 844 | +                ),
    | 845 | +                response_id="resp_final_error_123",
    | 846 | +            )
    | 847 | +
    | 848 | +            mock_get_response.side_effect = [mcp_response, final_response]
    | 849 | +
    | 850 | +            sentry_init(
    | 851 | +                integrations=[OpenAIAgentsIntegration()],
    | 852 | +                traces_sample_rate=1.0,
    | 853 | +                send_default_pii=True,
    | 854 | +            )
    | 855 | +
    | 856 | +            events = capture_events()
    | 857 | +
    | 858 | +            await agents.Runner.run(
    | 859 | +                test_agent,
    | 860 | +                "Please use failing MCP tool",
    | 861 | +                run_config=test_run_config,
    | 862 | +            )
    | 863 | +
    | 864 | +            (transaction,) = events
    | 865 | +            spans = transaction["spans"]
    | 866 | +
    | 867 | +            # Find the MCP execute_tool span with error
    | 868 | +            mcp_tool_span = None
    | 869 | +            for span in spans:
    | 870 | +                if (
    | 871 | +                    span.get("description") == "execute_tool failing_mcp_tool"
    | 872 | +                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
    | 873 | +                ):
    | 874 | +                    mcp_tool_span = span
    | 875 | +                    break
    | 876 | +
    | 877 | +            # Verify the MCP tool span was created with error status
    | 878 | +            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
    | 879 | +            assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool"
    | 880 | +            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
    | 881 | +            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool"
    | 882 | +            assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}'
    | 883 | +            assert mcp_tool_span["data"]["gen_ai.tool.output"] is None
    | 884 | +
    | 885 | +            # Verify error status was set
    | 886 | +            assert mcp_tool_span["tags"]["status"] == "error"
    | 887 | +
    | 888 | +
    | 889 | +@pytest.mark.asyncio
    | 890 | +async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent):
    | 891 | +    """
    | 892 | +    Test that MCP tool input/output are not included when send_default_pii is False.
    | 893 | +    """
    | 894 | +
    | 895 | +    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
    | 896 | +        with patch(
    | 897 | +            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
    | 898 | +        ) as mock_get_response:
    | 899 | +            # Create a McpCall object
    | 900 | +            mcp_call = McpCall(
    | 901 | +                id="mcp_call_pii_123",
    | 902 | +                name="test_mcp_tool",
    | 903 | +                arguments='{"query": "sensitive data"}',
    | 904 | +                output="Result with sensitive info",
    | 905 | +                error=None,
    | 906 | +                type="mcp_call",
    | 907 | +                server_label="test_server",
    | 908 | +            )
    | 909 | +
    | 910 | +            # Create a ModelResponse with an McpCall
    | 911 | +            mcp_response = ModelResponse(
    | 912 | +                output=[mcp_call],
    | 913 | +                usage=Usage(
    | 914 | +                    requests=1,
    | 915 | +                    input_tokens=10,
    | 916 | +                    output_tokens=5,
    | 917 | +                    total_tokens=15,
    | 918 | +                ),
    | 919 | +                response_id="resp_mcp_123",
    | 920 | +            )
    | 921 | +
    | 922 | +            # Final response
    | 923 | +            final_response = ModelResponse(
    | 924 | +                output=[
    | 925 | +                    ResponseOutputMessage(
    | 926 | +                        id="msg_final",
    | 927 | +                        type="message",
    | 928 | +                        status="completed",
    | 929 | +                        content=[
    | 930 | +                            ResponseOutputText(
    | 931 | +                                text="Task completed",
    | 932 | +                                type="output_text",
    | 933 | +                                annotations=[],
    | 934 | +                            )
    | 935 | +                        ],
    | 936 | +                        role="assistant",
    | 937 | +                    )
    | 938 | +                ],
    | 939 | +                usage=Usage(
    | 940 | +                    requests=1,
    | 941 | +                    input_tokens=15,
    | 942 | +                    output_tokens=10,
    | 943 | +                    total_tokens=25,
    | 944 | +                ),
    | 945 | +                response_id="resp_final_123",
    | 946 | +            )
    | 947 | +
    | 948 | +            mock_get_response.side_effect = [mcp_response, final_response]
    | 949 | +
    | 950 | +            sentry_init(
    | 951 | +                integrations=[OpenAIAgentsIntegration()],
    | 952 | +                traces_sample_rate=1.0,
    | 953 | +                send_default_pii=False,  # PII disabled
    | 954 | +            )
    | 955 | +
    | 956 | +            events = capture_events()
    | 957 | +
    | 958 | +            await agents.Runner.run(
    | 959 | +                test_agent,
    | 960 | +                "Please use MCP tool",
    | 961 | +                run_config=test_run_config,
    | 962 | +            )
    | 963 | +
    | 964 | +            (transaction,) = events
    | 965 | +            spans = transaction["spans"]
    | 966 | +
    | 967 | +            # Find the MCP execute_tool span
    | 968 | +            mcp_tool_span = None
    | 969 | +            for span in spans:
    | 970 | +                if (
    | 971 | +                    span.get("description") == "execute_tool test_mcp_tool"
    | 972 | +                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
    | 973 | +                ):
    | 974 | +                    mcp_tool_span = span
    | 975 | +                    break
    | 976 | +
    | 977 | +            # Verify the MCP tool span was created but without input/output
    | 978 | +            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
    | 979 | +            assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
    | 980 | +            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
    | 981 | +            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
    | 982 | +
    | 983 | +            # Verify input and output are not included when send_default_pii is False
    | 984 | +            assert "gen_ai.tool.input" not in mcp_tool_span["data"]
    | 985 | +            assert "gen_ai.tool.output" not in mcp_tool_span["data"]
    | 986 | +
    | 987 | +
686 | 988 | @pytest.mark.asyncio
687 | 989 | async def test_multiple_agents_asyncio(
688 | 990 |     sentry_init, capture_events, test_agent, mock_model_response