From 4891305ff01b61c4b0b51f9bb6041b15642f4043 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Wed, 22 Oct 2025 14:18:08 -0700 Subject: [PATCH 1/8] wip --- .../examples/generate_content_with_images.py | 219 ++++++++++++++++++ .../pyproject.toml | 3 + .../_request_attributes_extractor.py | 54 ++++- .../tests/test_instrumentation.py | 194 ++++++++++++++++ 4 files changed, 469 insertions(+), 1 deletion(-) create mode 100644 python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py diff --git a/python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py b/python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py new file mode 100644 index 0000000000..de800571e6 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +""" +Example demonstrating Google GenAI instrumentation with image attachments. +This example shows that the instrumentation now properly handles: +- Part.from_bytes() for base64 encoded images +- Part.from_uri() for URI-referenced images +- PDF and other file attachments +""" + +import asyncio +import base64 +import os + +import requests +from google import genai +from google.genai.types import Content, GenerateContentConfig, Part +from opentelemetry.exporter.otlp.proto.http.trace_exporter import ( + OTLPSpanExporter, # type: ignore[import-not-found] +) +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor + +from openinference.instrumentation.google_genai import GoogleGenAIInstrumentor + +# Phoenix endpoint +endpoint = "http://localhost:6006/v1/traces" +tracer_provider = trace_sdk.TracerProvider() +tracer_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter(endpoint))) +tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter())) + + +def create_test_image_data() -> bytes: + """Create a simple 1x1 pixel PNG for testing.""" + base64_png = ( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" + ) + return base64.b64decode(base64_png) + + +def test_inline_data_image(): + print("🖼️ Testing inline_data (Part.from_bytes) with image...") + try: + client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")) + + image_data = create_test_image_data() + image_part = Part.from_bytes(data=image_data, mime_type="image/png") + + content = Content( + role="user", + parts=[ + Part.from_text(text="Describe this image:"), + image_part, + ], + ) + + config = GenerateContentConfig( + system_instruction="You are a helpful assistant. Describe what you see in images." + ) + + print(" Making API call with inline image data...") + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=content, # ✅ correct for 1.46.0 + config=config, # ✅ correct for 1.46.0 + ) + + print(f" ✅ Success! Response: {response.text[:100]}...") + return True + + except Exception as e: + print(f" ❌ Error: {e}") + return False + + +def test_inline_data_pdf(): + print("📄 Testing inline_data (Part.from_bytes) with PDF...") + try: + client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")) + + fake_pdf_data = ( + b"%PDF-1.4\n" + b"1 0 obj\n<<\n/Type /Catalog\n/Pages 2 0 R\n>>\nendobj\n" + b"2 0 obj\n<<\n/Type /Pages\n/Kids [3 0 R]\n/Count 1\n>>\nendobj\n" + b"""3 0 obj\n<<\n/Type /Page\n/Parent 2 0 R\n/MediaBox [0 0 612 792]\n/Contents <4 0 R> + \n>>\nendobj\n""" + b"""4 0 obj\n<<\n/Length 44\n>>\nstream\nBT\n/F1 12 Tf\n72 720 Td\n(Tes + t PDF content) Tj\nET\nendstream\nendobj\n""" + b"""xref\n0 5\n0000000000 65535 f \n0000000010 00000 n \n0000000053 00000 n \n0000000100 + 00000 n \n0000000178 00000 n \n""" + b"""trailer\n<<\n/Size 5\n/Root 1 0 R\n>>\nstartxref\n273\n%%EOF""" + ) + + pdf_part = Part.from_bytes(data=fake_pdf_data, mime_type="application/pdf") + + content = Content( + role="user", + parts=[ + Part.from_text(text="Analyze this PDF document:"), + pdf_part, + ], + ) + + print(" Making API call with inline PDF data...") + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=content, + ) + + print(f" ✅ Success! Response: {response.text[:100]}...") + return True + + except Exception as e: + print(f" ❌ Error: {e}") + return False + + +def test_file_data_uri(): + print("🔗 Testing file_data (Part.from_uri equivalent via bytes)...") + try: + client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")) + + # fetch the image manually - using a simple, reliable image + img_bytes = requests.get( + "https://httpbin.org/image/png", + headers={"User-Agent": "Mozilla/5.0 (compatible; OpenInference-Test)"}, + ).content + + image_part = Part.from_bytes(data=img_bytes, mime_type="image/png") + + content = Content( + role="user", + parts=[ + Part.from_text(text="What do you see in this image?"), + image_part, + ], + ) + + response = client.models.generate_content( + model="gemini-2.0-flash", + contents=content, + ) + print(f" ✅ Success! Response: {response.text[:100]}...") + return True + + except Exception as e: + print(f" ❌ Error: {e}") + return False + + +async def test_async_with_images(): + print("🔄 Testing async API with images...") + try: + client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")).aio + + image_data = create_test_image_data() + image_part = Part.from_bytes(data=image_data, mime_type="image/png") + + content = Content( + role="user", + parts=[ + Part.from_text(text="Describe this small test image:"), + image_part, + ], + ) + + print(" Making async API call with image...") + response = await client.models.generate_content( + model="gemini-2.0-flash", + contents=content, + ) + + print(f" ✅ Success! Async response: {response.text[:100]}...") + return True + + except Exception as e: + print(f" ❌ Error: {e}") + return False + + +def main(): + print("🚀 Testing Google GenAI Instrumentation with Images") + print("=" * 60) + print("This demonstrates that the instrumentation now properly handles:") + print("- Part.from_bytes() for inline image/file data") + print("- Part.from_uri() for URI-referenced files") + print("- No more 'Other field types not supported' errors!") + print("=" * 60) + + if not os.getenv("GEMINI_API_KEY"): + print("❌ Please set GEMINI_API_KEY environment variable") + print(" export GEMINI_API_KEY='your-api-key-here'") + return + + print("🔧 Instrumenting Google GenAI client...") + GoogleGenAIInstrumentor().instrument(tracer_provider=tracer_provider) + print(" ✅ Instrumentation enabled - traces will be sent to Phoenix!\n") + + results = [ + test_inline_data_image(), + test_inline_data_pdf(), + test_file_data_uri(), + asyncio.run(test_async_with_images()), + ] + + print("=" * 60) + print("📊 RESULTS SUMMARY:") + print(f" ✅ Successful tests: {sum(results)}/{len(results)}") + + if all(results): + print(" 🎉 All tests passed! The instrumentation fix is working!") + print(" 📈 Check Phoenix UI at http://localhost:6006 to see the traces") + else: + print(" ⚠️ Some tests failed - check API key and network connection") + + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml b/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml index a2a5eda467..f7baa0a227 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml @@ -32,6 +32,9 @@ dependencies = [ "openinference-instrumentation>=0.1.17", "openinference-semantic-conventions", "wrapt", + "opentelemetry-exporter-otlp>=1.38.0", + "opentelemetry-sdk>=1.38.0", + "google-genai>=1.46.0", ] [project.optional-dependencies] diff --git a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py index 433b3e9f5b..e4601dc0d2 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py @@ -12,7 +12,9 @@ _io_value_and_type, ) from openinference.semconv.trace import ( + ImageAttributes, MessageAttributes, + MessageContentAttributes, OpenInferenceLLMProviderValues, OpenInferenceSpanKindValues, SpanAttributes, @@ -383,6 +385,49 @@ def _get_attributes_from_function_response( id, ) + def _get_attributes_from_inline_data( + self, inline_data: Any + ) -> Iterator[Tuple[str, AttributeValue]]: + """Handle inline data (base64 encoded content) from Part.from_bytes()""" + mime_type = get_attribute(inline_data, "mime_type", "unknown") + data = get_attribute(inline_data, "data") + + if mime_type.startswith("image/"): + # Use proper semantic conventions for images + if data: + import base64 + + base64_data = base64.b64encode(data).decode() if isinstance(data, bytes) else data + data_url = f"data:{mime_type};base64,{base64_data}" + yield ( + f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}", + data_url, + ) + else: + # Fallback for images without data + yield (MessageAttributes.MESSAGE_CONTENT, f"[Image: {mime_type}]") + else: + # For non-image files, use descriptive text (no specific semantic convention available) + data_size = len(data) if data else 0 + yield (MessageAttributes.MESSAGE_CONTENT, f"[File: {mime_type}, {data_size} bytes]") + + def _get_attributes_from_file_data( + self, file_data: Any + ) -> Iterator[Tuple[str, AttributeValue]]: + """Handle file data (URI references) from Part.from_uri()""" + file_uri = get_attribute(file_data, "file_uri", "unknown") + mime_type = get_attribute(file_data, "mime_type", "unknown") + + if mime_type.startswith("image/"): + # Use proper semantic conventions for images + yield ( + f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}", + file_uri, + ) + else: + # For non-image files, use descriptive text (no specific semantic convention available) + yield (MessageAttributes.MESSAGE_CONTENT, f"[File: {mime_type} from {file_uri}]") + def _flatten_parts(self, parts: list[Part]) -> Iterator[Tuple[str, AttributeValue]]: content_values = [] tool_call_index = 0 @@ -427,8 +472,15 @@ def _get_attributes_from_part( yield from self._get_attributes_from_function_call(function_call, tool_call_index) elif function_response := get_attribute(part, "function_response"): yield from self._get_attributes_from_function_response(function_response) + elif inline_data := get_attribute(part, "inline_data"): + # Handle base64 encoded content (Part.from_bytes()) + yield from self._get_attributes_from_inline_data(inline_data) + elif file_data := get_attribute(part, "file_data"): + # Handle URI-referenced files (Part.from_uri()) + yield from self._get_attributes_from_file_data(file_data) else: - logger.exception("Other field types of parts are not supported yet") + # Change from exception to debug log for unknown part types + logger.debug(f"Unsupported part type encountered, skipping: {type(part)}") T = TypeVar("T", bound=type) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/tests/test_instrumentation.py b/python/instrumentation/openinference-instrumentation-google-genai/tests/test_instrumentation.py index 7b24d6bbe1..0871fdcf5c 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/tests/test_instrumentation.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/tests/test_instrumentation.py @@ -1427,3 +1427,197 @@ def get_weather(location: str, unit: str = "fahrenheit") -> Dict[str, Any]: # We may or may not see explicit tool call attributes in the span depending on # how Google GenAI implements it internally. The key difference is that we get # a complete text response that incorporates the function results. + + +def test_inline_data_and_file_data_part_handling(): + """Test that inline_data and file_data Part types are handled correctly without errors.""" + from unittest.mock import Mock + + from openinference.instrumentation.google_genai._request_attributes_extractor import ( + _RequestAttributesExtractor, + ) + from openinference.semconv.trace import ( + ImageAttributes, + MessageAttributes, + MessageContentAttributes, + ) + + extractor = _RequestAttributesExtractor() + + # Test 1: inline_data with image (should use proper semantic conventions) + mock_inline_data_image = Mock() + mock_inline_data_image.mime_type = "image/png" + mock_inline_data_image.data = b"fake_image_data" + + mock_part_image = Mock() + mock_part_image.text = None + mock_part_image.function_call = None + mock_part_image.function_response = None + mock_part_image.inline_data = mock_inline_data_image + mock_part_image.file_data = None + + attributes = list(extractor._get_attributes_from_part(mock_part_image, 0)) + assert len(attributes) == 1, ( + f"Expected 1 attribute for image inline_data, got {len(attributes)}" + ) + + attr_key, attr_value = attributes[0] + expected_key = f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}" + assert attr_key == expected_key, f"Expected key {expected_key}, got {attr_key}" + assert attr_value.startswith("data:image/png;base64,"), ( + f"Expected base64 data URL, got {attr_value}" + ) + + # Test 2: inline_data with PDF (should use descriptive text) + mock_inline_data_pdf = Mock() + mock_inline_data_pdf.mime_type = "application/pdf" + mock_inline_data_pdf.data = b"fake_pdf_data_12345" + + mock_part_pdf = Mock() + mock_part_pdf.text = None + mock_part_pdf.function_call = None + mock_part_pdf.function_response = None + mock_part_pdf.inline_data = mock_inline_data_pdf + mock_part_pdf.file_data = None + + attributes = list(extractor._get_attributes_from_part(mock_part_pdf, 0)) + assert len(attributes) == 1, f"Expected 1 attribute for PDF inline_data, got {len(attributes)}" + + attr_key, attr_value = attributes[0] + assert attr_key == MessageAttributes.MESSAGE_CONTENT, ( + f"Expected MESSAGE_CONTENT key, got {attr_key}" + ) + assert attr_value == "[File: application/pdf, 19 bytes]", ( + f"Expected file description, got {attr_value}" + ) + + # Test 3: file_data with image (should use proper semantic conventions) + mock_file_data_image = Mock() + mock_file_data_image.mime_type = "image/jpeg" + mock_file_data_image.file_uri = "gs://bucket/image.jpg" + + mock_part_file_image = Mock() + mock_part_file_image.text = None + mock_part_file_image.function_call = None + mock_part_file_image.function_response = None + mock_part_file_image.inline_data = None + mock_part_file_image.file_data = mock_file_data_image + + attributes = list(extractor._get_attributes_from_part(mock_part_file_image, 0)) + assert len(attributes) == 1, f"Expected 1 attribute for image file_data, got {len(attributes)}" + + attr_key, attr_value = attributes[0] + expected_key = f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}" + assert attr_key == expected_key, f"Expected key {expected_key}, got {attr_key}" + assert attr_value == "gs://bucket/image.jpg", f"Expected file URI, got {attr_value}" + + # Test 4: file_data with PDF (should use descriptive text) + mock_file_data_pdf = Mock() + mock_file_data_pdf.mime_type = "application/pdf" + mock_file_data_pdf.file_uri = "https://example.com/document.pdf" + + mock_part_file_pdf = Mock() + mock_part_file_pdf.text = None + mock_part_file_pdf.function_call = None + mock_part_file_pdf.function_response = None + mock_part_file_pdf.inline_data = None + mock_part_file_pdf.file_data = mock_file_data_pdf + + attributes = list(extractor._get_attributes_from_part(mock_part_file_pdf, 0)) + assert len(attributes) == 1, f"Expected 1 attribute for PDF file_data, got {len(attributes)}" + + attr_key, attr_value = attributes[0] + assert attr_key == MessageAttributes.MESSAGE_CONTENT, ( + f"Expected MESSAGE_CONTENT key, got {attr_key}" + ) + assert attr_value == "[File: application/pdf from https://example.com/document.pdf]", ( + f"Expected file description, got {attr_value}" + ) + + # Test 5: Unknown part type (should not raise exception, return no attributes) + mock_part_unknown = Mock() + mock_part_unknown.text = None + mock_part_unknown.function_call = None + mock_part_unknown.function_response = None + mock_part_unknown.inline_data = None + mock_part_unknown.file_data = None + + # This should not raise an exception + attributes = list(extractor._get_attributes_from_part(mock_part_unknown, 0)) + assert len(attributes) == 0, ( + f"Expected 0 attributes for unknown part type, got {len(attributes)}" + ) + + # Test 6: inline_data with missing data (edge case) + mock_inline_data_no_data = Mock() + mock_inline_data_no_data.mime_type = "image/png" + mock_inline_data_no_data.data = None + + mock_part_no_data = Mock() + mock_part_no_data.text = None + mock_part_no_data.function_call = None + mock_part_no_data.function_response = None + mock_part_no_data.inline_data = mock_inline_data_no_data + mock_part_no_data.file_data = None + + attributes = list(extractor._get_attributes_from_part(mock_part_no_data, 0)) + assert len(attributes) == 1, ( + f"Expected 1 attribute for image with no data, got {len(attributes)}" + ) + + attr_key, attr_value = attributes[0] + assert attr_key == MessageAttributes.MESSAGE_CONTENT, ( + f"Expected MESSAGE_CONTENT key, got {attr_key}" + ) + assert attr_value == "[Image: image/png]", ( + f"Expected image fallback description, got {attr_value}" + ) + + +def test_part_handling_preserves_existing_functionality(): + """Test that existing Part handling (text, function_call, function_response) still works.""" + from unittest.mock import Mock + + from openinference.instrumentation.google_genai._request_attributes_extractor import ( + _RequestAttributesExtractor, + ) + from openinference.semconv.trace import MessageAttributes + + extractor = _RequestAttributesExtractor() + + # Test text part (existing functionality) + mock_part_text = Mock() + mock_part_text.text = "Hello, world!" + mock_part_text.function_call = None + mock_part_text.function_response = None + mock_part_text.inline_data = None + mock_part_text.file_data = None + + attributes = list(extractor._get_attributes_from_part(mock_part_text, 0)) + assert len(attributes) == 1, f"Expected 1 attribute for text part, got {len(attributes)}" + + attr_key, attr_value = attributes[0] + assert attr_key == MessageAttributes.MESSAGE_CONTENT, ( + f"Expected MESSAGE_CONTENT key, got {attr_key}" + ) + assert attr_value == "Hello, world!", f"Expected text content, got {attr_value}" + + # Test that inline_data and file_data don't interfere with text + mock_part_text_priority = Mock() + mock_part_text_priority.text = "Text takes priority" + mock_part_text_priority.function_call = None + mock_part_text_priority.function_response = None + # These should be ignored since text has priority + mock_part_text_priority.inline_data = Mock() + mock_part_text_priority.file_data = Mock() + + attributes = list(extractor._get_attributes_from_part(mock_part_text_priority, 0)) + assert len(attributes) == 1, ( + f"Expected 1 attribute for text part with other data, got {len(attributes)}" + ) + + attr_key, attr_value = attributes[0] + assert attr_key == MessageAttributes.MESSAGE_CONTENT, ( + f"Expected MESSAGE_CONTENT key, got {attr_key}" + ) + assert attr_value == "Text takes priority", f"Expected text content, got {attr_value}" From e92e601ca727e94372f6bf1d550dd7101faaec06 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Wed, 22 Oct 2025 14:23:09 -0700 Subject: [PATCH 2/8] wip --- .../examples/generate_content_with_images.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py b/python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py index de800571e6..73634179e2 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/examples/generate_content_with_images.py @@ -31,9 +31,7 @@ def create_test_image_data() -> bytes: """Create a simple 1x1 pixel PNG for testing.""" - base64_png = ( - "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" - ) + base64_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==" # noqa: E501 return base64.b64decode(base64_png) From 2f3b92bd44ad5146ef47ecba467ab83feaa6dd85 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Wed, 22 Oct 2025 14:28:19 -0700 Subject: [PATCH 3/8] wip --- .../pyproject.toml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml b/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml index f7baa0a227..4644214b57 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-google-genai/pyproject.toml @@ -32,9 +32,6 @@ dependencies = [ "openinference-instrumentation>=0.1.17", "openinference-semantic-conventions", "wrapt", - "opentelemetry-exporter-otlp>=1.38.0", - "opentelemetry-sdk>=1.38.0", - "google-genai>=1.46.0", ] [project.optional-dependencies] @@ -43,7 +40,8 @@ instruments = [ ] test = [ "google-genai >= 1.8.0", - "opentelemetry-sdk", + "opentelemetry-sdk>=1.38.0", + "opentelemetry-exporter-otlp>=1.38.0", ] [project.entry-points.opentelemetry_instrumentor] From 3fbf25e27f00987753206ca7ae045b5851b871a4 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Wed, 22 Oct 2025 18:27:43 -0700 Subject: [PATCH 4/8] preserve image url --- .../google_genai/_request_attributes_extractor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py index e4601dc0d2..7404b49743 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py @@ -440,6 +440,9 @@ def _flatten_parts(self, parts: list[Part]) -> Iterator[Tuple[str, AttributeValu yield (attr, value) elif attr == MessageAttributes.MESSAGE_TOOL_CALL_ID: yield (attr, value) + elif attr.startswith(MessageContentAttributes.MESSAGE_CONTENT_IMAGE): + # Preserve image attributes (don't flatten) + yield (attr, value) elif isinstance(value, str): # Flatten all other string values into a single message content content_values.append(value) From 5f31b10ff6ad55f4f63c2f6217eaa75a77fe5696 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Wed, 22 Oct 2025 18:42:52 -0700 Subject: [PATCH 5/8] wip --- .../_request_attributes_extractor.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py index 7404b49743..964faa131e 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py @@ -389,7 +389,7 @@ def _get_attributes_from_inline_data( self, inline_data: Any ) -> Iterator[Tuple[str, AttributeValue]]: """Handle inline data (base64 encoded content) from Part.from_bytes()""" - mime_type = get_attribute(inline_data, "mime_type", "unknown") + mime_type = get_attribute(inline_data, "mime_type", "unknown") or "unknown" data = get_attribute(inline_data, "data") if mime_type.startswith("image/"): @@ -397,7 +397,15 @@ def _get_attributes_from_inline_data( if data: import base64 - base64_data = base64.b64encode(data).decode() if isinstance(data, bytes) else data + # Handle both bytes and string data properly + if isinstance(data, bytes): + base64_data = base64.b64encode(data).decode() + elif isinstance(data, str): + # Assume it's already base64 encoded + base64_data = data + else: + # Convert other types to string and base64 encode + base64_data = base64.b64encode(str(data).encode()).decode() data_url = f"data:{mime_type};base64,{base64_data}" yield ( f"{MessageContentAttributes.MESSAGE_CONTENT_IMAGE}.{ImageAttributes.IMAGE_URL}", @@ -415,8 +423,8 @@ def _get_attributes_from_file_data( self, file_data: Any ) -> Iterator[Tuple[str, AttributeValue]]: """Handle file data (URI references) from Part.from_uri()""" - file_uri = get_attribute(file_data, "file_uri", "unknown") - mime_type = get_attribute(file_data, "mime_type", "unknown") + file_uri = get_attribute(file_data, "file_uri", "unknown") or "unknown" + mime_type = get_attribute(file_data, "mime_type", "unknown") or "unknown" if mime_type.startswith("image/"): # Use proper semantic conventions for images From b73ebfe449cb45573d6284b4ba3a9a9c1416d07e Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Thu, 23 Oct 2025 00:03:50 -0700 Subject: [PATCH 6/8] wip --- .../google_genai/_request_attributes_extractor.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py index 964faa131e..956fb8385d 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py @@ -416,7 +416,11 @@ def _get_attributes_from_inline_data( yield (MessageAttributes.MESSAGE_CONTENT, f"[Image: {mime_type}]") else: # For non-image files, use descriptive text (no specific semantic convention available) - data_size = len(data) if data else 0 + try: + data_size = len(data) if data else 0 + except (TypeError, AttributeError): + # data doesn't support len() (e.g., int, object, etc.) + data_size = "unknown" yield (MessageAttributes.MESSAGE_CONTENT, f"[File: {mime_type}, {data_size} bytes]") def _get_attributes_from_file_data( From 97dbf3abc473815ff90d99a77e2111f6543fcea4 Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Thu, 23 Oct 2025 09:16:37 -0700 Subject: [PATCH 7/8] wip --- .../google_genai/_request_attributes_extractor.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py index 956fb8385d..5b88bb496b 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py @@ -417,11 +417,12 @@ def _get_attributes_from_inline_data( else: # For non-image files, use descriptive text (no specific semantic convention available) try: - data_size = len(data) if data else 0 + data_size_value = len(data) if data else 0 + data_size_str = str(data_size_value) except (TypeError, AttributeError): # data doesn't support len() (e.g., int, object, etc.) - data_size = "unknown" - yield (MessageAttributes.MESSAGE_CONTENT, f"[File: {mime_type}, {data_size} bytes]") + data_size_str = "unknown" + yield (MessageAttributes.MESSAGE_CONTENT, f"[File: {mime_type}, {data_size_str} bytes]") def _get_attributes_from_file_data( self, file_data: Any From fd654078bb4e85ebf2b9a4880cee43cf8adbf33a Mon Sep 17 00:00:00 2001 From: Ge Li <77590974+GeLi2001@users.noreply.github.com> Date: Tue, 28 Oct 2025 10:29:12 -0700 Subject: [PATCH 8/8] wip --- .../_request_attributes_extractor.py | 130 +++++++++++++++++- 1 file changed, 126 insertions(+), 4 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py index 5b88bb496b..3afe8d2631 100644 --- a/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py +++ b/python/instrumentation/openinference-instrumentation-google-genai/src/openinference/instrumentation/google_genai/_request_attributes_extractor.py @@ -10,6 +10,7 @@ from openinference.instrumentation.google_genai._utils import ( _as_input_attributes, _io_value_and_type, + _ValueAndType, ) from openinference.semconv.trace import ( ImageAttributes, @@ -39,7 +40,7 @@ def get_attributes_from_request( yield SpanAttributes.LLM_PROVIDER, OpenInferenceLLMProviderValues.GOOGLE.value try: yield from _as_input_attributes( - _io_value_and_type(request_parameters), + self._get_phoenix_friendly_input_value(request_parameters), ) except Exception: logger.exception( @@ -459,12 +460,133 @@ def _flatten_parts(self, parts: list[Part]) -> Iterator[Tuple[str, AttributeValu elif isinstance(value, str): # Flatten all other string values into a single message content content_values.append(value) - else: - # TODO: Handle other types of parts - logger.debug(f"Non-text part encountered: {part}") + else: + # TODO: Handle other types of parts + logger.debug(f"Non-text part encountered: {part}") if content_values: yield (MessageAttributes.MESSAGE_CONTENT, "\n\n".join(content_values)) + def _get_phoenix_friendly_input_value(self, request_parameters: Any) -> _ValueAndType: + """ + Create a Phoenix-friendly input value by replacing binary data with descriptive text. + This ensures the Phoenix UI shows readable content instead of binary data. + """ + try: + # First try the standard approach for non-binary content + if not isinstance(request_parameters, Mapping): + return _io_value_and_type(request_parameters) + + # Check if this request contains binary data (images/files) + contents = request_parameters.get("contents") + if not contents: + return _io_value_and_type(request_parameters) + + # Create a copy of request parameters to modify + cleaned_params = dict(request_parameters) + + # Process contents to replace binary data with descriptive text + if hasattr(contents, "parts"): + # Single Content object + cleaned_params["contents"] = self._clean_content_for_display(contents) + elif isinstance(contents, (list, tuple)): + # List of Content objects + cleaned_params["contents"] = [ + self._clean_content_for_display(content) + if hasattr(content, "parts") + else content + for content in contents + ] + + # Use the standard processing on the cleaned parameters + return _io_value_and_type(cleaned_params) + + except Exception: + logger.exception( + "Failed to create Phoenix-friendly input value, falling back to default" + ) + return _io_value_and_type(request_parameters) + + def _clean_content_for_display(self, content: Any) -> Dict[str, Any]: + """Clean a Content object by replacing binary data with descriptive text.""" + try: + # Create a simplified representation + result = {"role": get_attribute(content, "role", "user"), "parts": []} + + parts = get_attribute(content, "parts", []) + for part in parts: + if text := get_attribute(part, "text"): + result["parts"].append({"text": text}) + elif inline_data := get_attribute(part, "inline_data"): + mime_type = get_attribute(inline_data, "mime_type", "unknown") or "unknown" + data = get_attribute(inline_data, "data") + + if mime_type.startswith("image/"): + # For images, include the actual data URL so Phoenix can display them + if data: + import base64 + + # Handle both bytes and string data properly + if isinstance(data, bytes): + base64_data = base64.b64encode(data).decode() + elif isinstance(data, str): + # Assume it's already base64 encoded + base64_data = data + else: + # Convert other types to string and base64 encode + base64_data = base64.b64encode(str(data).encode()).decode() + + data_url = f"data:{mime_type};base64,{base64_data}" + result["parts"].append( + { + "inline_data": { + "mime_type": mime_type, + "data_url": data_url, # Phoenix-friendly image URL + "description": f"Image ({mime_type})", + } + } + ) + else: + result["parts"].append( + { + "inline_data": { + "mime_type": mime_type, + "description": f"[Image: {mime_type}, no data]", + } + } + ) + else: + try: + data_size_value = len(data) if data else 0 + data_size_str = str(data_size_value) + except (TypeError, AttributeError): + data_size_str = "unknown" + result["parts"].append( + { + "inline_data": { + "mime_type": mime_type, + "description": f"[File data: {mime_type}, {data_size_str} bytes]", # noqa: E501 + } + } + ) + elif file_data := get_attribute(part, "file_data"): + file_uri = get_attribute(file_data, "file_uri", "unknown") or "unknown" + mime_type = get_attribute(file_data, "mime_type", "unknown") or "unknown" + result["parts"].append( + {"file_data": {"file_uri": file_uri, "mime_type": mime_type}} + ) + elif function_call := get_attribute(part, "function_call"): + result["parts"].append({"function_call": str(function_call)}) + elif function_response := get_attribute(part, "function_response"): + result["parts"].append({"function_response": str(function_response)}) + else: + result["parts"].append({"unknown_part": str(type(part))}) + + return result + + except Exception: + logger.exception("Failed to clean content for display") + return {"role": "user", "parts": [{"error": "Failed to process content"}]} + def _extract_tool_call_index(self, attr: str) -> int: """Extract tool call index from message tool call attribute key.