From 2d830f01e90ec59f115e6e5c76d6cb1c3edd5fcd Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 6 Nov 2025 08:54:07 -0800 Subject: [PATCH 1/6] fix: anthropic cache token count --- .../instrumentation/langchain/_tracer.py | 211 ++++++++++-- .../tests/test_token_counts.py | 312 ++++++++++++++++++ 2 files changed, 490 insertions(+), 33 deletions(-) create mode 100644 python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index 3b72f3bf09..ae8e194918 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -25,6 +25,7 @@ Optional, Sequence, Tuple, + TypedDict, TypeVar, Union, cast, @@ -35,6 +36,7 @@ import wrapt # type: ignore from langchain_core.messages import BaseMessage +from langchain_core.messages.ai import UsageMetadata from langchain_core.tracers import BaseTracer, LangChainTracer from langchain_core.tracers.schemas import Run from opentelemetry import context as context_api @@ -43,6 +45,7 @@ from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes from opentelemetry.trace import Span from opentelemetry.util.types import AttributeValue +from typing_extensions import TypeGuard from wrapt import ObjectProxy from openinference.instrumentation import get_attributes_from_context, safe_json_dumps @@ -832,6 +835,172 @@ def _model_name( return +class _HasInputAndOutputTokens(TypedDict): + input_tokens: int + output_tokens: int + + +class _RawAnthropicUsageWithCache(_HasInputAndOutputTokens): + # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13 + cache_creation_input_tokens: int + cache_read_input_tokens: int + + +class _RawAnthropicUsageWithCacheRead(_HasInputAndOutputTokens): + cache_read_input_tokens: int + + +class _RawAnthropicUsageWithCacheCreation(_HasInputAndOutputTokens): + cache_creation_input_tokens: int + + +def _is_raw_anthropic_usage_with_cache( + obj: Mapping[str, Any], +) -> TypeGuard[_RawAnthropicUsageWithCache]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "cache_creation_input_tokens" in obj + and "cache_read_input_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["cache_creation_input_tokens"], int) + and isinstance(obj["cache_read_input_tokens"], int) + ) + + +def _token_counts_from_raw_anthropic_usage_with_cache( + obj: _RawAnthropicUsageWithCache, +) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + + if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens + + if cache_read_input_tokens := obj["cache_read_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens + + prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens + completion_tokens = output_tokens + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def 
_is_raw_anthropic_usage_with_cache_creation( + obj: Mapping[str, Any], +) -> TypeGuard[_RawAnthropicUsageWithCacheCreation]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "cache_creation_input_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["cache_creation_input_tokens"], int) + ) + + +def _token_counts_from_raw_anthropic_usage_with_cache_creation( + obj: _RawAnthropicUsageWithCacheCreation, +) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + + if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens + + prompt_tokens = input_tokens + cache_creation_input_tokens + completion_tokens = output_tokens + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def _is_raw_anthropic_usage_with_cache_read( + obj: Mapping[str, Any], +) -> TypeGuard[_RawAnthropicUsageWithCacheRead]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "cache_read_input_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["cache_read_input_tokens"], int) + ) + + +def _token_counts_from_raw_anthropic_usage_with_cache_read( + obj: _RawAnthropicUsageWithCacheRead, +) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + + if cache_read_input_tokens := obj["cache_read_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens + + prompt_tokens = input_tokens + cache_read_input_tokens + completion_tokens = output_tokens + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def _is_lc_usage_metadata(obj: Mapping[str, Any]) -> TypeGuard[UsageMetadata]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "total_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["total_tokens"], int) + ) + + +def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + total_tokens = obj["total_tokens"] + + yield LLM_TOKEN_COUNT_TOTAL, total_tokens + + if input_token_details := (obj.get("input_token_details") or {}): + if audio := input_token_details.get("audio"): + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, audio + if cache_creation := input_token_details.get("cache_creation"): + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation + if cache_read := input_token_details.get("cache_read"): + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read + + if output_token_details := (obj.get("output_token_details") or {}): + if audio := output_token_details.get("audio"): + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, audio + if reasoning := output_token_details.get("reasoning"): + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, reasoning + + if total_tokens == input_tokens + output_tokens: + prompt_tokens = input_tokens + completion_tokens = output_tokens + else: + prompt_tokens = ( + input_tokens + + (input_token_details.get("audio") or 0) + + (input_token_details.get("cache_creation") or 0) + + (input_token_details.get("cache_read") or 0) + ) + if total_tokens == prompt_tokens + output_tokens: + completion_tokens 
= output_tokens + else: + completion_tokens = ( + output_tokens + + (output_token_details.get("audio") or 0) + + (output_token_details.get("reasoning") or 0) + ) + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + @stop_on_exception def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]: """Yields token count information if present.""" @@ -894,40 +1063,16 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i if (token_count := _get_first_value(details, keys)) is not None: yield attribute_name, token_count + if _is_raw_anthropic_usage_with_cache(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache(token_usage) + elif _is_raw_anthropic_usage_with_cache_read(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache_read(token_usage) + elif _is_raw_anthropic_usage_with_cache_creation(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache_creation(token_usage) + # maps langchain_core.messages.ai.UsageMetadata object - for attribute_name, details_key_or_none, keys in [ - (LLM_TOKEN_COUNT_PROMPT, None, ("input_tokens",)), - (LLM_TOKEN_COUNT_COMPLETION, None, ("output_tokens",)), - ( - LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, - "input_token_details", - ("audio",), - ), - ( - LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, - "input_token_details", - ("cache_creation",), - ), - ( - LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, - "input_token_details", - ("cache_read",), - ), - ( - LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, - "output_token_details", - ("audio",), - ), - ( - LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, - "output_token_details", - ("reasoning",), - ), - ]: - details = token_usage.get(details_key_or_none) if details_key_or_none else token_usage - if details is not None: - if (token_count := _get_first_value(details, keys)) is not None: - yield attribute_name, token_count + if _is_lc_usage_metadata(token_usage): + yield from _token_counts_from_lc_usage_metadata(token_usage) def _parse_token_usage_for_vertexai( diff --git a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py new file mode 100644 index 0000000000..7ab4f35dcf --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py @@ -0,0 +1,312 @@ +import pytest +from langchain_core.messages.ai import UsageMetadata + +from openinference.instrumentation.langchain._tracer import ( + _token_counts_from_lc_usage_metadata, + _token_counts_from_raw_anthropic_usage_with_cache, + _token_counts_from_raw_anthropic_usage_with_cache_creation, + _token_counts_from_raw_anthropic_usage_with_cache_read, +) +from openinference.semconv.trace import SpanAttributes + + +@pytest.mark.parametrize( + "usage_metadata,expected", + [ + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="basic_token_counts", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 35, + "input_token_details": {"audio": 5}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 15, + 
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="input_audio_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 38, + "input_token_details": {"cache_creation": 8}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 38, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="input_cache_creation_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 33, + "input_token_details": {"cache_read": 3}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 33, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 13, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="input_cache_read_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 37, + "output_token_details": {"audio": 7}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 37, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 7, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 27, + }, + id="output_audio_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 35, + "output_token_details": {"reasoning": 5}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, + }, + id="output_reasoning_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 50, + "input_token_details": { + "audio": 5, + "cache_creation": 3, + "cache_read": 2, + }, + "output_token_details": { + "audio": 6, + "reasoning": 4, + }, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 50, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 2, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 6, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 4, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 20, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 30, + }, + id="all_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {"audio": 5}, + "output_token_details": {"reasoning": 3}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="total_equals_sum_no_detail_adjustment", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="empty_input_token_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "output_token_details": {}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="empty_output_token_details", + ), + pytest.param( + { + "input_tokens": 0, + "output_tokens": 0, + "total_tokens": 0, + 
}, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 0, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, + }, + id="zero_values", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": { + "audio": None, + "cache_creation": 5, + }, + "output_token_details": { + "audio": 3, + "reasoning": None, + }, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="none_values_not_yielded", + ), + ], +) +def test_token_counts_from_lc_usage_metadata( + usage_metadata: UsageMetadata, expected: dict[str, int] +) -> None: + """Test _token_counts_from_lc_usage_metadata with various inputs.""" + result = dict(_token_counts_from_lc_usage_metadata(usage_metadata)) + assert result == expected + + +@pytest.mark.parametrize( + "usage,expected", + [ + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "cache_creation_input_tokens": 5, + "cache_read_input_tokens": 3, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 10 + 5 + 3 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="with_both_cache_types", + ), + pytest.param( + { + "input_tokens": 0, + "output_tokens": 0, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, + }, + id="zeros_no_cache_details", + ), + ], +) +def test_token_counts_from_raw_anthropic_usage_with_cache( + usage: dict[str, int], expected: dict[str, int] +) -> None: + """Test Anthropic usage with both cache creation and read.""" + result = dict(_token_counts_from_raw_anthropic_usage_with_cache(usage)) # type: ignore + assert result == expected + + +@pytest.mark.parametrize( + "usage,expected", + [ + pytest.param( + { + "input_tokens": 15, + "output_tokens": 25, + "cache_creation_input_tokens": 8, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 23, # 15 + 8 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, + }, + id="with_cache_creation", + ), + ], +) +def test_token_counts_from_raw_anthropic_usage_with_cache_creation( + usage: dict[str, int], expected: dict[str, int] +) -> None: + """Test Anthropic usage with cache creation only.""" + result = dict(_token_counts_from_raw_anthropic_usage_with_cache_creation(usage)) # type: ignore + assert result == expected + + +@pytest.mark.parametrize( + "usage,expected", + [ + pytest.param( + { + "input_tokens": 12, + "output_tokens": 18, + "cache_read_input_tokens": 6, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 6, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 12 + 6 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 18, + }, + id="with_cache_read", + ), + ], +) +def test_token_counts_from_raw_anthropic_usage_with_cache_read( + usage: dict[str, int], expected: dict[str, int] +) -> None: + """Test Anthropic usage with cache read only.""" + result = dict(_token_counts_from_raw_anthropic_usage_with_cache_read(usage)) # type: ignore + assert result == expected From b7f65e385d196aa845b358acc2adf0aee019fa5e Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 6 Nov 2025 09:52:14 -0800 
Subject: [PATCH 2/6] fix test --- .../tests/test_instrumentor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py index d2da135a78..35c86668cd 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py @@ -590,7 +590,7 @@ def test_anthropic_token_counts( span = spans[0] llm_attributes = dict(span.attributes or {}) assert llm_attributes.pop(OPENINFERENCE_SPAN_KIND, None) == LLM.value - assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 22 + assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 33 assert llm_attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) == 5 assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE) == 2 assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ) == 9 From 30538fad4a8de9534b660e443a9b225f750d10f2 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 6 Nov 2025 12:38:12 -0800 Subject: [PATCH 3/6] clean up --- .../pyproject.toml | 4 +- .../instrumentation/langchain/_tracer.py | 58 +++++++++++-------- python/tox.ini | 1 + 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml b/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml index ddc9b3786c..d069873eed 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ [project.optional-dependencies] instruments = [ - "langchain_core >= 0.2.43", + "langchain_core >= 0.3.9", ] test = [ "langchain_core == 0.3.50", @@ -55,7 +55,7 @@ test = [ "vcrpy>=6.0.1", ] type-check = [ - "langchain_core == 0.2.43", + "langchain_core == 0.3.9", ] [project.entry-points.opentelemetry_instrumentor] diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index ae8e194918..9458eed709 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -964,42 +964,52 @@ def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[s yield LLM_TOKEN_COUNT_TOTAL, total_tokens - if input_token_details := (obj.get("input_token_details") or {}): - if audio := input_token_details.get("audio"): - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, audio - if cache_creation := input_token_details.get("cache_creation"): - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation - if cache_read := input_token_details.get("cache_read"): - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read - - if output_token_details := (obj.get("output_token_details") or {}): - if audio := output_token_details.get("audio"): - yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, audio - if reasoning := output_token_details.get("reasoning"): - yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, reasoning + input_audio = 0 + 
input_cache_creation = 0 + input_cache_read = 0 + output_audio = 0 + output_reasoning = 0 + + if "input_token_details" in obj: + input_token_details = obj["input_token_details"] + if "audio" in input_token_details: + input_audio = input_token_details["audio"] + if "cache_creation" in input_token_details: + input_cache_creation = input_token_details["cache_creation"] + if "cache_read" in input_token_details: + input_cache_read = input_token_details["cache_read"] + + if "output_token_details" in obj: + output_token_details = obj["output_token_details"] + if "audio" in output_token_details: + output_audio = output_token_details["audio"] + if "reasoning" in output_token_details: + output_reasoning = output_token_details["reasoning"] if total_tokens == input_tokens + output_tokens: prompt_tokens = input_tokens completion_tokens = output_tokens else: - prompt_tokens = ( - input_tokens - + (input_token_details.get("audio") or 0) - + (input_token_details.get("cache_creation") or 0) - + (input_token_details.get("cache_read") or 0) - ) + prompt_tokens = input_tokens + input_audio + input_cache_creation + input_cache_read if total_tokens == prompt_tokens + output_tokens: completion_tokens = output_tokens else: - completion_tokens = ( - output_tokens - + (output_token_details.get("audio") or 0) - + (output_token_details.get("reasoning") or 0) - ) + completion_tokens = output_tokens + output_audio + output_reasoning yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + if input_audio: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, input_audio + if input_cache_creation: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, input_cache_creation + if input_cache_read: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, input_cache_read + if output_audio: + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, output_audio + if output_reasoning: + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, output_reasoning + @stop_on_exception def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]: diff --git a/python/tox.ini b/python/tox.ini index ab56dd791e..a0980b900a 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -162,6 +162,7 @@ commands_pre = openllmetry-latest: uv pip install -U opentelemetry-instrumentation-openai openlit: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-openlit[test] openlit-latest: uv pip install -U openlit + uv pip list -v commands = ruff: ruff format . 
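A minimal usage sketch (not part of the patch series itself) of the raw-Anthropic-usage helpers added in PATCH 1/6, assuming the package state after PATCH 3/6; PATCH 4/6 later consolidates these helpers into a single _token_counts_from_raw_anthropic_usage_with_cache_read_or_write. The payload and the expected attributes mirror the "with_both_cache_types" case in tests/test_token_counts.py.

# Sketch only: exercises the helper added in PATCH 1/6 against the
# "with_both_cache_types" payload from tests/test_token_counts.py.
from openinference.instrumentation.langchain._tracer import (
    _token_counts_from_raw_anthropic_usage_with_cache,
)
from openinference.semconv.trace import SpanAttributes

usage = {
    "input_tokens": 10,
    "output_tokens": 20,
    "cache_creation_input_tokens": 5,
    "cache_read_input_tokens": 3,
}
attrs = dict(_token_counts_from_raw_anthropic_usage_with_cache(usage))  # type: ignore

# Cache tokens are surfaced as prompt details and folded into the prompt total.
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] == 5
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] == 3
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 18  # 10 + 5 + 3
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 20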
From 4c0a08c63b262176385cc84adb7fd5f3970153b4 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 7 Nov 2025 09:14:32 -0800 Subject: [PATCH 4/6] clean up --- .../instrumentation/langchain/_tracer.py | 146 +++------ .../tests/test_token_counts.py | 303 ++++++------------ 2 files changed, 133 insertions(+), 316 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index 9458eed709..8615cb488c 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -45,7 +45,7 @@ from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes from opentelemetry.trace import Span from opentelemetry.util.types import AttributeValue -from typing_extensions import TypeGuard +from typing_extensions import NotRequired, TypeGuard from wrapt import ObjectProxy from openinference.instrumentation import get_attributes_from_context, safe_json_dumps @@ -835,51 +835,44 @@ def _model_name( return -class _HasInputAndOutputTokens(TypedDict): +class _RawAnthropicUsageWithCacheReadOrWrite(TypedDict): + # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13 input_tokens: int output_tokens: int + cache_read_input_tokens: NotRequired[int] + cache_creation_input_tokens: NotRequired[int] -class _RawAnthropicUsageWithCache(_HasInputAndOutputTokens): - # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13 - cache_creation_input_tokens: int - cache_read_input_tokens: int - - -class _RawAnthropicUsageWithCacheRead(_HasInputAndOutputTokens): - cache_read_input_tokens: int - - -class _RawAnthropicUsageWithCacheCreation(_HasInputAndOutputTokens): - cache_creation_input_tokens: int - - -def _is_raw_anthropic_usage_with_cache( +def _is_raw_anthropic_usage_with_cache_read_or_write( obj: Mapping[str, Any], -) -> TypeGuard[_RawAnthropicUsageWithCache]: +) -> TypeGuard[_RawAnthropicUsageWithCacheReadOrWrite]: return ( "input_tokens" in obj and "output_tokens" in obj - and "cache_creation_input_tokens" in obj - and "cache_read_input_tokens" in obj and isinstance(obj["input_tokens"], int) and isinstance(obj["output_tokens"], int) - and isinstance(obj["cache_creation_input_tokens"], int) - and isinstance(obj["cache_read_input_tokens"], int) + and ( + "cache_read_input_tokens" in obj + and isinstance(obj["cache_read_input_tokens"], int) + or "cache_creation_input_tokens" in obj + and isinstance(obj["cache_creation_input_tokens"], int) + ) ) -def _token_counts_from_raw_anthropic_usage_with_cache( - obj: _RawAnthropicUsageWithCache, +def _token_counts_from_raw_anthropic_usage_with_cache_read_or_write( + obj: _RawAnthropicUsageWithCacheReadOrWrite, ) -> Iterator[Tuple[str, int]]: input_tokens = obj["input_tokens"] output_tokens = obj["output_tokens"] - if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens + cache_creation_input_tokens = 0 + cache_read_input_tokens = 0 - if cache_read_input_tokens := obj["cache_read_input_tokens"]: - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, 
cache_read_input_tokens + if "cache_creation_input_tokens" in obj: + cache_creation_input_tokens = obj["cache_creation_input_tokens"] + if "cache_read_input_tokens" in obj: + cache_read_input_tokens = obj["cache_read_input_tokens"] prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens completion_tokens = output_tokens @@ -887,64 +880,11 @@ def _token_counts_from_raw_anthropic_usage_with_cache( yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens - -def _is_raw_anthropic_usage_with_cache_creation( - obj: Mapping[str, Any], -) -> TypeGuard[_RawAnthropicUsageWithCacheCreation]: - return ( - "input_tokens" in obj - and "output_tokens" in obj - and "cache_creation_input_tokens" in obj - and isinstance(obj["input_tokens"], int) - and isinstance(obj["output_tokens"], int) - and isinstance(obj["cache_creation_input_tokens"], int) - ) - - -def _token_counts_from_raw_anthropic_usage_with_cache_creation( - obj: _RawAnthropicUsageWithCacheCreation, -) -> Iterator[Tuple[str, int]]: - input_tokens = obj["input_tokens"] - output_tokens = obj["output_tokens"] - - if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: + if cache_creation_input_tokens: yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens - - prompt_tokens = input_tokens + cache_creation_input_tokens - completion_tokens = output_tokens - - yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens - yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens - - -def _is_raw_anthropic_usage_with_cache_read( - obj: Mapping[str, Any], -) -> TypeGuard[_RawAnthropicUsageWithCacheRead]: - return ( - "input_tokens" in obj - and "output_tokens" in obj - and "cache_read_input_tokens" in obj - and isinstance(obj["input_tokens"], int) - and isinstance(obj["output_tokens"], int) - and isinstance(obj["cache_read_input_tokens"], int) - ) - - -def _token_counts_from_raw_anthropic_usage_with_cache_read( - obj: _RawAnthropicUsageWithCacheRead, -) -> Iterator[Tuple[str, int]]: - input_tokens = obj["input_tokens"] - output_tokens = obj["output_tokens"] - - if cache_read_input_tokens := obj["cache_read_input_tokens"]: + if cache_read_input_tokens: yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens - prompt_tokens = input_tokens + cache_read_input_tokens - completion_tokens = output_tokens - - yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens - yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens - def _is_lc_usage_metadata(obj: Mapping[str, Any]) -> TypeGuard[UsageMetadata]: return ( @@ -962,8 +902,6 @@ def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[s output_tokens = obj["output_tokens"] total_tokens = obj["total_tokens"] - yield LLM_TOKEN_COUNT_TOTAL, total_tokens - input_audio = 0 input_cache_creation = 0 input_cache_read = 0 @@ -986,18 +924,23 @@ def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[s if "reasoning" in output_token_details: output_reasoning = output_token_details["reasoning"] - if total_tokens == input_tokens + output_tokens: - prompt_tokens = input_tokens - completion_tokens = output_tokens - else: - prompt_tokens = input_tokens + input_audio + input_cache_creation + input_cache_read - if total_tokens == prompt_tokens + output_tokens: - completion_tokens = output_tokens - else: - completion_tokens = output_tokens + output_audio + output_reasoning + prompt_tokens = input_tokens + completion_tokens = output_tokens + + # heuristic adjustment for Bedrock Anthropic models with cache 
read or write + # https://github.com/Arize-ai/openinference/issues/2381 + if input_cache := input_cache_creation + input_cache_read: + if total_tokens == input_tokens + output_tokens + input_cache: + # for Bedrock Converse + prompt_tokens += input_cache + elif input_tokens < input_cache: + # for Bedrock InvokeModel + prompt_tokens += input_cache + total_tokens += input_cache yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + yield LLM_TOKEN_COUNT_TOTAL, total_tokens if input_audio: yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, input_audio @@ -1022,12 +965,12 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i ) ): return + keys: Sequence[str] for attribute_name, keys in [ ( LLM_TOKEN_COUNT_PROMPT, ( "prompt_tokens", - "input_tokens", # Anthropic-specific key "prompt_token_count", # Gemini-specific key - https://ai.google.dev/gemini-api/docs/tokens?lang=python ), ), @@ -1035,13 +978,10 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i LLM_TOKEN_COUNT_COMPLETION, ( "completion_tokens", - "output_tokens", # Anthropic-specific key "candidates_token_count", # Gemini-specific key ), ), (LLM_TOKEN_COUNT_TOTAL, ("total_tokens", "total_token_count")), # Gemini-specific key - (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, ("cache_read_input_tokens",)), # Antrhopic - (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, ("cache_creation_input_tokens",)), # Antrhopic ]: if (token_count := _get_first_value(token_usage, keys)) is not None: yield attribute_name, token_count @@ -1073,17 +1013,13 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i if (token_count := _get_first_value(details, keys)) is not None: yield attribute_name, token_count - if _is_raw_anthropic_usage_with_cache(token_usage): - yield from _token_counts_from_raw_anthropic_usage_with_cache(token_usage) - elif _is_raw_anthropic_usage_with_cache_read(token_usage): - yield from _token_counts_from_raw_anthropic_usage_with_cache_read(token_usage) - elif _is_raw_anthropic_usage_with_cache_creation(token_usage): - yield from _token_counts_from_raw_anthropic_usage_with_cache_creation(token_usage) - # maps langchain_core.messages.ai.UsageMetadata object if _is_lc_usage_metadata(token_usage): yield from _token_counts_from_lc_usage_metadata(token_usage) + if _is_raw_anthropic_usage_with_cache_read_or_write(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(token_usage) + def _parse_token_usage_for_vertexai( outputs: Optional[Mapping[str, Any]], diff --git a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py index 7ab4f35dcf..deaea82fe2 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py @@ -1,17 +1,18 @@ +from typing import Any + import pytest -from langchain_core.messages.ai import UsageMetadata from openinference.instrumentation.langchain._tracer import ( + _is_lc_usage_metadata, + _is_raw_anthropic_usage_with_cache_read_or_write, _token_counts_from_lc_usage_metadata, - _token_counts_from_raw_anthropic_usage_with_cache, - _token_counts_from_raw_anthropic_usage_with_cache_creation, - _token_counts_from_raw_anthropic_usage_with_cache_read, + _token_counts_from_raw_anthropic_usage_with_cache_read_or_write, ) 
from openinference.semconv.trace import SpanAttributes @pytest.mark.parametrize( - "usage_metadata,expected", + "usage_metadata,expected,is_valid", [ pytest.param( { @@ -20,158 +21,64 @@ "total_tokens": 30, }, { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, }, - id="basic_token_counts", + True, + id="basic", ), pytest.param( { "input_tokens": 10, "output_tokens": 20, "total_tokens": 35, - "input_token_details": {"audio": 5}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 15, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + "input_token_details": {"cache_creation": 3, "cache_read": 2}, }, - id="input_audio_details", - ), - pytest.param( { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 38, - "input_token_details": {"cache_creation": 8}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 38, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 15, # 10 + 3 + 2 SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="input_cache_creation_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 33, - "input_token_details": {"cache_read": 3}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 33, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 13, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="input_cache_read_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 37, - "output_token_details": {"audio": 7}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 37, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 7, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 27, - }, - id="output_audio_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 35, - "output_token_details": {"reasoning": 5}, - }, - { SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 5, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, - }, - id="output_reasoning_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 50, - "input_token_details": { - "audio": 5, - "cache_creation": 3, - "cache_read": 2, - }, - "output_token_details": { - "audio": 6, - "reasoning": 4, - }, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 50, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 3, SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 2, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 6, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 4, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 20, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 30, }, - id="all_details", + True, + id="bedrock_converse", ), pytest.param( { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, - "input_token_details": {"audio": 5}, - "output_token_details": {"reasoning": 3}, + "input_tokens": 5, + "output_tokens": 10, + "total_tokens": 15, + "input_token_details": {"cache_creation": 20, "cache_read": 10}, }, { - 
SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 3, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 35, # 5 + 20 + 10 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 10, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 45, # adjusted + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 10, }, - id="total_equals_sum_no_detail_adjustment", + True, + id="bedrock_invokemodel", ), pytest.param( { "input_tokens": 10, "output_tokens": 20, "total_tokens": 30, - "input_token_details": {}, + "input_token_details": {"audio": 5}, + "output_token_details": {"reasoning": 3}, }, { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="empty_input_token_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, - "output_token_details": {}, - }, - { SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 3, }, - id="empty_output_token_details", + True, + id="non_cache_details", ), pytest.param( { @@ -180,47 +87,45 @@ "total_tokens": 0, }, { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 0, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 0, }, - id="zero_values", + True, + id="zeros", ), pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, - "input_token_details": { - "audio": None, - "cache_creation": 5, - }, - "output_token_details": { - "audio": 3, - "reasoning": None, - }, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 3, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="none_values_not_yielded", + {"input_tokens": 10, "output_tokens": 20}, + {}, + False, + id="missing_total", + ), + pytest.param( + {"input_tokens": "10", "output_tokens": 20, "total_tokens": 30}, + {}, + False, + id="wrong_type", + ), + pytest.param( + {"output_tokens": 20, "total_tokens": 30}, + {}, + False, + id="missing_field", ), ], ) def test_token_counts_from_lc_usage_metadata( - usage_metadata: UsageMetadata, expected: dict[str, int] + usage_metadata: dict[str, Any], expected: dict[str, int], is_valid: bool ) -> None: """Test _token_counts_from_lc_usage_metadata with various inputs.""" - result = dict(_token_counts_from_lc_usage_metadata(usage_metadata)) - assert result == expected + assert _is_lc_usage_metadata(usage_metadata) == is_valid + if _is_lc_usage_metadata(usage_metadata): + result = dict(_token_counts_from_lc_usage_metadata(usage_metadata)) + assert result == expected @pytest.mark.parametrize( - "usage,expected", + "usage,expected,is_valid", [ pytest.param( { @@ -230,83 +135,59 @@ def test_token_counts_from_lc_usage_metadata( "cache_read_input_tokens": 3, }, { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 10 + 5 + 3 
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, }, - id="with_both_cache_types", - ), - pytest.param( - { - "input_tokens": 0, - "output_tokens": 0, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, - }, - id="zeros_no_cache_details", + True, + id="both", ), - ], -) -def test_token_counts_from_raw_anthropic_usage_with_cache( - usage: dict[str, int], expected: dict[str, int] -) -> None: - """Test Anthropic usage with both cache creation and read.""" - result = dict(_token_counts_from_raw_anthropic_usage_with_cache(usage)) # type: ignore - assert result == expected - - -@pytest.mark.parametrize( - "usage,expected", - [ pytest.param( + {"input_tokens": 15, "output_tokens": 25, "cache_creation_input_tokens": 8}, { - "input_tokens": 15, - "output_tokens": 25, - "cache_creation_input_tokens": 8, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 23, # 15 + 8 SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, }, - id="with_cache_creation", + True, + id="write", ), - ], -) -def test_token_counts_from_raw_anthropic_usage_with_cache_creation( - usage: dict[str, int], expected: dict[str, int] -) -> None: - """Test Anthropic usage with cache creation only.""" - result = dict(_token_counts_from_raw_anthropic_usage_with_cache_creation(usage)) # type: ignore - assert result == expected - - -@pytest.mark.parametrize( - "usage,expected", - [ pytest.param( + {"input_tokens": 12, "output_tokens": 18, "cache_read_input_tokens": 6}, { - "input_tokens": 12, - "output_tokens": 18, - "cache_read_input_tokens": 6, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 6, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 12 + 6 SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 18, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 6, }, - id="with_cache_read", + True, + id="read", + ), + pytest.param( + {"input_tokens": 10, "output_tokens": 20}, + {}, + False, + id="no_cache", + ), + pytest.param( + {"input_tokens": "10", "output_tokens": 20, "cache_read_input_tokens": 5}, + {}, + False, + id="wrong_type", + ), + pytest.param( + {"output_tokens": 20, "cache_read_input_tokens": 5}, + {}, + False, + id="missing_field", ), ], ) -def test_token_counts_from_raw_anthropic_usage_with_cache_read( - usage: dict[str, int], expected: dict[str, int] +def test_token_counts_from_raw_anthropic_usage( + usage: dict[str, Any], expected: dict[str, int], is_valid: bool ) -> None: - """Test Anthropic usage with cache read only.""" - result = dict(_token_counts_from_raw_anthropic_usage_with_cache_read(usage)) # type: ignore - assert result == expected + """Test Anthropic usage with cache.""" + assert _is_raw_anthropic_usage_with_cache_read_or_write(usage) == is_valid + if _is_raw_anthropic_usage_with_cache_read_or_write(usage): + result = dict(_token_counts_from_raw_anthropic_usage_with_cache_read_or_write(usage)) + assert result == expected From 8f3b955c4298ce819984a174baeb2f09157222ae Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 7 Nov 2025 09:17:31 -0800 Subject: [PATCH 5/6] clean up --- .../tests/test_token_counts.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git 
a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py index deaea82fe2..c99d3b67e5 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py @@ -94,6 +94,37 @@ True, id="zeros", ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {"cache_creation": 0, "cache_read": 0}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + }, + True, + id="zero_cache_no_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {}, + "output_token_details": {}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + }, + True, + id="empty_details", + ), pytest.param( {"input_tokens": 10, "output_tokens": 20}, {}, @@ -163,6 +194,29 @@ def test_token_counts_from_lc_usage_metadata( True, id="read", ), + pytest.param( + {"input_tokens": 10, "output_tokens": 20, "cache_creation_input_tokens": 0}, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + True, + id="zero_cache_write", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + True, + id="zero_both_cache", + ), pytest.param( {"input_tokens": 10, "output_tokens": 20}, {}, From 30a6e953bdda7258a552b13a8b73c0b9d91649c0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 10 Nov 2025 13:08:19 -0800 Subject: [PATCH 6/6] add parentheses --- .../openinference/instrumentation/langchain/_tracer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index 8615cb488c..cdf220b9d4 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -852,10 +852,11 @@ def _is_raw_anthropic_usage_with_cache_read_or_write( and isinstance(obj["input_tokens"], int) and isinstance(obj["output_tokens"], int) and ( - "cache_read_input_tokens" in obj - and isinstance(obj["cache_read_input_tokens"], int) - or "cache_creation_input_tokens" in obj - and isinstance(obj["cache_creation_input_tokens"], int) + ("cache_read_input_tokens" in obj and isinstance(obj["cache_read_input_tokens"], int)) + or ( + "cache_creation_input_tokens" in obj + and isinstance(obj["cache_creation_input_tokens"], int) + ) ) )
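As a closing sketch (not part of the patches), the Bedrock cache heuristic introduced in PATCH 4/6 can be exercised directly, assuming the final state of _tracer.py after PATCH 6/6. The two payloads and the expected attributes mirror the "bedrock_converse" and "bedrock_invokemodel" cases in tests/test_token_counts.py.

# Sketch only: final behavior of the LangChain UsageMetadata mapping with the
# Bedrock cache-read/cache-write adjustment (see issue #2381 referenced above).
from openinference.instrumentation.langchain._tracer import (
    _is_lc_usage_metadata,
    _token_counts_from_lc_usage_metadata,
)
from openinference.semconv.trace import SpanAttributes

# Bedrock Converse style: total_tokens already includes the cache tokens,
# so only the prompt count is adjusted (10 + 3 + 2 = 15) and total stays 35.
converse = {
    "input_tokens": 10,
    "output_tokens": 20,
    "total_tokens": 35,
    "input_token_details": {"cache_creation": 3, "cache_read": 2},
}
assert _is_lc_usage_metadata(converse)
attrs = dict(_token_counts_from_lc_usage_metadata(converse))
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 15
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 20
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 35

# Bedrock InvokeModel style: input_tokens (5) is smaller than the cache tokens
# (20 + 10), so both the prompt and the total counts are adjusted upward.
invoke_model = {
    "input_tokens": 5,
    "output_tokens": 10,
    "total_tokens": 15,
    "input_token_details": {"cache_creation": 20, "cache_read": 10},
}
assert _is_lc_usage_metadata(invoke_model)
attrs = dict(_token_counts_from_lc_usage_metadata(invoke_model))
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 35  # 5 + 20 + 10
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 45   # 15 + 20 + 10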