@@ -36,7 +36,7 @@ dependencies = [

[project.optional-dependencies]
instruments = [
"langchain_core >= 0.2.43",
"langchain_core >= 0.3.9",
]
test = [
"langchain_core == 0.3.50",
@@ -55,7 +55,7 @@ test = [
"vcrpy>=6.0.1",
]
type-check = [
"langchain_core == 0.2.43",
"langchain_core == 0.3.9",
]

[project.entry-points.opentelemetry_instrumentor]
@@ -25,6 +25,7 @@
    Optional,
    Sequence,
    Tuple,
    TypedDict,
    TypeVar,
    Union,
    cast,
@@ -35,6 +36,7 @@

import wrapt # type: ignore
from langchain_core.messages import BaseMessage
from langchain_core.messages.ai import UsageMetadata
from langchain_core.tracers import BaseTracer, LangChainTracer
from langchain_core.tracers.schemas import Run
from opentelemetry import context as context_api
@@ -43,6 +45,7 @@
from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes
from opentelemetry.trace import Span
from opentelemetry.util.types import AttributeValue
from typing_extensions import NotRequired, TypeGuard
from wrapt import ObjectProxy

from openinference.instrumentation import get_attributes_from_context, safe_json_dumps
@@ -832,6 +835,125 @@ def _model_name(
return


class _RawAnthropicUsageWithCacheReadOrWrite(TypedDict):
    # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13
    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: NotRequired[int]
    cache_creation_input_tokens: NotRequired[int]


def _is_raw_anthropic_usage_with_cache_read_or_write(
    obj: Mapping[str, Any],
) -> TypeGuard[_RawAnthropicUsageWithCacheReadOrWrite]:
    return (
        "input_tokens" in obj
        and "output_tokens" in obj
        and isinstance(obj["input_tokens"], int)
        and isinstance(obj["output_tokens"], int)
        and (
            "cache_read_input_tokens" in obj
            and isinstance(obj["cache_read_input_tokens"], int)
            or "cache_creation_input_tokens" in obj
            and isinstance(obj["cache_creation_input_tokens"], int)
        )
    )

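A minimal usage sketch of the guard above, with an invented payload: once the check passes, a static type checker narrows the mapping to _RawAnthropicUsageWithCacheReadOrWrite, so the NotRequired cache keys can be read without extra casts.

# Hypothetical raw Anthropic usage mapping (values invented for illustration).
usage = {
    "input_tokens": 3,
    "output_tokens": 5,
    "cache_read_input_tokens": 9,
}
if _is_raw_anthropic_usage_with_cache_read_or_write(usage):
    # Inside this branch the type checker treats `usage` as the TypedDict above,
    # so accessing the optional cache keys type-checks cleanly.
    cache_reads = usage.get("cache_read_input_tokens", 0)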

def _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(
    obj: _RawAnthropicUsageWithCacheReadOrWrite,
) -> Iterator[Tuple[str, int]]:
    input_tokens = obj["input_tokens"]
    output_tokens = obj["output_tokens"]

    cache_creation_input_tokens = 0
    cache_read_input_tokens = 0

    if "cache_creation_input_tokens" in obj:
        cache_creation_input_tokens = obj["cache_creation_input_tokens"]
    if "cache_read_input_tokens" in obj:
        cache_read_input_tokens = obj["cache_read_input_tokens"]

    prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens
    completion_tokens = output_tokens

    yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens
    yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens

    if cache_creation_input_tokens:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens
    if cache_read_input_tokens:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens

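For a concrete sense of the arithmetic, here is a hypothetical payload run through the generator above (numbers invented; the attribute constants are the module-level span attribute names used throughout this file):

counts = dict(
    _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(
        {
            "input_tokens": 3,
            "output_tokens": 5,
            "cache_creation_input_tokens": 2,
            "cache_read_input_tokens": 9,
        }
    )
)
assert counts[LLM_TOKEN_COUNT_PROMPT] == 14  # 3 + 2 + 9: cache tokens fold into the prompt count
assert counts[LLM_TOKEN_COUNT_COMPLETION] == 5
assert counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] == 2
assert counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] == 9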

def _is_lc_usage_metadata(obj: Mapping[str, Any]) -> TypeGuard[UsageMetadata]:
    return (
        "input_tokens" in obj
        and "output_tokens" in obj
        and "total_tokens" in obj
        and isinstance(obj["input_tokens"], int)
        and isinstance(obj["output_tokens"], int)
        and isinstance(obj["total_tokens"], int)
    )


def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[str, int]]:
    input_tokens = obj["input_tokens"]
    output_tokens = obj["output_tokens"]
    total_tokens = obj["total_tokens"]

    input_audio = 0
    input_cache_creation = 0
    input_cache_read = 0
    output_audio = 0
    output_reasoning = 0

    if "input_token_details" in obj:
        input_token_details = obj["input_token_details"]
        if "audio" in input_token_details:
            input_audio = input_token_details["audio"]
        if "cache_creation" in input_token_details:
            input_cache_creation = input_token_details["cache_creation"]
        if "cache_read" in input_token_details:
            input_cache_read = input_token_details["cache_read"]

    if "output_token_details" in obj:
        output_token_details = obj["output_token_details"]
        if "audio" in output_token_details:
            output_audio = output_token_details["audio"]
        if "reasoning" in output_token_details:
            output_reasoning = output_token_details["reasoning"]

    prompt_tokens = input_tokens
    completion_tokens = output_tokens

    # heuristic adjustment for Bedrock Anthropic models with cache read or write
    # https://github.com/Arize-ai/openinference/issues/2381
    if input_cache := input_cache_creation + input_cache_read:
        if total_tokens == input_tokens + output_tokens + input_cache:
            # for Bedrock Converse
            prompt_tokens += input_cache
        elif input_tokens < input_cache:
            # for Bedrock InvokeModel
            prompt_tokens += input_cache
            total_tokens += input_cache

    yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens
    yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens
    yield LLM_TOKEN_COUNT_TOTAL, total_tokens

    if input_audio:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, input_audio
    if input_cache_creation:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, input_cache_creation
    if input_cache_read:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, input_cache_read
    if output_audio:
        yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, output_audio
    if output_reasoning:
        yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, output_reasoning

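To illustrate the two branches of the heuristic above, here are two hypothetical payloads (plain dicts standing in for UsageMetadata, all values invented), one shaped like Bedrock Converse output and one like Bedrock InvokeModel output:

# Converse-style payload: total_tokens already includes the cache tokens,
# so only the prompt count is bumped and the total is left alone.
converse = dict(
    _token_counts_from_lc_usage_metadata(
        {
            "input_tokens": 10,
            "output_tokens": 4,
            "total_tokens": 26,  # 10 + 4 + (3 + 9)
            "input_token_details": {"cache_creation": 3, "cache_read": 9},
        }
    )
)
assert converse[LLM_TOKEN_COUNT_PROMPT] == 22  # 10 + 3 + 9
assert converse[LLM_TOKEN_COUNT_TOTAL] == 26

# InvokeModel-style payload: input_tokens excludes (and is smaller than) the cache
# tokens, so both the prompt count and the total are bumped.
invoke = dict(
    _token_counts_from_lc_usage_metadata(
        {
            "input_tokens": 2,
            "output_tokens": 4,
            "total_tokens": 6,
            "input_token_details": {"cache_creation": 3, "cache_read": 9},
        }
    )
)
assert invoke[LLM_TOKEN_COUNT_PROMPT] == 14  # 2 + 3 + 9
assert invoke[LLM_TOKEN_COUNT_TOTAL] == 18  # 6 + 3 + 9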

@stop_on_exception
def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]:
"""Yields token count information if present."""
@@ -843,26 +965,23 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]:
        )
    ):
        return
    keys: Sequence[str]
    for attribute_name, keys in [
        (
            LLM_TOKEN_COUNT_PROMPT,
            (
                "prompt_tokens",
                "input_tokens",  # Anthropic-specific key
                "prompt_token_count",  # Gemini-specific key - https://ai.google.dev/gemini-api/docs/tokens?lang=python
            ),
        ),
        (
            LLM_TOKEN_COUNT_COMPLETION,
            (
                "completion_tokens",
                "output_tokens",  # Anthropic-specific key
                "candidates_token_count",  # Gemini-specific key
            ),
        ),
        (LLM_TOKEN_COUNT_TOTAL, ("total_tokens", "total_token_count")),  # Gemini-specific key
        (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, ("cache_read_input_tokens",)),  # Anthropic
        (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, ("cache_creation_input_tokens",)),  # Anthropic
    ]:
        if (token_count := _get_first_value(token_usage, keys)) is not None:
            yield attribute_name, token_count
@@ -895,39 +1014,11 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]:
            yield attribute_name, token_count

    # maps langchain_core.messages.ai.UsageMetadata object
    for attribute_name, details_key_or_none, keys in [
        (LLM_TOKEN_COUNT_PROMPT, None, ("input_tokens",)),
        (LLM_TOKEN_COUNT_COMPLETION, None, ("output_tokens",)),
        (
            LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO,
            "input_token_details",
            ("audio",),
        ),
        (
            LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE,
            "input_token_details",
            ("cache_creation",),
        ),
        (
            LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ,
            "input_token_details",
            ("cache_read",),
        ),
        (
            LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO,
            "output_token_details",
            ("audio",),
        ),
        (
            LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
            "output_token_details",
            ("reasoning",),
        ),
    ]:
        details = token_usage.get(details_key_or_none) if details_key_or_none else token_usage
        if details is not None:
            if (token_count := _get_first_value(details, keys)) is not None:
                yield attribute_name, token_count
    if _is_lc_usage_metadata(token_usage):
        yield from _token_counts_from_lc_usage_metadata(token_usage)

    if _is_raw_anthropic_usage_with_cache_read_or_write(token_usage):
        yield from _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(token_usage)


def _parse_token_usage_for_vertexai(
@@ -590,7 +590,7 @@ def test_anthropic_token_counts(
    span = spans[0]
    llm_attributes = dict(span.attributes or {})
    assert llm_attributes.pop(OPENINFERENCE_SPAN_KIND, None) == LLM.value
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 22
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 33
    assert llm_attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) == 5
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE) == 2
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ) == 9
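The expected prompt count moves from 22 to 33 because cached tokens are now folded into the prompt total; assuming the recorded response reports 22 uncached input tokens alongside the 2 cache-write and 9 cache-read tokens asserted here, the arithmetic is:

# input_tokens + cache_creation_input_tokens + cache_read_input_tokens
assert 22 + 2 + 9 == 33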