From 2d830f01e90ec59f115e6e5c76d6cb1c3edd5fcd Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 6 Nov 2025 08:54:07 -0800 Subject: [PATCH 1/6] fix: anthropic cache token count --- .../instrumentation/langchain/_tracer.py | 211 ++++++++++-- .../tests/test_token_counts.py | 312 ++++++++++++++++++ 2 files changed, 490 insertions(+), 33 deletions(-) create mode 100644 python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index 3b72f3bf09..ae8e194918 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -25,6 +25,7 @@ Optional, Sequence, Tuple, + TypedDict, TypeVar, Union, cast, @@ -35,6 +36,7 @@ import wrapt # type: ignore from langchain_core.messages import BaseMessage +from langchain_core.messages.ai import UsageMetadata from langchain_core.tracers import BaseTracer, LangChainTracer from langchain_core.tracers.schemas import Run from opentelemetry import context as context_api @@ -43,6 +45,7 @@ from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes from opentelemetry.trace import Span from opentelemetry.util.types import AttributeValue +from typing_extensions import TypeGuard from wrapt import ObjectProxy from openinference.instrumentation import get_attributes_from_context, safe_json_dumps @@ -832,6 +835,172 @@ def _model_name( return +class _HasInputAndOutputTokens(TypedDict): + input_tokens: int + output_tokens: int + + +class _RawAnthropicUsageWithCache(_HasInputAndOutputTokens): + # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13 + cache_creation_input_tokens: int + cache_read_input_tokens: int + + +class _RawAnthropicUsageWithCacheRead(_HasInputAndOutputTokens): + cache_read_input_tokens: int + + +class _RawAnthropicUsageWithCacheCreation(_HasInputAndOutputTokens): + cache_creation_input_tokens: int + + +def _is_raw_anthropic_usage_with_cache( + obj: Mapping[str, Any], +) -> TypeGuard[_RawAnthropicUsageWithCache]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "cache_creation_input_tokens" in obj + and "cache_read_input_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["cache_creation_input_tokens"], int) + and isinstance(obj["cache_read_input_tokens"], int) + ) + + +def _token_counts_from_raw_anthropic_usage_with_cache( + obj: _RawAnthropicUsageWithCache, +) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + + if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens + + if cache_read_input_tokens := obj["cache_read_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens + + prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens + completion_tokens = output_tokens + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def 
_is_raw_anthropic_usage_with_cache_creation( + obj: Mapping[str, Any], +) -> TypeGuard[_RawAnthropicUsageWithCacheCreation]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "cache_creation_input_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["cache_creation_input_tokens"], int) + ) + + +def _token_counts_from_raw_anthropic_usage_with_cache_creation( + obj: _RawAnthropicUsageWithCacheCreation, +) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + + if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens + + prompt_tokens = input_tokens + cache_creation_input_tokens + completion_tokens = output_tokens + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def _is_raw_anthropic_usage_with_cache_read( + obj: Mapping[str, Any], +) -> TypeGuard[_RawAnthropicUsageWithCacheRead]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "cache_read_input_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["cache_read_input_tokens"], int) + ) + + +def _token_counts_from_raw_anthropic_usage_with_cache_read( + obj: _RawAnthropicUsageWithCacheRead, +) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + + if cache_read_input_tokens := obj["cache_read_input_tokens"]: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens + + prompt_tokens = input_tokens + cache_read_input_tokens + completion_tokens = output_tokens + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def _is_lc_usage_metadata(obj: Mapping[str, Any]) -> TypeGuard[UsageMetadata]: + return ( + "input_tokens" in obj + and "output_tokens" in obj + and "total_tokens" in obj + and isinstance(obj["input_tokens"], int) + and isinstance(obj["output_tokens"], int) + and isinstance(obj["total_tokens"], int) + ) + + +def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[str, int]]: + input_tokens = obj["input_tokens"] + output_tokens = obj["output_tokens"] + total_tokens = obj["total_tokens"] + + yield LLM_TOKEN_COUNT_TOTAL, total_tokens + + if input_token_details := (obj.get("input_token_details") or {}): + if audio := input_token_details.get("audio"): + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, audio + if cache_creation := input_token_details.get("cache_creation"): + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation + if cache_read := input_token_details.get("cache_read"): + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read + + if output_token_details := (obj.get("output_token_details") or {}): + if audio := output_token_details.get("audio"): + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, audio + if reasoning := output_token_details.get("reasoning"): + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, reasoning + + if total_tokens == input_tokens + output_tokens: + prompt_tokens = input_tokens + completion_tokens = output_tokens + else: + prompt_tokens = ( + input_tokens + + (input_token_details.get("audio") or 0) + + (input_token_details.get("cache_creation") or 0) + + (input_token_details.get("cache_read") or 0) + ) + if total_tokens == prompt_tokens + output_tokens: + completion_tokens 
= output_tokens + else: + completion_tokens = ( + output_tokens + + (output_token_details.get("audio") or 0) + + (output_token_details.get("reasoning") or 0) + ) + + yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens + yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + @stop_on_exception def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]: """Yields token count information if present.""" @@ -894,40 +1063,16 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i if (token_count := _get_first_value(details, keys)) is not None: yield attribute_name, token_count + if _is_raw_anthropic_usage_with_cache(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache(token_usage) + elif _is_raw_anthropic_usage_with_cache_read(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache_read(token_usage) + elif _is_raw_anthropic_usage_with_cache_creation(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache_creation(token_usage) + # maps langchain_core.messages.ai.UsageMetadata object - for attribute_name, details_key_or_none, keys in [ - (LLM_TOKEN_COUNT_PROMPT, None, ("input_tokens",)), - (LLM_TOKEN_COUNT_COMPLETION, None, ("output_tokens",)), - ( - LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, - "input_token_details", - ("audio",), - ), - ( - LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, - "input_token_details", - ("cache_creation",), - ), - ( - LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, - "input_token_details", - ("cache_read",), - ), - ( - LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, - "output_token_details", - ("audio",), - ), - ( - LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, - "output_token_details", - ("reasoning",), - ), - ]: - details = token_usage.get(details_key_or_none) if details_key_or_none else token_usage - if details is not None: - if (token_count := _get_first_value(details, keys)) is not None: - yield attribute_name, token_count + if _is_lc_usage_metadata(token_usage): + yield from _token_counts_from_lc_usage_metadata(token_usage) def _parse_token_usage_for_vertexai( diff --git a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py new file mode 100644 index 0000000000..7ab4f35dcf --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py @@ -0,0 +1,312 @@ +import pytest +from langchain_core.messages.ai import UsageMetadata + +from openinference.instrumentation.langchain._tracer import ( + _token_counts_from_lc_usage_metadata, + _token_counts_from_raw_anthropic_usage_with_cache, + _token_counts_from_raw_anthropic_usage_with_cache_creation, + _token_counts_from_raw_anthropic_usage_with_cache_read, +) +from openinference.semconv.trace import SpanAttributes + + +@pytest.mark.parametrize( + "usage_metadata,expected", + [ + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="basic_token_counts", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 35, + "input_token_details": {"audio": 5}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 15, + 
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="input_audio_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 38, + "input_token_details": {"cache_creation": 8}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 38, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="input_cache_creation_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 33, + "input_token_details": {"cache_read": 3}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 33, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 13, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="input_cache_read_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 37, + "output_token_details": {"audio": 7}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 37, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 7, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 27, + }, + id="output_audio_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 35, + "output_token_details": {"reasoning": 5}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, + }, + id="output_reasoning_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 50, + "input_token_details": { + "audio": 5, + "cache_creation": 3, + "cache_read": 2, + }, + "output_token_details": { + "audio": 6, + "reasoning": 4, + }, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 50, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 2, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 6, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 4, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 20, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 30, + }, + id="all_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {"audio": 5}, + "output_token_details": {"reasoning": 3}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="total_equals_sum_no_detail_adjustment", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="empty_input_token_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "output_token_details": {}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="empty_output_token_details", + ), + pytest.param( + { + "input_tokens": 0, + "output_tokens": 0, + "total_tokens": 0, + 
}, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 0, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, + }, + id="zero_values", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": { + "audio": None, + "cache_creation": 5, + }, + "output_token_details": { + "audio": 3, + "reasoning": None, + }, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="none_values_not_yielded", + ), + ], +) +def test_token_counts_from_lc_usage_metadata( + usage_metadata: UsageMetadata, expected: dict[str, int] +) -> None: + """Test _token_counts_from_lc_usage_metadata with various inputs.""" + result = dict(_token_counts_from_lc_usage_metadata(usage_metadata)) + assert result == expected + + +@pytest.mark.parametrize( + "usage,expected", + [ + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "cache_creation_input_tokens": 5, + "cache_read_input_tokens": 3, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 10 + 5 + 3 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + id="with_both_cache_types", + ), + pytest.param( + { + "input_tokens": 0, + "output_tokens": 0, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, + }, + id="zeros_no_cache_details", + ), + ], +) +def test_token_counts_from_raw_anthropic_usage_with_cache( + usage: dict[str, int], expected: dict[str, int] +) -> None: + """Test Anthropic usage with both cache creation and read.""" + result = dict(_token_counts_from_raw_anthropic_usage_with_cache(usage)) # type: ignore + assert result == expected + + +@pytest.mark.parametrize( + "usage,expected", + [ + pytest.param( + { + "input_tokens": 15, + "output_tokens": 25, + "cache_creation_input_tokens": 8, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 23, # 15 + 8 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, + }, + id="with_cache_creation", + ), + ], +) +def test_token_counts_from_raw_anthropic_usage_with_cache_creation( + usage: dict[str, int], expected: dict[str, int] +) -> None: + """Test Anthropic usage with cache creation only.""" + result = dict(_token_counts_from_raw_anthropic_usage_with_cache_creation(usage)) # type: ignore + assert result == expected + + +@pytest.mark.parametrize( + "usage,expected", + [ + pytest.param( + { + "input_tokens": 12, + "output_tokens": 18, + "cache_read_input_tokens": 6, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 6, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 12 + 6 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 18, + }, + id="with_cache_read", + ), + ], +) +def test_token_counts_from_raw_anthropic_usage_with_cache_read( + usage: dict[str, int], expected: dict[str, int] +) -> None: + """Test Anthropic usage with cache read only.""" + result = dict(_token_counts_from_raw_anthropic_usage_with_cache_read(usage)) # type: ignore + assert result == expected From b7f65e385d196aa845b358acc2adf0aee019fa5e Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 6 Nov 2025 09:52:14 -0800 
Subject: [PATCH 2/6] fix test --- .../tests/test_instrumentor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py index d2da135a78..35c86668cd 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_instrumentor.py @@ -590,7 +590,7 @@ def test_anthropic_token_counts( span = spans[0] llm_attributes = dict(span.attributes or {}) assert llm_attributes.pop(OPENINFERENCE_SPAN_KIND, None) == LLM.value - assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 22 + assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 33 assert llm_attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) == 5 assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE) == 2 assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ) == 9 From 30538fad4a8de9534b660e443a9b225f750d10f2 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 6 Nov 2025 12:38:12 -0800 Subject: [PATCH 3/6] clean up --- .../pyproject.toml | 4 +- .../instrumentation/langchain/_tracer.py | 58 +++++++++++-------- python/tox.ini | 1 + 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml b/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml index ddc9b3786c..d069873eed 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-langchain/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ [project.optional-dependencies] instruments = [ - "langchain_core >= 0.2.43", + "langchain_core >= 0.3.9", ] test = [ "langchain_core == 0.3.50", @@ -55,7 +55,7 @@ test = [ "vcrpy>=6.0.1", ] type-check = [ - "langchain_core == 0.2.43", + "langchain_core == 0.3.9", ] [project.entry-points.opentelemetry_instrumentor] diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index ae8e194918..9458eed709 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -964,42 +964,52 @@ def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[s yield LLM_TOKEN_COUNT_TOTAL, total_tokens - if input_token_details := (obj.get("input_token_details") or {}): - if audio := input_token_details.get("audio"): - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, audio - if cache_creation := input_token_details.get("cache_creation"): - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation - if cache_read := input_token_details.get("cache_read"): - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read - - if output_token_details := (obj.get("output_token_details") or {}): - if audio := output_token_details.get("audio"): - yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, audio - if reasoning := output_token_details.get("reasoning"): - yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, reasoning + input_audio = 0 + 
input_cache_creation = 0 + input_cache_read = 0 + output_audio = 0 + output_reasoning = 0 + + if "input_token_details" in obj: + input_token_details = obj["input_token_details"] + if "audio" in input_token_details: + input_audio = input_token_details["audio"] + if "cache_creation" in input_token_details: + input_cache_creation = input_token_details["cache_creation"] + if "cache_read" in input_token_details: + input_cache_read = input_token_details["cache_read"] + + if "output_token_details" in obj: + output_token_details = obj["output_token_details"] + if "audio" in output_token_details: + output_audio = output_token_details["audio"] + if "reasoning" in output_token_details: + output_reasoning = output_token_details["reasoning"] if total_tokens == input_tokens + output_tokens: prompt_tokens = input_tokens completion_tokens = output_tokens else: - prompt_tokens = ( - input_tokens - + (input_token_details.get("audio") or 0) - + (input_token_details.get("cache_creation") or 0) - + (input_token_details.get("cache_read") or 0) - ) + prompt_tokens = input_tokens + input_audio + input_cache_creation + input_cache_read if total_tokens == prompt_tokens + output_tokens: completion_tokens = output_tokens else: - completion_tokens = ( - output_tokens - + (output_token_details.get("audio") or 0) - + (output_token_details.get("reasoning") or 0) - ) + completion_tokens = output_tokens + output_audio + output_reasoning yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + if input_audio: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, input_audio + if input_cache_creation: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, input_cache_creation + if input_cache_read: + yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, input_cache_read + if output_audio: + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, output_audio + if output_reasoning: + yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, output_reasoning + @stop_on_exception def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]: diff --git a/python/tox.ini b/python/tox.ini index ab56dd791e..a0980b900a 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -162,6 +162,7 @@ commands_pre = openllmetry-latest: uv pip install -U opentelemetry-instrumentation-openai openlit: uv pip install --reinstall {toxinidir}/instrumentation/openinference-instrumentation-openlit[test] openlit-latest: uv pip install -U openlit + uv pip list -v commands = ruff: ruff format . 
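A minimal usage sketch (not part of the patch series itself) of the raw-Anthropic-usage helpers added in PATCH 1/6, assuming the package state after PATCH 3/6; PATCH 4/6 later consolidates these helpers into a single _token_counts_from_raw_anthropic_usage_with_cache_read_or_write. The payload and the expected attributes mirror the "with_both_cache_types" case in tests/test_token_counts.py.

# Sketch only: exercises the helper added in PATCH 1/6 against the
# "with_both_cache_types" payload from tests/test_token_counts.py.
from openinference.instrumentation.langchain._tracer import (
    _token_counts_from_raw_anthropic_usage_with_cache,
)
from openinference.semconv.trace import SpanAttributes

usage = {
    "input_tokens": 10,
    "output_tokens": 20,
    "cache_creation_input_tokens": 5,
    "cache_read_input_tokens": 3,
}
attrs = dict(_token_counts_from_raw_anthropic_usage_with_cache(usage))  # type: ignore

# Cache tokens are surfaced as prompt details and folded into the prompt total.
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] == 5
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] == 3
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 18  # 10 + 5 + 3
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 20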
From 4c0a08c63b262176385cc84adb7fd5f3970153b4 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 7 Nov 2025 09:14:32 -0800 Subject: [PATCH 4/6] clean up --- .../instrumentation/langchain/_tracer.py | 146 +++------ .../tests/test_token_counts.py | 303 ++++++------------ 2 files changed, 133 insertions(+), 316 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index 9458eed709..8615cb488c 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -45,7 +45,7 @@ from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes from opentelemetry.trace import Span from opentelemetry.util.types import AttributeValue -from typing_extensions import TypeGuard +from typing_extensions import NotRequired, TypeGuard from wrapt import ObjectProxy from openinference.instrumentation import get_attributes_from_context, safe_json_dumps @@ -835,51 +835,44 @@ def _model_name( return -class _HasInputAndOutputTokens(TypedDict): +class _RawAnthropicUsageWithCacheReadOrWrite(TypedDict): + # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13 input_tokens: int output_tokens: int + cache_read_input_tokens: NotRequired[int] + cache_creation_input_tokens: NotRequired[int] -class _RawAnthropicUsageWithCache(_HasInputAndOutputTokens): - # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13 - cache_creation_input_tokens: int - cache_read_input_tokens: int - - -class _RawAnthropicUsageWithCacheRead(_HasInputAndOutputTokens): - cache_read_input_tokens: int - - -class _RawAnthropicUsageWithCacheCreation(_HasInputAndOutputTokens): - cache_creation_input_tokens: int - - -def _is_raw_anthropic_usage_with_cache( +def _is_raw_anthropic_usage_with_cache_read_or_write( obj: Mapping[str, Any], -) -> TypeGuard[_RawAnthropicUsageWithCache]: +) -> TypeGuard[_RawAnthropicUsageWithCacheReadOrWrite]: return ( "input_tokens" in obj and "output_tokens" in obj - and "cache_creation_input_tokens" in obj - and "cache_read_input_tokens" in obj and isinstance(obj["input_tokens"], int) and isinstance(obj["output_tokens"], int) - and isinstance(obj["cache_creation_input_tokens"], int) - and isinstance(obj["cache_read_input_tokens"], int) + and ( + "cache_read_input_tokens" in obj + and isinstance(obj["cache_read_input_tokens"], int) + or "cache_creation_input_tokens" in obj + and isinstance(obj["cache_creation_input_tokens"], int) + ) ) -def _token_counts_from_raw_anthropic_usage_with_cache( - obj: _RawAnthropicUsageWithCache, +def _token_counts_from_raw_anthropic_usage_with_cache_read_or_write( + obj: _RawAnthropicUsageWithCacheReadOrWrite, ) -> Iterator[Tuple[str, int]]: input_tokens = obj["input_tokens"] output_tokens = obj["output_tokens"] - if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens + cache_creation_input_tokens = 0 + cache_read_input_tokens = 0 - if cache_read_input_tokens := obj["cache_read_input_tokens"]: - yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, 
cache_read_input_tokens + if "cache_creation_input_tokens" in obj: + cache_creation_input_tokens = obj["cache_creation_input_tokens"] + if "cache_read_input_tokens" in obj: + cache_read_input_tokens = obj["cache_read_input_tokens"] prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens completion_tokens = output_tokens @@ -887,64 +880,11 @@ def _token_counts_from_raw_anthropic_usage_with_cache( yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens - -def _is_raw_anthropic_usage_with_cache_creation( - obj: Mapping[str, Any], -) -> TypeGuard[_RawAnthropicUsageWithCacheCreation]: - return ( - "input_tokens" in obj - and "output_tokens" in obj - and "cache_creation_input_tokens" in obj - and isinstance(obj["input_tokens"], int) - and isinstance(obj["output_tokens"], int) - and isinstance(obj["cache_creation_input_tokens"], int) - ) - - -def _token_counts_from_raw_anthropic_usage_with_cache_creation( - obj: _RawAnthropicUsageWithCacheCreation, -) -> Iterator[Tuple[str, int]]: - input_tokens = obj["input_tokens"] - output_tokens = obj["output_tokens"] - - if cache_creation_input_tokens := obj["cache_creation_input_tokens"]: + if cache_creation_input_tokens: yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens - - prompt_tokens = input_tokens + cache_creation_input_tokens - completion_tokens = output_tokens - - yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens - yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens - - -def _is_raw_anthropic_usage_with_cache_read( - obj: Mapping[str, Any], -) -> TypeGuard[_RawAnthropicUsageWithCacheRead]: - return ( - "input_tokens" in obj - and "output_tokens" in obj - and "cache_read_input_tokens" in obj - and isinstance(obj["input_tokens"], int) - and isinstance(obj["output_tokens"], int) - and isinstance(obj["cache_read_input_tokens"], int) - ) - - -def _token_counts_from_raw_anthropic_usage_with_cache_read( - obj: _RawAnthropicUsageWithCacheRead, -) -> Iterator[Tuple[str, int]]: - input_tokens = obj["input_tokens"] - output_tokens = obj["output_tokens"] - - if cache_read_input_tokens := obj["cache_read_input_tokens"]: + if cache_read_input_tokens: yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens - prompt_tokens = input_tokens + cache_read_input_tokens - completion_tokens = output_tokens - - yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens - yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens - def _is_lc_usage_metadata(obj: Mapping[str, Any]) -> TypeGuard[UsageMetadata]: return ( @@ -962,8 +902,6 @@ def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[s output_tokens = obj["output_tokens"] total_tokens = obj["total_tokens"] - yield LLM_TOKEN_COUNT_TOTAL, total_tokens - input_audio = 0 input_cache_creation = 0 input_cache_read = 0 @@ -986,18 +924,23 @@ def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[s if "reasoning" in output_token_details: output_reasoning = output_token_details["reasoning"] - if total_tokens == input_tokens + output_tokens: - prompt_tokens = input_tokens - completion_tokens = output_tokens - else: - prompt_tokens = input_tokens + input_audio + input_cache_creation + input_cache_read - if total_tokens == prompt_tokens + output_tokens: - completion_tokens = output_tokens - else: - completion_tokens = output_tokens + output_audio + output_reasoning + prompt_tokens = input_tokens + completion_tokens = output_tokens + + # heuristic adjustment for Bedrock Anthropic models with cache 
read or write + # https://github.com/Arize-ai/openinference/issues/2381 + if input_cache := input_cache_creation + input_cache_read: + if total_tokens == input_tokens + output_tokens + input_cache: + # for Bedrock Converse + prompt_tokens += input_cache + elif input_tokens < input_cache: + # for Bedrock InvokeModel + prompt_tokens += input_cache + total_tokens += input_cache yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens + yield LLM_TOKEN_COUNT_TOTAL, total_tokens if input_audio: yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, input_audio @@ -1022,12 +965,12 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i ) ): return + keys: Sequence[str] for attribute_name, keys in [ ( LLM_TOKEN_COUNT_PROMPT, ( "prompt_tokens", - "input_tokens", # Anthropic-specific key "prompt_token_count", # Gemini-specific key - https://ai.google.dev/gemini-api/docs/tokens?lang=python ), ), @@ -1035,13 +978,10 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i LLM_TOKEN_COUNT_COMPLETION, ( "completion_tokens", - "output_tokens", # Anthropic-specific key "candidates_token_count", # Gemini-specific key ), ), (LLM_TOKEN_COUNT_TOTAL, ("total_tokens", "total_token_count")), # Gemini-specific key - (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, ("cache_read_input_tokens",)), # Antrhopic - (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, ("cache_creation_input_tokens",)), # Antrhopic ]: if (token_count := _get_first_value(token_usage, keys)) is not None: yield attribute_name, token_count @@ -1073,17 +1013,13 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, i if (token_count := _get_first_value(details, keys)) is not None: yield attribute_name, token_count - if _is_raw_anthropic_usage_with_cache(token_usage): - yield from _token_counts_from_raw_anthropic_usage_with_cache(token_usage) - elif _is_raw_anthropic_usage_with_cache_read(token_usage): - yield from _token_counts_from_raw_anthropic_usage_with_cache_read(token_usage) - elif _is_raw_anthropic_usage_with_cache_creation(token_usage): - yield from _token_counts_from_raw_anthropic_usage_with_cache_creation(token_usage) - # maps langchain_core.messages.ai.UsageMetadata object if _is_lc_usage_metadata(token_usage): yield from _token_counts_from_lc_usage_metadata(token_usage) + if _is_raw_anthropic_usage_with_cache_read_or_write(token_usage): + yield from _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(token_usage) + def _parse_token_usage_for_vertexai( outputs: Optional[Mapping[str, Any]], diff --git a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py index 7ab4f35dcf..deaea82fe2 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py @@ -1,17 +1,18 @@ +from typing import Any + import pytest -from langchain_core.messages.ai import UsageMetadata from openinference.instrumentation.langchain._tracer import ( + _is_lc_usage_metadata, + _is_raw_anthropic_usage_with_cache_read_or_write, _token_counts_from_lc_usage_metadata, - _token_counts_from_raw_anthropic_usage_with_cache, - _token_counts_from_raw_anthropic_usage_with_cache_creation, - _token_counts_from_raw_anthropic_usage_with_cache_read, + _token_counts_from_raw_anthropic_usage_with_cache_read_or_write, ) 
from openinference.semconv.trace import SpanAttributes @pytest.mark.parametrize( - "usage_metadata,expected", + "usage_metadata,expected,is_valid", [ pytest.param( { @@ -20,158 +21,64 @@ "total_tokens": 30, }, { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, }, - id="basic_token_counts", + True, + id="basic", ), pytest.param( { "input_tokens": 10, "output_tokens": 20, "total_tokens": 35, - "input_token_details": {"audio": 5}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 15, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + "input_token_details": {"cache_creation": 3, "cache_read": 2}, }, - id="input_audio_details", - ), - pytest.param( { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 38, - "input_token_details": {"cache_creation": 8}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 38, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 15, # 10 + 3 + 2 SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="input_cache_creation_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 33, - "input_token_details": {"cache_read": 3}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 33, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 13, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="input_cache_read_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 37, - "output_token_details": {"audio": 7}, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 37, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 7, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 27, - }, - id="output_audio_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 35, - "output_token_details": {"reasoning": 5}, - }, - { SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 35, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 5, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, - }, - id="output_reasoning_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 50, - "input_token_details": { - "audio": 5, - "cache_creation": 3, - "cache_read": 2, - }, - "output_token_details": { - "audio": 6, - "reasoning": 4, - }, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 50, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 3, SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 2, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 6, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 4, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 20, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 30, }, - id="all_details", + True, + id="bedrock_converse", ), pytest.param( { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, - "input_token_details": {"audio": 5}, - "output_token_details": {"reasoning": 3}, + "input_tokens": 5, + "output_tokens": 10, + "total_tokens": 15, + "input_token_details": {"cache_creation": 20, "cache_read": 10}, }, { - 
SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 3, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 35, # 5 + 20 + 10 + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 10, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 45, # adjusted + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 10, }, - id="total_equals_sum_no_detail_adjustment", + True, + id="bedrock_invokemodel", ), pytest.param( { "input_tokens": 10, "output_tokens": 20, "total_tokens": 30, - "input_token_details": {}, + "input_token_details": {"audio": 5}, + "output_token_details": {"reasoning": 3}, }, { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="empty_input_token_details", - ), - pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, - "output_token_details": {}, - }, - { SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO: 5, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING: 3, }, - id="empty_output_token_details", + True, + id="non_cache_details", ), pytest.param( { @@ -180,47 +87,45 @@ "total_tokens": 0, }, { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 0, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 0, }, - id="zero_values", + True, + id="zeros", ), pytest.param( - { - "input_tokens": 10, - "output_tokens": 20, - "total_tokens": 30, - "input_token_details": { - "audio": None, - "cache_creation": 5, - }, - "output_token_details": { - "audio": 3, - "reasoning": None, - }, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO: 3, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, - }, - id="none_values_not_yielded", + {"input_tokens": 10, "output_tokens": 20}, + {}, + False, + id="missing_total", + ), + pytest.param( + {"input_tokens": "10", "output_tokens": 20, "total_tokens": 30}, + {}, + False, + id="wrong_type", + ), + pytest.param( + {"output_tokens": 20, "total_tokens": 30}, + {}, + False, + id="missing_field", ), ], ) def test_token_counts_from_lc_usage_metadata( - usage_metadata: UsageMetadata, expected: dict[str, int] + usage_metadata: dict[str, Any], expected: dict[str, int], is_valid: bool ) -> None: """Test _token_counts_from_lc_usage_metadata with various inputs.""" - result = dict(_token_counts_from_lc_usage_metadata(usage_metadata)) - assert result == expected + assert _is_lc_usage_metadata(usage_metadata) == is_valid + if _is_lc_usage_metadata(usage_metadata): + result = dict(_token_counts_from_lc_usage_metadata(usage_metadata)) + assert result == expected @pytest.mark.parametrize( - "usage,expected", + "usage,expected,is_valid", [ pytest.param( { @@ -230,83 +135,59 @@ def test_token_counts_from_lc_usage_metadata( "cache_read_input_tokens": 3, }, { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 10 + 5 + 3 
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 5, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 3, }, - id="with_both_cache_types", - ), - pytest.param( - { - "input_tokens": 0, - "output_tokens": 0, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 0, - SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 0, - }, - id="zeros_no_cache_details", + True, + id="both", ), - ], -) -def test_token_counts_from_raw_anthropic_usage_with_cache( - usage: dict[str, int], expected: dict[str, int] -) -> None: - """Test Anthropic usage with both cache creation and read.""" - result = dict(_token_counts_from_raw_anthropic_usage_with_cache(usage)) # type: ignore - assert result == expected - - -@pytest.mark.parametrize( - "usage,expected", - [ pytest.param( + {"input_tokens": 15, "output_tokens": 25, "cache_creation_input_tokens": 8}, { - "input_tokens": 15, - "output_tokens": 25, - "cache_creation_input_tokens": 8, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 23, # 15 + 8 SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 25, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE: 8, }, - id="with_cache_creation", + True, + id="write", ), - ], -) -def test_token_counts_from_raw_anthropic_usage_with_cache_creation( - usage: dict[str, int], expected: dict[str, int] -) -> None: - """Test Anthropic usage with cache creation only.""" - result = dict(_token_counts_from_raw_anthropic_usage_with_cache_creation(usage)) # type: ignore - assert result == expected - - -@pytest.mark.parametrize( - "usage,expected", - [ pytest.param( + {"input_tokens": 12, "output_tokens": 18, "cache_read_input_tokens": 6}, { - "input_tokens": 12, - "output_tokens": 18, - "cache_read_input_tokens": 6, - }, - { - SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 6, SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 18, # 12 + 6 SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 18, + SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ: 6, }, - id="with_cache_read", + True, + id="read", + ), + pytest.param( + {"input_tokens": 10, "output_tokens": 20}, + {}, + False, + id="no_cache", + ), + pytest.param( + {"input_tokens": "10", "output_tokens": 20, "cache_read_input_tokens": 5}, + {}, + False, + id="wrong_type", + ), + pytest.param( + {"output_tokens": 20, "cache_read_input_tokens": 5}, + {}, + False, + id="missing_field", ), ], ) -def test_token_counts_from_raw_anthropic_usage_with_cache_read( - usage: dict[str, int], expected: dict[str, int] +def test_token_counts_from_raw_anthropic_usage( + usage: dict[str, Any], expected: dict[str, int], is_valid: bool ) -> None: - """Test Anthropic usage with cache read only.""" - result = dict(_token_counts_from_raw_anthropic_usage_with_cache_read(usage)) # type: ignore - assert result == expected + """Test Anthropic usage with cache.""" + assert _is_raw_anthropic_usage_with_cache_read_or_write(usage) == is_valid + if _is_raw_anthropic_usage_with_cache_read_or_write(usage): + result = dict(_token_counts_from_raw_anthropic_usage_with_cache_read_or_write(usage)) + assert result == expected From 8f3b955c4298ce819984a174baeb2f09157222ae Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 7 Nov 2025 09:17:31 -0800 Subject: [PATCH 5/6] clean up --- .../tests/test_token_counts.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git 
a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py index deaea82fe2..c99d3b67e5 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py +++ b/python/instrumentation/openinference-instrumentation-langchain/tests/test_token_counts.py @@ -94,6 +94,37 @@ True, id="zeros", ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {"cache_creation": 0, "cache_read": 0}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + }, + True, + id="zero_cache_no_details", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + "input_token_details": {}, + "output_token_details": {}, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + SpanAttributes.LLM_TOKEN_COUNT_TOTAL: 30, + }, + True, + id="empty_details", + ), pytest.param( {"input_tokens": 10, "output_tokens": 20}, {}, @@ -163,6 +194,29 @@ def test_token_counts_from_lc_usage_metadata( True, id="read", ), + pytest.param( + {"input_tokens": 10, "output_tokens": 20, "cache_creation_input_tokens": 0}, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + True, + id="zero_cache_write", + ), + pytest.param( + { + "input_tokens": 10, + "output_tokens": 20, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + }, + { + SpanAttributes.LLM_TOKEN_COUNT_PROMPT: 10, + SpanAttributes.LLM_TOKEN_COUNT_COMPLETION: 20, + }, + True, + id="zero_both_cache", + ), pytest.param( {"input_tokens": 10, "output_tokens": 20}, {}, From 30a6e953bdda7258a552b13a8b73c0b9d91649c0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 10 Nov 2025 13:08:19 -0800 Subject: [PATCH 6/6] add parentheses --- .../openinference/instrumentation/langchain/_tracer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py index 8615cb488c..cdf220b9d4 100644 --- a/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py +++ b/python/instrumentation/openinference-instrumentation-langchain/src/openinference/instrumentation/langchain/_tracer.py @@ -852,10 +852,11 @@ def _is_raw_anthropic_usage_with_cache_read_or_write( and isinstance(obj["input_tokens"], int) and isinstance(obj["output_tokens"], int) and ( - "cache_read_input_tokens" in obj - and isinstance(obj["cache_read_input_tokens"], int) - or "cache_creation_input_tokens" in obj - and isinstance(obj["cache_creation_input_tokens"], int) + ("cache_read_input_tokens" in obj and isinstance(obj["cache_read_input_tokens"], int)) + or ( + "cache_creation_input_tokens" in obj + and isinstance(obj["cache_creation_input_tokens"], int) + ) ) )
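As a closing sketch (not part of the patches), the Bedrock cache heuristic introduced in PATCH 4/6 can be exercised directly, assuming the final state of _tracer.py after PATCH 6/6. The two payloads and the expected attributes mirror the "bedrock_converse" and "bedrock_invokemodel" cases in tests/test_token_counts.py.

# Sketch only: final behavior of the LangChain UsageMetadata mapping with the
# Bedrock cache-read/cache-write adjustment (see issue #2381 referenced above).
from openinference.instrumentation.langchain._tracer import (
    _is_lc_usage_metadata,
    _token_counts_from_lc_usage_metadata,
)
from openinference.semconv.trace import SpanAttributes

# Bedrock Converse style: total_tokens already includes the cache tokens,
# so only the prompt count is adjusted (10 + 3 + 2 = 15) and total stays 35.
converse = {
    "input_tokens": 10,
    "output_tokens": 20,
    "total_tokens": 35,
    "input_token_details": {"cache_creation": 3, "cache_read": 2},
}
assert _is_lc_usage_metadata(converse)
attrs = dict(_token_counts_from_lc_usage_metadata(converse))
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 15
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_COMPLETION] == 20
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 35

# Bedrock InvokeModel style: input_tokens (5) is smaller than the cache tokens
# (20 + 10), so both the prompt and the total counts are adjusted upward.
invoke_model = {
    "input_tokens": 5,
    "output_tokens": 10,
    "total_tokens": 15,
    "input_token_details": {"cache_creation": 20, "cache_read": 10},
}
assert _is_lc_usage_metadata(invoke_model)
attrs = dict(_token_counts_from_lc_usage_metadata(invoke_model))
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_PROMPT] == 35  # 5 + 20 + 10
assert attrs[SpanAttributes.LLM_TOKEN_COUNT_TOTAL] == 45   # 15 + 20 + 10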