@@ -36,7 +36,7 @@ dependencies = [

[project.optional-dependencies]
instruments = [
"langchain_core >= 0.2.43",
"langchain_core >= 0.3.9",
]
test = [
"langchain_core == 0.3.50",
@@ -55,7 +55,7 @@ test = [
"vcrpy>=6.0.1",
]
type-check = [
"langchain_core == 0.2.43",
"langchain_core == 0.3.9",
]

[project.entry-points.opentelemetry_instrumentor]
@@ -25,6 +25,7 @@
    Optional,
    Sequence,
    Tuple,
    TypedDict,
    TypeVar,
    Union,
    cast,
@@ -35,6 +36,7 @@

import wrapt # type: ignore
from langchain_core.messages import BaseMessage
from langchain_core.messages.ai import UsageMetadata
from langchain_core.tracers import BaseTracer, LangChainTracer
from langchain_core.tracers.schemas import Run
from opentelemetry import context as context_api
@@ -43,6 +45,7 @@
from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes
from opentelemetry.trace import Span
from opentelemetry.util.types import AttributeValue
from typing_extensions import NotRequired, TypeGuard
from wrapt import ObjectProxy

from openinference.instrumentation import get_attributes_from_context, safe_json_dumps
@@ -832,6 +835,125 @@ def _model_name(
return


class _RawAnthropicUsageWithCacheReadOrWrite(TypedDict):
    # https://github.com/anthropics/anthropic-sdk-python/blob/2e2f663104c8926434088828c08fbdf202d6d6fd/src/anthropic/types/usage.py#L13
    input_tokens: int
    output_tokens: int
    cache_read_input_tokens: NotRequired[int]
    cache_creation_input_tokens: NotRequired[int]


def _is_raw_anthropic_usage_with_cache_read_or_write(
    obj: Mapping[str, Any],
) -> TypeGuard[_RawAnthropicUsageWithCacheReadOrWrite]:
    return (
        "input_tokens" in obj
        and "output_tokens" in obj
        and isinstance(obj["input_tokens"], int)
        and isinstance(obj["output_tokens"], int)
        and (
            "cache_read_input_tokens" in obj
            and isinstance(obj["cache_read_input_tokens"], int)
            or "cache_creation_input_tokens" in obj
            and isinstance(obj["cache_creation_input_tokens"], int)
        )
    )

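A minimal usage sketch of the guard above, with an invented payload: once the check passes, a static type checker narrows the mapping to _RawAnthropicUsageWithCacheReadOrWrite, so the NotRequired cache keys can be read without extra casts.

# Hypothetical raw Anthropic usage mapping (values invented for illustration).
usage = {
    "input_tokens": 3,
    "output_tokens": 5,
    "cache_read_input_tokens": 9,
}
if _is_raw_anthropic_usage_with_cache_read_or_write(usage):
    # Inside this branch the type checker treats `usage` as the TypedDict above,
    # so accessing the optional cache keys type-checks cleanly.
    cache_reads = usage.get("cache_read_input_tokens", 0)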

def _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(
    obj: _RawAnthropicUsageWithCacheReadOrWrite,
) -> Iterator[Tuple[str, int]]:
    input_tokens = obj["input_tokens"]
    output_tokens = obj["output_tokens"]

    cache_creation_input_tokens = 0
    cache_read_input_tokens = 0

    if "cache_creation_input_tokens" in obj:
        cache_creation_input_tokens = obj["cache_creation_input_tokens"]
    if "cache_read_input_tokens" in obj:
        cache_read_input_tokens = obj["cache_read_input_tokens"]

    prompt_tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens
    completion_tokens = output_tokens

    yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens
    yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens

    if cache_creation_input_tokens:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, cache_creation_input_tokens
    if cache_read_input_tokens:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, cache_read_input_tokens

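For a concrete sense of the arithmetic, here is a hypothetical payload run through the generator above (numbers invented; the attribute constants are the module-level span attribute names used throughout this file):

counts = dict(
    _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(
        {
            "input_tokens": 3,
            "output_tokens": 5,
            "cache_creation_input_tokens": 2,
            "cache_read_input_tokens": 9,
        }
    )
)
assert counts[LLM_TOKEN_COUNT_PROMPT] == 14  # 3 + 2 + 9: cache tokens fold into the prompt count
assert counts[LLM_TOKEN_COUNT_COMPLETION] == 5
assert counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] == 2
assert counts[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] == 9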

def _is_lc_usage_metadata(obj: Mapping[str, Any]) -> TypeGuard[UsageMetadata]:
    return (
        "input_tokens" in obj
        and "output_tokens" in obj
        and "total_tokens" in obj
        and isinstance(obj["input_tokens"], int)
        and isinstance(obj["output_tokens"], int)
        and isinstance(obj["total_tokens"], int)
    )


def _token_counts_from_lc_usage_metadata(obj: UsageMetadata) -> Iterator[Tuple[str, int]]:
    input_tokens = obj["input_tokens"]
    output_tokens = obj["output_tokens"]
    total_tokens = obj["total_tokens"]

    input_audio = 0
    input_cache_creation = 0
    input_cache_read = 0
    output_audio = 0
    output_reasoning = 0

    if "input_token_details" in obj:
        input_token_details = obj["input_token_details"]
        if "audio" in input_token_details:
            input_audio = input_token_details["audio"]
        if "cache_creation" in input_token_details:
            input_cache_creation = input_token_details["cache_creation"]
        if "cache_read" in input_token_details:
            input_cache_read = input_token_details["cache_read"]

    if "output_token_details" in obj:
        output_token_details = obj["output_token_details"]
        if "audio" in output_token_details:
            output_audio = output_token_details["audio"]
        if "reasoning" in output_token_details:
            output_reasoning = output_token_details["reasoning"]

    prompt_tokens = input_tokens
    completion_tokens = output_tokens

    # heuristic adjustment for Bedrock Anthropic models with cache read or write
    # https://github.com/Arize-ai/openinference/issues/2381
    if input_cache := input_cache_creation + input_cache_read:
        if total_tokens == input_tokens + output_tokens + input_cache:
            # for Bedrock Converse
            prompt_tokens += input_cache
        elif input_tokens < input_cache:
            # for Bedrock InvokeModel
            prompt_tokens += input_cache
            total_tokens += input_cache

    yield LLM_TOKEN_COUNT_PROMPT, prompt_tokens
    yield LLM_TOKEN_COUNT_COMPLETION, completion_tokens
    yield LLM_TOKEN_COUNT_TOTAL, total_tokens

    if input_audio:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO, input_audio
    if input_cache_creation:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, input_cache_creation
    if input_cache_read:
        yield LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, input_cache_read
    if output_audio:
        yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO, output_audio
    if output_reasoning:
        yield LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING, output_reasoning

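To illustrate the two branches of the heuristic above, here are two hypothetical payloads (plain dicts standing in for UsageMetadata, all values invented), one shaped like Bedrock Converse output and one like Bedrock InvokeModel output:

# Converse-style payload: total_tokens already includes the cache tokens,
# so only the prompt count is bumped and the total is left alone.
converse = dict(
    _token_counts_from_lc_usage_metadata(
        {
            "input_tokens": 10,
            "output_tokens": 4,
            "total_tokens": 26,  # 10 + 4 + (3 + 9)
            "input_token_details": {"cache_creation": 3, "cache_read": 9},
        }
    )
)
assert converse[LLM_TOKEN_COUNT_PROMPT] == 22  # 10 + 3 + 9
assert converse[LLM_TOKEN_COUNT_TOTAL] == 26

# InvokeModel-style payload: input_tokens excludes (and is smaller than) the cache
# tokens, so both the prompt count and the total are bumped.
invoke = dict(
    _token_counts_from_lc_usage_metadata(
        {
            "input_tokens": 2,
            "output_tokens": 4,
            "total_tokens": 6,
            "input_token_details": {"cache_creation": 3, "cache_read": 9},
        }
    )
)
assert invoke[LLM_TOKEN_COUNT_PROMPT] == 14  # 2 + 3 + 9
assert invoke[LLM_TOKEN_COUNT_TOTAL] == 18  # 6 + 3 + 9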

@stop_on_exception
def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]:
"""Yields token count information if present."""
@@ -843,26 +965,23 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]:
        )
    ):
        return
    keys: Sequence[str]
    for attribute_name, keys in [
        (
            LLM_TOKEN_COUNT_PROMPT,
            (
                "prompt_tokens",
                "input_tokens",  # Anthropic-specific key
                "prompt_token_count",  # Gemini-specific key - https://ai.google.dev/gemini-api/docs/tokens?lang=python
            ),
        ),
        (
            LLM_TOKEN_COUNT_COMPLETION,
            (
                "completion_tokens",
                "output_tokens",  # Anthropic-specific key
                "candidates_token_count",  # Gemini-specific key
            ),
        ),
        (LLM_TOKEN_COUNT_TOTAL, ("total_tokens", "total_token_count")),  # Gemini-specific key
        (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ, ("cache_read_input_tokens",)),  # Anthropic
        (LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE, ("cache_creation_input_tokens",)),  # Anthropic
    ]:
        if (token_count := _get_first_value(token_usage, keys)) is not None:
            yield attribute_name, token_count
@@ -895,39 +1014,11 @@ def _token_counts(outputs: Optional[Mapping[str, Any]]) -> Iterator[Tuple[str, int]]:
            yield attribute_name, token_count

    # maps langchain_core.messages.ai.UsageMetadata object
    for attribute_name, details_key_or_none, keys in [
        (LLM_TOKEN_COUNT_PROMPT, None, ("input_tokens",)),
        (LLM_TOKEN_COUNT_COMPLETION, None, ("output_tokens",)),
        (
            LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO,
            "input_token_details",
            ("audio",),
        ),
        (
            LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE,
            "input_token_details",
            ("cache_creation",),
        ),
        (
            LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ,
            "input_token_details",
            ("cache_read",),
        ),
        (
            LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO,
            "output_token_details",
            ("audio",),
        ),
        (
            LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING,
            "output_token_details",
            ("reasoning",),
        ),
    ]:
        details = token_usage.get(details_key_or_none) if details_key_or_none else token_usage
        if details is not None:
            if (token_count := _get_first_value(details, keys)) is not None:
                yield attribute_name, token_count
    if _is_lc_usage_metadata(token_usage):
        yield from _token_counts_from_lc_usage_metadata(token_usage)

    if _is_raw_anthropic_usage_with_cache_read_or_write(token_usage):
        yield from _token_counts_from_raw_anthropic_usage_with_cache_read_or_write(token_usage)


def _parse_token_usage_for_vertexai(
@@ -590,7 +590,7 @@ def test_anthropic_token_counts(
    span = spans[0]
    llm_attributes = dict(span.attributes or {})
    assert llm_attributes.pop(OPENINFERENCE_SPAN_KIND, None) == LLM.value
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 22
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == 33
    assert llm_attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) == 5
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE) == 2
    assert llm_attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ) == 9
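The expected prompt count moves from 22 to 33 because cached tokens are now folded into the prompt total; assuming the recorded response reports 22 uncached input tokens alongside the 2 cache-write and 9 cache-read tokens asserted here, the arithmetic is:

# input_tokens + cache_creation_input_tokens + cache_read_input_tokens
assert 22 + 2 + 9 == 33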