run-llama · logan-markewich · Nov 10, 2025 · Nov 2, 2025 · Nov 4, 2025 · Nov 7, 2025
diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/base.py b/llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/base.py
@@ -59,6 +59,9 @@
     RawContentBlockDeltaEvent,
     RawContentBlockStartEvent,
     RawContentBlockStopEvent,
+    RawMessageDeltaEvent,
+    RawMessageStartEvent,
+    RawMessageStopEvent,
     TextBlock,
     TextDelta,
     ThinkingBlock,
@@ -462,6 +465,9 @@ def gen() -> Generator[AnthropicChatResponse, None, None]:
             cur_citations: List[Dict[str, Any]] = []
             tracked_citations: Set[str] = set()
             role = MessageRole.ASSISTANT
+            # Track usage metadata and stop_reason from RawMessage events
+            usage_metadata: Dict[str, Any] = {}
+            stop_reason: Optional[str] = None
             for r in response:
                 if isinstance(r, (ContentBlockDeltaEvent, RawContentBlockDeltaEvent)):
                     if isinstance(r.delta, TextDelta):
@@ -550,6 +556,10 @@ def gen() -> Generator[AnthropicChatResponse, None, None]:
                         message=ChatMessage(
                             role=role,
                             blocks=content,
+                            additional_kwargs={
+                                "usage": usage_metadata if usage_metadata else None,
+                                "stop_reason": stop_reason,
+                            },
                         ),
                         citations=cur_citations,
                         delta=content_delta,
@@ -584,11 +594,49 @@ def gen() -> Generator[AnthropicChatResponse, None, None]:
                         message=ChatMessage(
                             role=role,
                             blocks=content,
+                            additional_kwargs={
+                                "usage": usage_metadata if usage_metadata else None,
+                                "stop_reason": stop_reason,
+                            },
                         ),
                         citations=cur_citations,
                         delta="",
                         raw=dict(r),
                     )
+                elif isinstance(r, RawMessageStartEvent):
+                    # Capture initial usage metadata from message_start
+                    if hasattr(r.message, "usage") and r.message.usage:
+                        usage_metadata = {
+                            "input_tokens": r.message.usage.input_tokens,
+                            "output_tokens": r.message.usage.output_tokens,
+                        }
+                elif isinstance(r, RawMessageDeltaEvent):
+                    # Update usage metadata and capture stop_reason from message_delta
+                    if hasattr(r, "usage") and r.usage:
+                        usage_metadata = {
+                            "input_tokens": r.usage.input_tokens,
+                            "output_tokens": r.usage.output_tokens,
+                        }
+                    if hasattr(r, "delta") and hasattr(r.delta, "stop_reason"):
+                        stop_reason = r.delta.stop_reason
+
+                    # Yield a final chunk with updated metadata including stop_reason
+                    yield AnthropicChatResponse(
+                        message=ChatMessage(
+                            role=role,
+                            blocks=content,
+                            additional_kwargs={
+                                "usage": usage_metadata if usage_metadata else None,
+                                "stop_reason": stop_reason,
+                            },
+                        ),
+                        citations=cur_citations,
+                        delta="",
+                        raw=dict(r),
+                    )
+                elif isinstance(r, RawMessageStopEvent):
+                    # Final event - no additional data to capture
+                    pass
 
         return gen()
 
@@ -664,6 +712,9 @@ async def gen() -> ChatResponseAsyncGen:
             cur_citations: List[Dict[str, Any]] = []
             tracked_citations: Set[str] = set()
             role = MessageRole.ASSISTANT
+            # Track usage metadata and stop_reason from RawMessage events
+            usage_metadata: Dict[str, Any] = {}
+            stop_reason: Optional[str] = None
             async for r in response:
                 if isinstance(r, (ContentBlockDeltaEvent, RawContentBlockDeltaEvent)):
                     if isinstance(r.delta, TextDelta):
@@ -752,6 +803,10 @@ async def gen() -> ChatResponseAsyncGen:
                         message=ChatMessage(
                             role=role,
                             blocks=content,
+                            additional_kwargs={
+                                "usage": usage_metadata if usage_metadata else None,
+                                "stop_reason": stop_reason,
+                            },
                         ),
                         citations=cur_citations,
                         delta=content_delta,
@@ -786,11 +841,49 @@ async def gen() -> ChatResponseAsyncGen:
                         message=ChatMessage(
                             role=role,
                             blocks=content,
+                            additional_kwargs={
+                                "usage": usage_metadata if usage_metadata else None,
+                                "stop_reason": stop_reason,
+                            },
+                        ),
+                        citations=cur_citations,
+                        delta="",
+                        raw=dict(r),
+                    )
+                elif isinstance(r, RawMessageStartEvent):
+                    # Capture initial usage metadata from message_start
+                    if hasattr(r.message, "usage") and r.message.usage:
+                        usage_metadata = {
+                            "input_tokens": r.message.usage.input_tokens,
+                            "output_tokens": r.message.usage.output_tokens,
+                        }
+                elif isinstance(r, RawMessageDeltaEvent):
+                    # Update usage metadata and capture stop_reason from message_delta
+                    if hasattr(r, "usage") and r.usage:
+                        usage_metadata = {
+                            "input_tokens": r.usage.input_tokens,
+                            "output_tokens": r.usage.output_tokens,
+                        }
+                    if hasattr(r, "delta") and hasattr(r.delta, "stop_reason"):
+                        stop_reason = r.delta.stop_reason
+
+                    # Yield a final chunk with updated metadata including stop_reason
+                    yield AnthropicChatResponse(
+                        message=ChatMessage(
+                            role=role,
+                            blocks=content,
+                            additional_kwargs={
+                                "usage": usage_metadata if usage_metadata else None,
+                                "stop_reason": stop_reason,
+                            },
                         ),
                         citations=cur_citations,
                         delta="",
                         raw=dict(r),
                     )
+                elif isinstance(r, RawMessageStopEvent):
+                    # Final event - no additional data to capture
+                    pass
 
         return gen()
 

diff --git a/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-anthropic/pyproject.toml
@@ -27,7 +27,7 @@ dev = [
 
 [project]
 name = "llama-index-llms-anthropic"
-version = "0.10.1"
+version = "0.10.2"
 description = "llama-index llms anthropic integration"
 authors = [{name = "Your Name", email = "you@example.com"}]
 requires-python = ">=3.9,<4.0"