from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
    from typing import Any, Dict
    from datetime import datetime

try:
    import litellm  # type: ignore[import-not-found]
except ImportError:
    raise DidNotEnable("LiteLLM not installed")


def _get_metadata_dict(kwargs):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    """Get the metadata dictionary from the kwargs."""
    litellm_params = kwargs.setdefault("litellm_params", {})

    # we need this weird little dance, as metadata might be set but may be None initially
    metadata = litellm_params.get("metadata")
    if metadata is None:
        metadata = {}
        litellm_params["metadata"] = metadata
    return metadata

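# Note on control flow: _input_callback opens a span and stashes it in LiteLLM's
# per-request metadata; _success_callback/_failure_callback receive the same
# kwargs once the call finishes and use that stash to find and close the span.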
def _input_callback(kwargs):
    # type: (Dict[str, Any]) -> None
    """Handle the start of a request."""
    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)

    if integration is None:
        return

    # Get key parameters
    full_model = kwargs.get("model", "")
    try:
        model, provider, _, _ = litellm.get_llm_provider(full_model)
    except Exception:
        model = full_model
        provider = "unknown"

    messages = kwargs.get("messages", [])
    operation = "chat" if messages else "embeddings"

    # Start a new span/transaction
    span = get_start_span_function()(
        op=(
            consts.OP.GEN_AI_CHAT
            if operation == "chat"
            else consts.OP.GEN_AI_EMBEDDINGS
        ),
        name=f"{operation} {model}",
        origin=LiteLLMIntegration.origin,
    )
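    # The span is entered manually here and exited in _success_callback or
    # _failure_callback once LiteLLM reports the outcome of the request.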
    span.__enter__()

    # Store span for later
    _get_metadata_dict(kwargs)["_sentry_span"] = span

    # Set basic data
    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)

    # Record messages if allowed
    if messages and should_send_default_pii() and integration.include_prompts:
        set_data_normalized(
            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
        )

    # Record other parameters
    params = {
        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
        "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
        "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
    }
    for key, attribute in params.items():
        value = kwargs.get(key)
        if value is not None:
            set_data_normalized(span, attribute, value)

    # Record LiteLLM-specific parameters
    litellm_params = {
        "api_base": kwargs.get("api_base"),
        "api_version": kwargs.get("api_version"),
        "custom_llm_provider": kwargs.get("custom_llm_provider"),
    }
    for key, value in litellm_params.items():
        if value is not None:
            set_data_normalized(span, f"gen_ai.litellm.{key}", value)

def _success_callback(kwargs, completion_response, start_time, end_time):
    # type: (Dict[str, Any], Any, datetime, datetime) -> None
    """Handle successful completion."""

    span = _get_metadata_dict(kwargs).get("_sentry_span")
    if span is None:
        return

    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
    if integration is None:
        return

    try:
        # Record model information
        if hasattr(completion_response, "model"):
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
            )

        # Record response content if allowed
        if should_send_default_pii() and integration.include_prompts:
            if hasattr(completion_response, "choices"):
                response_messages = []
                for choice in completion_response.choices:
                    if hasattr(choice, "message"):
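                        # LiteLLM messages are usually pydantic models, so try
                        # model_dump() (pydantic v2) first, then dict() (v1),
                        # before falling back to reading attributes directly.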
                        if hasattr(choice.message, "model_dump"):
                            response_messages.append(choice.message.model_dump())
                        elif hasattr(choice.message, "dict"):
                            response_messages.append(choice.message.dict())
                        else:
                            # Fallback for basic message objects
                            msg = {}
                            if hasattr(choice.message, "role"):
                                msg["role"] = choice.message.role
                            if hasattr(choice.message, "content"):
                                msg["content"] = choice.message.content
                            if hasattr(choice.message, "tool_calls"):
                                msg["tool_calls"] = choice.message.tool_calls
                            response_messages.append(msg)

                if response_messages:
                    set_data_normalized(
                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
                    )

        # Record token usage
        if hasattr(completion_response, "usage"):
            usage = completion_response.usage
            record_token_usage(
                span,
                input_tokens=getattr(usage, "prompt_tokens", None),
                output_tokens=getattr(usage, "completion_tokens", None),
                total_tokens=getattr(usage, "total_tokens", None),
            )

    finally:
        # Always finish the span and clean up
        span.__exit__(None, None, None)

def _failure_callback(kwargs, exception, start_time, end_time):
    # type: (Dict[str, Any], Exception, datetime, datetime) -> None
    """Handle request failure."""
    span = _get_metadata_dict(kwargs).get("_sentry_span")
    if span is None:
        return

    try:
        # Capture the exception
        event, hint = event_from_exception(
            exception,
            client_options=sentry_sdk.get_client().options,
            mechanism={"type": "litellm", "handled": False},
        )
        sentry_sdk.capture_event(event, hint=hint)
    finally:
        # Always finish the span and clean up
        span.__exit__(type(exception), exception, None)

class LiteLLMIntegration(Integration):
    """
    LiteLLM integration for Sentry.

    This integration automatically captures LiteLLM API calls and sends them to Sentry
    for monitoring and error tracking. It works with all 100+ LLM providers that
    LiteLLM supports, including OpenAI, Anthropic, Google, Cohere, and many others.

    Features:
    - Automatic exception capture for all LiteLLM calls
    - Token usage tracking across all providers
    - Provider detection and attribution
    - Input/output message capture (configurable)
    - Streaming response support
    - Cost tracking integration

    Usage:

    ```python
    import litellm
    import sentry_sdk
    from sentry_sdk.integrations.litellm import LiteLLMIntegration

    # Initialize Sentry with the LiteLLM integration
    sentry_sdk.init(
        dsn="your-dsn",
        send_default_pii=True,
        integrations=[
            LiteLLMIntegration(
                include_prompts=True  # Set to False to exclude message content
            )
        ],
    )

    # All LiteLLM calls will now be monitored
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    ```

    Configuration:
    - include_prompts (bool): Whether to include prompts and responses in spans.
      Defaults to True. Set to False to exclude potentially sensitive data. Message
      content is only recorded when `send_default_pii=True` is also set.
    """

    identifier = "litellm"
    origin = f"auto.ai.{identifier}"

    def __init__(self, include_prompts=True):
        # type: (LiteLLMIntegration, bool) -> None
        self.include_prompts = include_prompts

    @staticmethod
    def setup_once():
        # type: () -> None
        """Set up LiteLLM callbacks for monitoring."""
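        # Only append each callback if it is not already registered, so that
        # repeated setup does not add duplicate handlers.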
        litellm.input_callback = litellm.input_callback or []
        if _input_callback not in litellm.input_callback:
            litellm.input_callback.append(_input_callback)

        litellm.success_callback = litellm.success_callback or []
        if _success_callback not in litellm.success_callback:
            litellm.success_callback.append(_success_callback)

        litellm.failure_callback = litellm.failure_callback or []
        if _failure_callback not in litellm.failure_callback:
            litellm.failure_callback.append(_failure_callback)