
Commit f979abf
feat(integrations): add litellm integration (#4864)
Add a first implementation of the litellm integration, supporting completion and embeddings.

Closes https://linear.app/getsentry/issue/PY-1828/add-agent-monitoring-support-for-litellm
Closes https://linear.app/getsentry/issue/TET-1218/litellm-testing

> [!NOTE]
> Introduce `LiteLLMIntegration`, which instruments LiteLLM chat/embeddings calls with spans, token usage, optional prompt logging, and exception capture.
>
> - **Integrations**:
>   - Add `sentry_sdk/integrations/litellm.py` with `LiteLLMIntegration`, registering LiteLLM `input`/`success`/`failure` callbacks.
>   - Start spans for `chat`/`embeddings` and set `gen_ai.*` metadata (provider/system, operation, model, and params like `max_tokens`, `temperature`, `top_p`, `stream`).
>   - Record LiteLLM-specific fields: `api_base`, `api_version`, `custom_llm_provider`.
>   - Optionally capture request/response messages when `include_prompts` and PII sending are enabled.
>   - Track token usage from the response `usage` object and capture exceptions; always finish spans.

Co-authored-by: Ivana Kellyer <ivana.kellyer@sentry.io>
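For orientation, a minimal usage sketch mirroring the docstring example in the new module below (the DSN is a placeholder; message capture requires both `send_default_pii=True` and the default `include_prompts=True`):

```python
import litellm
import sentry_sdk
from sentry_sdk.integrations.litellm import LiteLLMIntegration

sentry_sdk.init(
    dsn="your-dsn",  # placeholder
    send_default_pii=True,  # required for prompt/response capture
    integrations=[LiteLLMIntegration(include_prompts=True)],
)

# Once initialized, LiteLLM calls are traced via the registered callbacks.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
)
```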
Parent: bbd2a5d · Commit: f979abf

10 files changed (+825, −11 lines)

.github/workflows/test-integrations-ai.yml
Lines changed: 4 additions & 0 deletions

@@ -66,6 +66,10 @@ jobs:
         run: |
           set -x # print commands that are executed
           ./scripts/runtox.sh "py${{ matrix.python-version }}-langchain-notiktoken"
+      - name: Test litellm
+        run: |
+          set -x # print commands that are executed
+          ./scripts/runtox.sh "py${{ matrix.python-version }}-litellm"
       - name: Test openai-base
         run: |
           set -x # print commands that are executed

scripts/populate_tox/config.py
Lines changed: 3 additions & 0 deletions

@@ -214,6 +214,9 @@
         "package": "launchdarkly-server-sdk",
         "num_versions": 2,
     },
+    "litellm": {
+        "package": "litellm",
+    },
     "litestar": {
         "package": "litestar",
         "deps": {

scripts/populate_tox/releases.jsonl

Lines changed: 4 additions & 3 deletions
Large diffs are not rendered by default.

scripts/split_tox_gh_actions/split_tox_gh_actions.py
Lines changed: 1 addition & 0 deletions

@@ -74,6 +74,7 @@
     "cohere",
     "langchain-base",
     "langchain-notiktoken",
+    "litellm",
     "openai-base",
     "openai-notiktoken",
     "langgraph",

sentry_sdk/integrations/__init__.py
Lines changed: 1 addition & 0 deletions

@@ -146,6 +146,7 @@ def iter_default_integrations(with_auto_enabling_integrations):
     "langchain": (0, 1, 0),
     "langgraph": (0, 6, 6),
     "launchdarkly": (9, 8, 0),
+    "litellm": (1, 77, 5),
     "loguru": (0, 7, 0),
     "openai": (1, 0, 0),
     "openai_agents": (0, 0, 19),

sentry_sdk/integrations/litellm.py
Lines changed: 251 additions & 0 deletions

@@ -0,0 +1,251 @@
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
    from typing import Any, Dict
    from datetime import datetime

try:
    import litellm  # type: ignore[import-not-found]
except ImportError:
    raise DidNotEnable("LiteLLM not installed")


def _get_metadata_dict(kwargs):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    """Get the metadata dictionary from the kwargs."""
    litellm_params = kwargs.setdefault("litellm_params", {})

    # We need this weird little dance, as metadata might be set but may be None initially.
    metadata = litellm_params.get("metadata")
    if metadata is None:
        metadata = {}
        litellm_params["metadata"] = metadata
    return metadata


def _input_callback(kwargs):
    # type: (Dict[str, Any]) -> None
    """Handle the start of a request."""
    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)

    if integration is None:
        return

    # Get key parameters
    full_model = kwargs.get("model", "")
    try:
        model, provider, _, _ = litellm.get_llm_provider(full_model)
    except Exception:
        model = full_model
        provider = "unknown"

    messages = kwargs.get("messages", [])
    operation = "chat" if messages else "embeddings"

    # Start a new span/transaction
    span = get_start_span_function()(
        op=(
            consts.OP.GEN_AI_CHAT
            if operation == "chat"
            else consts.OP.GEN_AI_EMBEDDINGS
        ),
        name=f"{operation} {model}",
        origin=LiteLLMIntegration.origin,
    )
    span.__enter__()

    # Store span for later
    _get_metadata_dict(kwargs)["_sentry_span"] = span

    # Set basic data
    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)

    # Record messages if allowed
    if messages and should_send_default_pii() and integration.include_prompts:
        set_data_normalized(
            span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
        )

    # Record other parameters
    params = {
        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
        "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
        "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
    }
    for key, attribute in params.items():
        value = kwargs.get(key)
        if value is not None:
            set_data_normalized(span, attribute, value)

    # Record LiteLLM-specific parameters
    litellm_params = {
        "api_base": kwargs.get("api_base"),
        "api_version": kwargs.get("api_version"),
        "custom_llm_provider": kwargs.get("custom_llm_provider"),
    }
    for key, value in litellm_params.items():
        if value is not None:
            set_data_normalized(span, f"gen_ai.litellm.{key}", value)


def _success_callback(kwargs, completion_response, start_time, end_time):
    # type: (Dict[str, Any], Any, datetime, datetime) -> None
    """Handle successful completion."""

    span = _get_metadata_dict(kwargs).get("_sentry_span")
    if span is None:
        return

    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
    if integration is None:
        return

    try:
        # Record model information
        if hasattr(completion_response, "model"):
            set_data_normalized(
                span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
            )

        # Record response content if allowed
        if should_send_default_pii() and integration.include_prompts:
            if hasattr(completion_response, "choices"):
                response_messages = []
                for choice in completion_response.choices:
                    if hasattr(choice, "message"):
                        if hasattr(choice.message, "model_dump"):
                            response_messages.append(choice.message.model_dump())
                        elif hasattr(choice.message, "dict"):
                            response_messages.append(choice.message.dict())
                        else:
                            # Fallback for basic message objects
                            msg = {}
                            if hasattr(choice.message, "role"):
                                msg["role"] = choice.message.role
                            if hasattr(choice.message, "content"):
                                msg["content"] = choice.message.content
                            if hasattr(choice.message, "tool_calls"):
                                msg["tool_calls"] = choice.message.tool_calls
                            response_messages.append(msg)

                if response_messages:
                    set_data_normalized(
                        span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
                    )

        # Record token usage
        if hasattr(completion_response, "usage"):
            usage = completion_response.usage
            record_token_usage(
                span,
                input_tokens=getattr(usage, "prompt_tokens", None),
                output_tokens=getattr(usage, "completion_tokens", None),
                total_tokens=getattr(usage, "total_tokens", None),
            )

    finally:
        # Always finish the span and clean up
        span.__exit__(None, None, None)


def _failure_callback(kwargs, exception, start_time, end_time):
    # type: (Dict[str, Any], Exception, datetime, datetime) -> None
    """Handle request failure."""
    span = _get_metadata_dict(kwargs).get("_sentry_span")
    if span is None:
        return

    try:
        # Capture the exception
        event, hint = event_from_exception(
            exception,
            client_options=sentry_sdk.get_client().options,
            mechanism={"type": "litellm", "handled": False},
        )
        sentry_sdk.capture_event(event, hint=hint)
    finally:
        # Always finish the span and clean up
        span.__exit__(type(exception), exception, None)


class LiteLLMIntegration(Integration):
    """
    LiteLLM integration for Sentry.

    This integration automatically captures LiteLLM API calls and sends them to Sentry
    for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM
    supports, including OpenAI, Anthropic, Google, Cohere, and many others.

    Features:
    - Automatic exception capture for all LiteLLM calls
    - Token usage tracking across all providers
    - Provider detection and attribution
    - Input/output message capture (configurable)
    - Streaming response support
    - Cost tracking integration

    Usage:

    ```python
    import litellm
    import sentry_sdk

    # Initialize Sentry with the LiteLLM integration
    sentry_sdk.init(
        dsn="your-dsn",
        send_default_pii=True,
        integrations=[
            sentry_sdk.integrations.LiteLLMIntegration(
                include_prompts=True  # Set to False to exclude message content
            )
        ]
    )

    # All LiteLLM calls will now be monitored
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    ```

    Configuration:
    - include_prompts (bool): Whether to include prompts and responses in spans.
      Defaults to True. Set to False to exclude potentially sensitive data.
    """

    identifier = "litellm"
    origin = f"auto.ai.{identifier}"

    def __init__(self, include_prompts=True):
        # type: (LiteLLMIntegration, bool) -> None
        self.include_prompts = include_prompts

    @staticmethod
    def setup_once():
        # type: () -> None
        """Set up LiteLLM callbacks for monitoring."""
        litellm.input_callback = litellm.input_callback or []
        if _input_callback not in litellm.input_callback:
            litellm.input_callback.append(_input_callback)

        litellm.success_callback = litellm.success_callback or []
        if _success_callback not in litellm.success_callback:
            litellm.success_callback.append(_success_callback)

        litellm.failure_callback = litellm.failure_callback or []
        if _failure_callback not in litellm.failure_callback:
            litellm.failure_callback.append(_failure_callback)
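
Worth noting: content capture is doubly gated, since both `should_send_default_pii()` and `include_prompts` must be true. A minimal sketch of opting out of prompt/response capture while keeping spans, token usage, and error capture (the DSN is a placeholder):

```python
import sentry_sdk
from sentry_sdk.integrations.litellm import LiteLLMIntegration

sentry_sdk.init(
    dsn="your-dsn",  # placeholder
    # Leaving send_default_pii at its default (False) already prevents
    # message capture; include_prompts=False makes the opt-out explicit.
    integrations=[LiteLLMIntegration(include_prompts=False)],
)
```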

setup.py
Lines changed: 1 addition & 0 deletions

@@ -65,6 +65,7 @@ def get_file_text(file_name):
     "langchain": ["langchain>=0.0.210"],
     "langgraph": ["langgraph>=0.6.6"],
     "launchdarkly": ["launchdarkly-server-sdk>=9.8.0"],
+    "litellm": ["litellm>=1.77.5"],
     "litestar": ["litestar>=2.0.0"],
     "loguru": ["loguru>=0.5"],
     "openai": ["openai>=1.0.0", "tiktoken>=0.3.0"],

tests/integrations/litellm/__init__.py

Whitespace-only changes.
