Skip to content

Commit bb31cad

Browse files
committed
feat(integrations): add support for the litellm responses/aresponses APIs
1 parent 18a5828 commit bb31cad

2 files changed

Lines changed: 345 additions & 30 deletions

File tree

sentry_sdk/integrations/litellm.py

Lines changed: 131 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from sentry_sdk.ai.monitoring import record_token_usage
77
from sentry_sdk.ai.utils import (
88
get_start_span_function,
9+
normalize_message_roles,
910
set_data_normalized,
1011
truncate_and_annotate_messages,
1112
transform_openai_content_part,
@@ -17,7 +18,7 @@
1718
from sentry_sdk.utils import event_from_exception
1819

1920
if TYPE_CHECKING:
20-
from typing import Any, Dict, List
21+
from typing import Any, Dict, List, Optional
2122
from datetime import datetime
2223

2324
try:
@@ -84,16 +85,17 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
8485
call_type = kwargs.get("call_type", None)
8586
if call_type == "embedding" or call_type == "aembedding":
8687
operation = "embeddings"
88+
op = consts.OP.GEN_AI_EMBEDDINGS
89+
elif call_type == "responses" or call_type == "aresponses":
90+
operation = "responses"
91+
op = consts.OP.GEN_AI_RESPONSES
8792
else:
8893
operation = "chat"
94+
op = consts.OP.GEN_AI_CHAT
8995

9096
# Start a new span/transaction
9197
span = get_start_span_function()(
92-
op=(
93-
consts.OP.GEN_AI_CHAT
94-
if operation == "chat"
95-
else consts.OP.GEN_AI_EMBEDDINGS
96-
),
98+
op=op,
9799
name=f"{operation} {model}",
98100
origin=LiteLLMIntegration.origin,
99101
)
@@ -106,14 +108,15 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
106108
set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
107109
set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)
108110

109-
# Record input/messages if allowed
110-
if should_send_default_pii() and integration.include_prompts:
111-
if operation == "embeddings":
112-
# For embeddings, look for the 'input' parameter
111+
# Per-operation request data. Conversation id (responses) is set
112+
# unconditionally; user-content fields are gated on PII / include_prompts.
113+
record_prompts = should_send_default_pii() and integration.include_prompts
114+
scope = sentry_sdk.get_current_scope()
115+
116+
if operation == "embeddings":
117+
if record_prompts:
113118
embedding_input = kwargs.get("input")
114119
if embedding_input:
115-
scope = sentry_sdk.get_current_scope()
116-
# Normalize to list format
117120
input_list = (
118121
embedding_input
119122
if isinstance(embedding_input, list)
@@ -129,11 +132,50 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
129132
messages_data,
130133
unpack=False,
131134
)
132-
else:
133-
# For chat, look for the 'messages' parameter
135+
136+
elif operation == "responses":
137+
# litellm unpacks `extra_body` into the request body, so the
138+
# `conversation` field shows up in additional_args.complete_input_dict
139+
# rather than as a top-level kwarg.
140+
complete_input = (kwargs.get("additional_args") or {}).get(
141+
"complete_input_dict"
142+
) or {}
143+
conversation = complete_input.get("conversation")
144+
if conversation is not None:
145+
conversation_id: "Optional[str]" = None
146+
if isinstance(conversation, str):
147+
conversation_id = conversation
148+
elif isinstance(conversation, dict):
149+
conversation_id = conversation.get("id")
150+
if conversation_id is not None:
151+
set_data_normalized(
152+
span, SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id
153+
)
154+
155+
if record_prompts:
156+
# `input` is either a string or a list of message dicts (same
157+
# shape as OpenAI Responses API).
158+
responses_input = kwargs.get("input")
159+
if responses_input:
160+
if isinstance(responses_input, str):
161+
input_messages = [responses_input]
162+
else:
163+
input_messages = list(responses_input)
164+
normalized = normalize_message_roles(input_messages) # type: ignore[arg-type]
165+
messages_data = truncate_and_annotate_messages(normalized, span, scope)
166+
if messages_data is not None:
167+
set_data_normalized(
168+
span,
169+
SPANDATA.GEN_AI_REQUEST_MESSAGES,
170+
messages_data,
171+
unpack=False,
172+
)
173+
174+
else:
175+
# Chat completions.
176+
if record_prompts:
134177
messages = kwargs.get("messages", [])
135178
if messages:
136-
scope = sentry_sdk.get_current_scope()
137179
messages = _convert_message_parts(messages)
138180
messages_data = truncate_and_annotate_messages(messages, span, scope)
139181
if messages_data is not None:
@@ -166,11 +208,24 @@ async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
166208

167209
def _success_callback(
168210
kwargs: "Dict[str, Any]",
169-
completion_response: "Any",
211+
response: "Any",
170212
start_time: "datetime",
171213
end_time: "datetime",
172214
) -> None:
173-
"""Handle successful completion."""
215+
"""Handle a successful chat completion, embeddings, or Responses API call.
216+
217+
The shape of `response` differs between API paths:
218+
- Chat Completions: ModelResponse with ``.choices[].message`` and
219+
``.usage`` carrying ``prompt_tokens`` / ``completion_tokens``.
220+
- Responses API (non-streaming): ResponsesAPIResponse with ``.output[]``
221+
items (``message`` / ``function_call``) and ``.usage`` carrying
222+
``input_tokens`` / ``output_tokens``.
223+
- Responses API (streaming): a ResponseCompletedEvent wrapper
224+
``{type: "response.completed", response: ResponsesAPIResponse}``,
225+
which we unwrap below.
226+
- Embeddings: CreateEmbeddingResponse with ``.usage`` only (no choices
227+
or output).
228+
"""
174229

175230
metadata = _get_metadata_dict(kwargs)
176231
span = metadata.get("_sentry_span")
@@ -181,18 +236,25 @@ def _success_callback(
181236
if integration is None:
182237
return
183238

239+
# Streaming Responses API: unwrap the ResponseCompletedEvent so the rest of
240+
# the function sees the assembled ResponsesAPIResponse directly.
241+
if getattr(response, "type", None) == "response.completed" and hasattr(
242+
response, "response"
243+
):
244+
response = response.response
245+
184246
try:
185-
# Record model information
186-
if hasattr(completion_response, "model"):
187-
set_data_normalized(
188-
span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model
189-
)
247+
# `model` is set by all API shapes (chat / responses / embeddings).
248+
if hasattr(response, "model"):
249+
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_MODEL, response.model)
190250

191-
# Record response content if allowed
251+
# Response content: structure depends on the API shape. Embeddings have
252+
# neither ``choices`` nor ``output``, so we just skip this block.
192253
if should_send_default_pii() and integration.include_prompts:
193-
if hasattr(completion_response, "choices"):
254+
if hasattr(response, "choices"):
255+
# Chat Completions API.
194256
response_messages = []
195-
for choice in completion_response.choices:
257+
for choice in response.choices:
196258
if hasattr(choice, "message"):
197259
if hasattr(choice.message, "model_dump"):
198260
response_messages.append(choice.message.model_dump())
@@ -213,14 +275,53 @@ def _success_callback(
213275
set_data_normalized(
214276
span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages
215277
)
278+
elif hasattr(response, "output"):
279+
# Responses API: split message text from function-call items.
280+
output_text: "List[Any]" = []
281+
tool_calls: "List[Any]" = []
282+
for output in response.output:
283+
output_type = getattr(output, "type", None)
284+
if output_type == "function_call":
285+
if hasattr(output, "model_dump"):
286+
tool_calls.append(output.model_dump())
287+
elif hasattr(output, "dict"):
288+
tool_calls.append(output.dict())
289+
elif output_type == "message":
290+
for content_item in getattr(output, "content", []) or []:
291+
text = getattr(content_item, "text", None)
292+
if text is not None:
293+
output_text.append(text)
294+
elif hasattr(content_item, "model_dump"):
295+
output_text.append(content_item.model_dump())
296+
elif hasattr(content_item, "dict"):
297+
output_text.append(content_item.dict())
298+
299+
if tool_calls:
300+
set_data_normalized(
301+
span,
302+
SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
303+
tool_calls,
304+
unpack=False,
305+
)
306+
if output_text:
307+
set_data_normalized(
308+
span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_text
309+
)
216310

217-
# Record token usage
218-
if hasattr(completion_response, "usage"):
219-
usage = completion_response.usage
311+
# Token usage field names differ across APIs:
312+
# Chat Completions / Embeddings: prompt_tokens / completion_tokens
313+
# Responses API (non-streaming): input_tokens / output_tokens
314+
# Responses API (streaming): prompt_tokens / completion_tokens
315+
# (litellm normalizes to chat-completion names when assembling the
316+
# streaming response).
317+
if hasattr(response, "usage"):
318+
usage = response.usage
220319
record_token_usage(
221320
span,
222-
input_tokens=getattr(usage, "prompt_tokens", None),
223-
output_tokens=getattr(usage, "completion_tokens", None),
321+
input_tokens=getattr(usage, "prompt_tokens", None)
322+
or getattr(usage, "input_tokens", None),
323+
output_tokens=getattr(usage, "completion_tokens", None)
324+
or getattr(usage, "output_tokens", None),
224325
total_tokens=getattr(usage, "total_tokens", None),
225326
)
226327

0 commit comments

Comments (0)