66from sentry_sdk .ai .monitoring import record_token_usage
77from sentry_sdk .ai .utils import (
88 get_start_span_function ,
9+ normalize_message_roles ,
910 set_data_normalized ,
1011 truncate_and_annotate_messages ,
1112 transform_openai_content_part ,
1718from sentry_sdk .utils import event_from_exception
1819
1920if TYPE_CHECKING :
20- from typing import Any , Dict , List
21+ from typing import Any , Dict , List , Optional
2122 from datetime import datetime
2223
2324try :
@@ -84,16 +85,17 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
8485 call_type = kwargs .get ("call_type" , None )
8586 if call_type == "embedding" or call_type == "aembedding" :
8687 operation = "embeddings"
88+ op = consts .OP .GEN_AI_EMBEDDINGS
89+ elif call_type == "responses" or call_type == "aresponses" :
90+ operation = "responses"
91+ op = consts .OP .GEN_AI_RESPONSES
8792 else :
8893 operation = "chat"
94+ op = consts .OP .GEN_AI_CHAT
8995
9096 # Start a new span/transaction
9197 span = get_start_span_function ()(
92- op = (
93- consts .OP .GEN_AI_CHAT
94- if operation == "chat"
95- else consts .OP .GEN_AI_EMBEDDINGS
96- ),
98+ op = op ,
9799 name = f"{ operation } { model } " ,
98100 origin = LiteLLMIntegration .origin ,
99101 )
@@ -106,14 +108,15 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
106108 set_data_normalized (span , SPANDATA .GEN_AI_SYSTEM , provider )
107109 set_data_normalized (span , SPANDATA .GEN_AI_OPERATION_NAME , operation )
108110
109- # Record input/messages if allowed
110- if should_send_default_pii () and integration .include_prompts :
111- if operation == "embeddings" :
112- # For embeddings, look for the 'input' parameter
111+ # Per-operation request data. Conversation id (responses) is set
112+ # unconditionally; user-content fields are gated on PII / include_prompts.
113+ record_prompts = should_send_default_pii () and integration .include_prompts
114+ scope = sentry_sdk .get_current_scope ()
115+
116+ if operation == "embeddings" :
117+ if record_prompts :
113118 embedding_input = kwargs .get ("input" )
114119 if embedding_input :
115- scope = sentry_sdk .get_current_scope ()
116- # Normalize to list format
117120 input_list = (
118121 embedding_input
119122 if isinstance (embedding_input , list )
@@ -129,11 +132,50 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
129132 messages_data ,
130133 unpack = False ,
131134 )
132- else :
133- # For chat, look for the 'messages' parameter
135+
136+ elif operation == "responses" :
137+ # litellm unpacks `extra_body` into the request body, so the
138+ # `conversation` field shows up in additional_args.complete_input_dict
139+ # rather than as a top-level kwarg.
140+ complete_input = (kwargs .get ("additional_args" ) or {}).get (
141+ "complete_input_dict"
142+ ) or {}
143+ conversation = complete_input .get ("conversation" )
144+ if conversation is not None :
145+ conversation_id : "Optional[str]" = None
146+ if isinstance (conversation , str ):
147+ conversation_id = conversation
148+ elif isinstance (conversation , dict ):
149+ conversation_id = conversation .get ("id" )
150+ if conversation_id is not None :
151+ set_data_normalized (
152+ span , SPANDATA .GEN_AI_CONVERSATION_ID , conversation_id
153+ )
154+
155+ if record_prompts :
156+ # `input` is either a string or a list of message dicts (same
157+ # shape as OpenAI Responses API).
158+ responses_input = kwargs .get ("input" )
159+ if responses_input :
160+ if isinstance (responses_input , str ):
161+ input_messages = [responses_input ]
162+ else :
163+ input_messages = list (responses_input )
164+ normalized = normalize_message_roles (input_messages ) # type: ignore[arg-type]
165+ messages_data = truncate_and_annotate_messages (normalized , span , scope )
166+ if messages_data is not None :
167+ set_data_normalized (
168+ span ,
169+ SPANDATA .GEN_AI_REQUEST_MESSAGES ,
170+ messages_data ,
171+ unpack = False ,
172+ )
173+
174+ else :
175+ # Chat completions.
176+ if record_prompts :
134177 messages = kwargs .get ("messages" , [])
135178 if messages :
136- scope = sentry_sdk .get_current_scope ()
137179 messages = _convert_message_parts (messages )
138180 messages_data = truncate_and_annotate_messages (messages , span , scope )
139181 if messages_data is not None :
@@ -166,11 +208,24 @@ async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
166208
167209def _success_callback (
168210 kwargs : "Dict[str, Any]" ,
169- completion_response : "Any" ,
211+ response : "Any" ,
170212 start_time : "datetime" ,
171213 end_time : "datetime" ,
172214) -> None :
173- """Handle successful completion."""
215+ """Handle a successful chat completion, embeddings, or Responses API call.
216+
217+ The shape of `response` differs between API paths:
218+ - Chat Completions: ModelResponse with ``.choices[].message`` and
219+ ``.usage`` carrying ``prompt_tokens`` / ``completion_tokens``.
220+ - Responses API (non-streaming): ResponsesAPIResponse with ``.output[]``
221+ items (``message`` / ``function_call``) and ``.usage`` carrying
222+ ``input_tokens`` / ``output_tokens``.
223+ - Responses API (streaming): a ResponseCompletedEvent wrapper
224+ ``{type: "response.completed", response: ResponsesAPIResponse}``,
225+ which we unwrap below.
226+ - Embeddings: CreateEmbeddingResponse with ``.usage`` only (no choices
227+ or output).
228+ """
174229
175230 metadata = _get_metadata_dict (kwargs )
176231 span = metadata .get ("_sentry_span" )
@@ -181,18 +236,25 @@ def _success_callback(
181236 if integration is None :
182237 return
183238
239+ # Streaming Responses API: unwrap the ResponseCompletedEvent so the rest of
240+ # the function sees the assembled ResponsesAPIResponse directly.
241+ if getattr (response , "type" , None ) == "response.completed" and hasattr (
242+ response , "response"
243+ ):
244+ response = response .response
245+
184246 try :
185- # Record model information
186- if hasattr (completion_response , "model" ):
187- set_data_normalized (
188- span , SPANDATA .GEN_AI_RESPONSE_MODEL , completion_response .model
189- )
247+ # `model` is set by all API shapes (chat / responses / embeddings).
248+ if hasattr (response , "model" ):
249+ set_data_normalized (span , SPANDATA .GEN_AI_RESPONSE_MODEL , response .model )
190250
191- # Record response content if allowed
251+ # Response content: structure depends on the API shape. Embeddings have
252+ # neither ``choices`` nor ``output``, so we just skip this block.
192253 if should_send_default_pii () and integration .include_prompts :
193- if hasattr (completion_response , "choices" ):
254+ if hasattr (response , "choices" ):
255+ # Chat Completions API.
194256 response_messages = []
195- for choice in completion_response .choices :
257+ for choice in response .choices :
196258 if hasattr (choice , "message" ):
197259 if hasattr (choice .message , "model_dump" ):
198260 response_messages .append (choice .message .model_dump ())
@@ -213,14 +275,53 @@ def _success_callback(
213275 set_data_normalized (
214276 span , SPANDATA .GEN_AI_RESPONSE_TEXT , response_messages
215277 )
278+ elif hasattr (response , "output" ):
279+ # Responses API: split message text from function-call items.
280+ output_text : "List[Any]" = []
281+ tool_calls : "List[Any]" = []
282+ for output in response .output :
283+ output_type = getattr (output , "type" , None )
284+ if output_type == "function_call" :
285+ if hasattr (output , "model_dump" ):
286+ tool_calls .append (output .model_dump ())
287+ elif hasattr (output , "dict" ):
288+ tool_calls .append (output .dict ())
289+ elif output_type == "message" :
290+ for content_item in getattr (output , "content" , []) or []:
291+ text = getattr (content_item , "text" , None )
292+ if text is not None :
293+ output_text .append (text )
294+ elif hasattr (content_item , "model_dump" ):
295+ output_text .append (content_item .model_dump ())
296+ elif hasattr (content_item , "dict" ):
297+ output_text .append (content_item .dict ())
298+
299+ if tool_calls :
300+ set_data_normalized (
301+ span ,
302+ SPANDATA .GEN_AI_RESPONSE_TOOL_CALLS ,
303+ tool_calls ,
304+ unpack = False ,
305+ )
306+ if output_text :
307+ set_data_normalized (
308+ span , SPANDATA .GEN_AI_RESPONSE_TEXT , output_text
309+ )
216310
217- # Record token usage
218- if hasattr (completion_response , "usage" ):
219- usage = completion_response .usage
311+ # Token usage field names differ across APIs:
312+ # Chat Completions / Embeddings: prompt_tokens / completion_tokens
313+ # Responses API (non-streaming): input_tokens / output_tokens
314+ # Responses API (streaming): prompt_tokens / completion_tokens
315+ # (litellm normalizes to chat-completion names when assembling the
316+ # streaming response).
317+ if hasattr (response , "usage" ):
318+ usage = response .usage
220319 record_token_usage (
221320 span ,
222- input_tokens = getattr (usage , "prompt_tokens" , None ),
223- output_tokens = getattr (usage , "completion_tokens" , None ),
321+ input_tokens = getattr (usage , "prompt_tokens" , None )
322+ or getattr (usage , "input_tokens" , None ),
323+ output_tokens = getattr (usage , "completion_tokens" , None )
324+ or getattr (usage , "output_tokens" , None ),
224325 total_tokens = getattr (usage , "total_tokens" , None ),
225326 )
226327
0 commit comments