Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ API Reference
HuggingFaceEndpointTarget
limit_requests_per_minute
OpenAICompletionTarget
OpenAIChatAudioConfig
OpenAIImageTarget
OpenAIChatTarget
OpenAIResponseTarget
Expand Down
4 changes: 3 additions & 1 deletion pyrit/prompt_target/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pyrit.prompt_target.http_target.httpx_api_target import HTTPXAPITarget
from pyrit.prompt_target.hugging_face.hugging_face_chat_target import HuggingFaceChatTarget
from pyrit.prompt_target.hugging_face.hugging_face_endpoint_target import HuggingFaceEndpointTarget
from pyrit.prompt_target.openai.openai_chat_audio_config import OpenAIChatAudioConfig
from pyrit.prompt_target.openai.openai_chat_target import OpenAIChatTarget
from pyrit.prompt_target.openai.openai_completion_target import OpenAICompletionTarget
from pyrit.prompt_target.openai.openai_image_target import OpenAIImageTarget
Expand Down Expand Up @@ -51,8 +52,9 @@
"HuggingFaceEndpointTarget",
"limit_requests_per_minute",
"OpenAICompletionTarget",
"OpenAIImageTarget",
"OpenAIChatAudioConfig",
"OpenAIChatTarget",
"OpenAIImageTarget",
"OpenAIResponseTarget",
"OpenAIVideoTarget",
"OpenAITTSTarget",
Expand Down
55 changes: 55 additions & 0 deletions pyrit/prompt_target/openai/openai_chat_audio_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from dataclasses import dataclass
from typing import Any, Literal

# Voice names for audio output from the OpenAI Chat Completions API.
# Source: the `voice` field in openai/types/chat/chat_completion_audio_param.py (OpenAI SDK).
#   - Listed in the SDK Literal: alloy, ash, ballad, coral, echo, sage, shimmer, verse, marin, cedar
#   - Listed only in the SDK docstring: fable, nova, onyx (kept here for completeness)
# Note: the SDK types the field as Union[str, Literal[...]], so the API accepts any string.
ChatAudioVoice = Literal[
    "alloy",
    "ash",
    "ballad",
    "coral",
    "echo",
    "fable",
    "nova",
    "onyx",
    "sage",
    "shimmer",
    "verse",
    "marin",
    "cedar",
]

# Audio encodings the OpenAI Chat Completions API can return.
# Source: the `format` field in openai/types/chat/chat_completion_audio_param.py (OpenAI SDK),
# declared there as Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]].
ChatAudioFormat = Literal[
    "wav",
    "aac",
    "mp3",
    "flac",
    "opus",
    "pcm16",
]


@dataclass
class OpenAIChatAudioConfig:
    """
    Audio-output settings for the OpenAI Chat Completions API.

    Supplying an instance to OpenAIChatTarget turns on audio output for models
    that can produce it (e.g., gpt-4o-audio-preview).

    Note: These settings apply to the Chat Completions API only. The Responses API
    does not support audio input or output. For real-time audio, use RealtimeTarget
    instead.
    """

    # Voice used for the audio output. ChatAudioVoice lists the known voice names.
    voice: ChatAudioVoice

    # Encoding of the returned audio. ChatAudioFormat lists the supported formats.
    # Defaults to "wav".
    audio_format: ChatAudioFormat = "wav"

    # When True, earlier user messages that carry both audio and text are sent as
    # text (transcript) only, reducing bandwidth and token usage. The current (last)
    # user message still includes its audio. Defaults to True.
    prefer_transcript_for_history: bool = True

    def to_extra_body_parameters(self) -> dict[str, Any]:
        """
        Build the extra_body_parameters fragment that requests audio output.

        Returns:
            dict: A mapping with "modalities" set to text plus audio, and "audio"
            holding this config's voice and format.
        """
        audio_settings = {"voice": self.voice, "format": self.audio_format}
        return {"modalities": ["text", "audio"], "audio": audio_settings}
Loading