diff --git a/Sources/AnyLanguageModel/LanguageModelSession.swift b/Sources/AnyLanguageModel/LanguageModelSession.swift
index 6996b18..51d3b0d 100644
--- a/Sources/AnyLanguageModel/LanguageModelSession.swift
+++ b/Sources/AnyLanguageModel/LanguageModelSession.swift
@@ -117,11 +117,6 @@ public final class LanguageModelSession: @unchecked Sendable {
         let relay = AsyncThrowingStream<ResponseStream<Content>.Snapshot, any Error> { continuation in
             let stream = upstream
             Task {
-                // Add prompt to transcript when stream starts
-                await MainActor.run {
-                    session.transcript.append(promptEntry)
-                }
-
                 await session.beginResponding()
                 var lastSnapshot: ResponseStream<Content>.Snapshot?
                 do {
@@ -225,7 +220,7 @@ public final class LanguageModelSession: @unchecked Sendable {
         includeSchemaInPrompt: Bool = true,
         options: GenerationOptions = GenerationOptions()
     ) -> sending ResponseStream<Content> where Content: Generable {
-        // Create prompt entry that will be added when stream starts
+        // Add prompt to transcript
         let promptEntry = Transcript.Entry.prompt(
             Transcript.Prompt(
                 segments: [.text(.init(content: prompt.description))],
@@ -233,6 +228,7 @@ public final class LanguageModelSession: @unchecked Sendable {
                 responseFormat: nil
             )
         )
+        transcript.append(promptEntry)
 
         return wrapStream(
             model.streamResponse(
diff --git a/Sources/AnyLanguageModel/Models/AnthropicLanguageModel.swift b/Sources/AnyLanguageModel/Models/AnthropicLanguageModel.swift
index d245095..dadcd54 100644
--- a/Sources/AnyLanguageModel/Models/AnthropicLanguageModel.swift
+++ b/Sources/AnyLanguageModel/Models/AnthropicLanguageModel.swift
@@ -325,11 +325,6 @@ public struct AnthropicLanguageModel: LanguageModel {
         let url = baseURL.appendingPathComponent("v1/messages")
         let headers = buildHeaders()
 
-        let userSegments = extractPromptSegments(from: session, fallbackText: prompt.description)
-        let messages = [
-            AnthropicMessage(role: .user, content: convertSegmentsToAnthropicContent(userSegments))
-        ]
-
         // Convert available tools to Anthropic format
         let anthropicTools: [AnthropicTool] = try session.tools.map { tool in
             try convertToolToAnthropicFormat(tool)
@@ -338,7 +333,7 @@ public struct AnthropicLanguageModel: LanguageModel {
         let params = try createMessageParams(
             model: model,
             system: nil,
-            messages: messages,
+            messages: session.transcript.toAnthropicMessages(),
             tools: anthropicTools.isEmpty ? nil : anthropicTools,
             options: options
         )
@@ -396,11 +391,6 @@ public struct AnthropicLanguageModel: LanguageModel {
             fatalError("AnthropicLanguageModel only supports generating String content")
         }
 
-        let userSegments = extractPromptSegments(from: session, fallbackText: prompt.description)
-        let messages = [
-            AnthropicMessage(role: .user, content: convertSegmentsToAnthropicContent(userSegments))
-        ]
-
         let url = baseURL.appendingPathComponent("v1/messages")
 
         let stream: AsyncThrowingStream<LanguageModelSession.ResponseStream<Content>.Snapshot, any Error> = .init {
@@ -417,7 +407,7 @@ public struct AnthropicLanguageModel: LanguageModel {
                 var params = try createMessageParams(
                     model: model,
                     system: nil,
-                    messages: messages,
+                    messages: session.transcript.toAnthropicMessages(),
                     tools: anthropicTools.isEmpty ? nil : anthropicTools,
                     options: options
                 )
@@ -640,8 +630,77 @@ private func toGeneratedContent(_ value: [String: JSONValue]?) throws -> Generat
     return try GeneratedContent(json: json)
 }
 
+private func fromGeneratedContent(_ content: GeneratedContent) throws -> [String: JSONValue] {
+    let data = try JSONEncoder().encode(content)
+    let jsonValue = try JSONDecoder().decode(JSONValue.self, from: data)
+
+    guard case .object(let dict) = jsonValue else {
+        return [:]
+    }
+    return dict
+}
+
 // MARK: - Supporting Types
 
+extension Transcript {
+    fileprivate func toAnthropicMessages() -> [AnthropicMessage] {
+        var messages = [AnthropicMessage]()
+        for item in self {
+            switch item {
+            case .instructions(let instructions):
+                messages.append(
+                    .init(
+                        role: .user,
+                        content: convertSegmentsToAnthropicContent(instructions.segments)
+                    )
+                )
+            case .prompt(let prompt):
+                messages.append(
+                    .init(
+                        role: .user,
+                        content: convertSegmentsToAnthropicContent(prompt.segments)
+                    )
+                )
+            case .response(let response):
+                messages.append(
+                    .init(
+                        role: .assistant,
+                        content: convertSegmentsToAnthropicContent(response.segments)
+                    )
+                )
+            case .toolCalls(let toolCalls):
+                // Add assistant message with tool use blocks
+                let toolUseBlocks: [AnthropicContent] = toolCalls.map { call in
+                    let input = try? fromGeneratedContent(call.arguments)
+                    return .toolUse(AnthropicToolUse(
+                        id: call.id,
+                        name: call.toolName,
+                        input: input
+                    ))
+                }
+                messages.append(
+                    .init(
+                        role: .assistant,
+                        content: toolUseBlocks
+                    )
+                )
+            case .toolOutput(let toolOutput):
+                // Add user message with tool result
+                messages.append(
+                    .init(
+                        role: .user,
+                        content: [.toolResult(AnthropicToolResult(
+                            toolUseId: toolOutput.id,
+                            content: convertSegmentsToAnthropicContent(toolOutput.segments)
+                        ))]
+                    )
+                )
+            }
+        }
+        return messages
+    }
+}
+
 private struct AnthropicTool: Codable, Sendable {
     let name: String
     let description: String
@@ -665,10 +724,11 @@ private enum AnthropicContent: Codable, Sendable {
     case text(AnthropicText)
     case image(AnthropicImage)
     case toolUse(AnthropicToolUse)
+    case toolResult(AnthropicToolResult)
 
     enum CodingKeys: String, CodingKey { case type }
-    enum ContentType: String, Codable { case text = "text", image = "image", toolUse = "tool_use" }
+    enum ContentType: String, Codable { case text = "text", image = "image", toolUse = "tool_use", toolResult = "tool_result" }
 
     init(from decoder: any Decoder) throws {
         let container = try decoder.container(keyedBy: CodingKeys.self)
@@ -680,6 +740,8 @@ private enum AnthropicContent: Codable, Sendable {
             self = .image(try AnthropicImage(from: decoder))
         case .toolUse:
             self = .toolUse(try AnthropicToolUse(from: decoder))
+        case .toolResult:
+            self = .toolResult(try AnthropicToolResult(from: decoder))
         }
     }
 
@@ -688,6 +750,7 @@ private enum AnthropicContent: Codable, Sendable {
         case .text(let t): try t.encode(to: encoder)
         case .image(let i): try i.encode(to: encoder)
         case .toolUse(let u): try u.encode(to: encoder)
+        case .toolResult(let r): try r.encode(to: encoder)
         }
     }
 }
@@ -752,27 +815,6 @@ private func convertSegmentsToAnthropicContent(_ segments: [Transcript.Segment])
     return blocks
 }
 
-private func extractPromptSegments(from session: LanguageModelSession, fallbackText: String) -> [Transcript.Segment] {
-    for entry in session.transcript.reversed() {
-        if case .prompt(let p) = entry {
-            // Skip prompts that are effectively empty (single empty text block)
-            let hasMeaningfulContent = p.segments.contains { segment in
-                switch segment {
-                case .text(let t):
-                    return !t.content.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
-                case .structure:
-                    return true
-                case .image:
-                    return true
-                }
-            }
-            if hasMeaningfulContent { return p.segments }
-            // Otherwise continue searching older entries
-        }
-    }
-    return [.text(.init(content: fallbackText))]
-}
-
 private struct AnthropicToolUse: Codable, Sendable {
     let type: String
     let id: String
@@ -787,6 +829,24 @@ private struct AnthropicToolUse: Codable, Sendable {
     }
 }
 
+private struct AnthropicToolResult: Codable, Sendable {
+    let type: String
+    let toolUseId: String
+    let content: [AnthropicContent]
+
+    enum CodingKeys: String, CodingKey {
+        case type
+        case toolUseId = "tool_use_id"
+        case content
+    }
+
+    init(toolUseId: String, content: [AnthropicContent]) {
+        self.type = "tool_result"
+        self.toolUseId = toolUseId
+        self.content = content
+    }
+}
+
 private struct AnthropicMessageResponse: Codable, Sendable {
     let id: String
     let type: String
diff --git a/Sources/AnyLanguageModel/Models/GeminiLanguageModel.swift b/Sources/AnyLanguageModel/Models/GeminiLanguageModel.swift
index 65ba5a3..3f76e53 100644
--- a/Sources/AnyLanguageModel/Models/GeminiLanguageModel.swift
+++ b/Sources/AnyLanguageModel/Models/GeminiLanguageModel.swift
@@ -247,19 +247,14 @@ public struct GeminiLanguageModel: LanguageModel {
             .appendingPathComponent("models/\(model):generateContent")
         let headers = buildHeaders()
 
-        let userSegments = extractPromptSegments(from: session, fallbackText: prompt.description)
-        var contents = [
-            GeminiContent(role: .user, parts: convertSegmentsToGeminiParts(userSegments))
-        ]
-
         let geminiTools = try buildTools(from: session.tools, serverTools: effectiveServerTools)
 
-        var allEntries: [Transcript.Entry] = []
+        var transcript = session.transcript
 
         // Multi-turn conversation loop for tool calling
         while true {
             let params = try createGenerateContentParams(
-                contents: contents,
+                contents: transcript.toGeminiContent(),
                 tools: geminiTools,
                 options: options,
                 thinking: effectiveThinking
@@ -285,33 +280,14 @@ public struct GeminiLanguageModel: LanguageModel {
             } ?? []
 
             if !functionCalls.isEmpty {
-                // Append the model's response with function calls to the conversation
-                contents.append(firstCandidate.content)
-
                 // Resolve function calls
                 let invocations = try await resolveFunctionCalls(functionCalls, session: session)
 
                 if !invocations.isEmpty {
-                    allEntries.append(.toolCalls(Transcript.ToolCalls(invocations.map(\.call))))
+                    transcript.append(.toolCalls(Transcript.ToolCalls(invocations.map(\.call))))
 
-                    // Build tool response parts for Gemini
-                    var toolParts: [GeminiPart] = []
                     for invocation in invocations {
-                        allEntries.append(.toolOutput(invocation.output))
-
-                        // Convert tool output to function response
-                        let responseValue = try toJSONValue(invocation.output)
-                        toolParts.append(
-                            .functionResponse(
-                                GeminiFunctionResponse(
-                                    name: invocation.call.toolName,
-                                    response: responseValue
-                                )
-                            )
-                        )
+                        transcript.append(.toolOutput(invocation.output))
                     }
-
-                    // Append tool responses to the conversation
-                    contents.append(GeminiContent(role: .tool, parts: toolParts))
                 }
 
                 // Continue the loop to send the next request with tool results
@@ -329,7 +305,7 @@ public struct GeminiLanguageModel: LanguageModel {
             return LanguageModelSession.Response(
                 content: text as! Content,
                 rawContent: GeneratedContent(text),
-                transcriptEntries: ArraySlice(allEntries)
+                transcriptEntries: ArraySlice(transcript)
             )
         }
     }
@@ -351,11 +327,6 @@ public struct GeminiLanguageModel: LanguageModel {
         let effectiveThinking = customOptions?.thinking ?? _thinking
         let effectiveServerTools = customOptions?.serverTools ?? _serverTools
 
-        let userSegments = extractPromptSegments(from: session, fallbackText: prompt.description)
-        let contents = [
-            GeminiContent(role: .user, parts: convertSegmentsToGeminiParts(userSegments))
-        ]
-
         var streamURL = baseURL
             .appendingPathComponent(apiVersion)
@@ -372,7 +343,7 @@ public struct GeminiLanguageModel: LanguageModel {
             let geminiTools = try buildTools(from: session.tools, serverTools: effectiveServerTools)
 
             let params = try createGenerateContentParams(
-                contents: contents,
+                contents: session.transcript.toGeminiContent(),
                 tools: geminiTools,
                 options: options,
                 thinking: effectiveThinking
@@ -586,6 +557,16 @@ private func toGeneratedContent(_ value: [String: JSONValue]?) throws -> Generat
     return try GeneratedContent(json: json)
 }
 
+private func fromGeneratedContent(_ content: GeneratedContent) throws -> [String: JSONValue] {
+    let data = try JSONEncoder().encode(content)
+    let jsonValue = try JSONDecoder().decode(JSONValue.self, from: data)
+
+    guard case .object(let dict) = jsonValue else {
+        return [:]
+    }
+    return dict
+}
+
 private func toJSONValue(_ toolOutput: Transcript.ToolOutput) throws -> [String: JSONValue] {
     var result: [String: JSONValue] = [:]
 
@@ -608,6 +589,64 @@ private func toJSONValue(_ toolOutput: Transcript.ToolOutput) throws -> [String:
     return result
 }
 
+// MARK: - Supporting Types
+
+extension Transcript {
+    fileprivate func toGeminiContent() -> [GeminiContent] {
+        var messages = [GeminiContent]()
+        for item in self {
+            switch item {
+            case .instructions(let instructions):
+                messages.append(
+                    .init(
+                        role: .user,
+                        parts: convertSegmentsToGeminiParts(instructions.segments))
+                )
+            case .prompt(let prompt):
+                messages.append(
+                    .init(
+                        role: .user,
+                        parts: convertSegmentsToGeminiParts(prompt.segments)
+                    )
+                )
+            case .response(let response):
+                messages.append(
+                    .init(
+                        role: .model,
+                        parts: convertSegmentsToGeminiParts(response.segments)
+                    )
+                )
+            case .toolCalls(let toolCalls):
+                // Add model's response with function calls
+                let functionCallParts: [GeminiPart] = toolCalls.map { call in
+                    let args = try? fromGeneratedContent(call.arguments)
+                    return .functionCall(GeminiFunctionCall(name: call.toolName, args: args))
+                }
+                messages.append(
+                    .init(
+                        role: .model,
+                        parts: functionCallParts
+                    )
+                )
+            case .toolOutput(let toolOutput):
+                // Add function response as a user message (Gemini API expects function responses from user role)
+                let response = try? toJSONValue(toolOutput)
+                let functionResponse = GeminiFunctionResponse(
+                    name: toolOutput.toolName,
+                    response: response ?? [:]
+                )
+                messages.append(
+                    .init(
+                        role: .user,
+                        parts: [.functionResponse(functionResponse)]
+                    )
+                )
+            }
+        }
+        return messages
+    }
+}
+
 private enum GeminiTool: Sendable {
     case functionDeclarations([GeminiFunctionDeclaration])
     case googleSearch
@@ -768,15 +807,6 @@ private func convertSegmentsToGeminiParts(_ segments: [Transcript.Segment]) -> [
     return parts
 }
 
-private func extractPromptSegments(from session: LanguageModelSession, fallbackText: String) -> [Transcript.Segment] {
-    for entry in session.transcript.reversed() {
-        if case .prompt(let p) = entry {
-            return p.segments
-        }
-    }
-    return [.text(.init(content: fallbackText))]
-}
-
 private struct GeminiFunctionCall: Codable, Sendable {
     let name: String
     let args: [String: JSONValue]?
diff --git a/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift b/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
index 04d6700..c25b421 100644
--- a/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
+++ b/Sources/AnyLanguageModel/Models/OpenAILanguageModel.swift
@@ -434,15 +434,6 @@ public struct OpenAILanguageModel: LanguageModel {
             fatalError("OpenAILanguageModel only supports generating String content")
         }
 
-        var messages: [OpenAIMessage] = []
-        if let systemSegments = extractInstructionSegments(from: session) {
-            messages.append(
-                OpenAIMessage(role: .system, content: .blocks(convertSegmentsToOpenAIBlocks(systemSegments)))
-            )
-        }
-        let userSegments = extractPromptSegments(from: session, fallbackText: prompt.description)
-        messages.append(OpenAIMessage(role: .user, content: .blocks(convertSegmentsToOpenAIBlocks(userSegments))))
-
         // Convert tools if any are available in the session
         let openAITools: [OpenAITool]? = {
             guard !session.tools.isEmpty else { return nil }
@@ -457,14 +448,14 @@ public struct OpenAILanguageModel: LanguageModel {
         switch apiVariant {
         case .chatCompletions:
             return try await respondWithChatCompletions(
-                messages: messages,
+                messages: session.transcript.toOpenAIMessages(),
                 tools: openAITools,
                 options: options,
                 session: session
             )
         case .responses:
             return try await respondWithResponses(
-                messages: messages,
+                messages: session.transcript.toOpenAIMessages(),
                 tools: openAITools,
                 options: options,
                 session: session
@@ -619,15 +610,6 @@ public struct OpenAILanguageModel: LanguageModel {
             fatalError("OpenAILanguageModel only supports generating String content")
         }
 
-        var messages: [OpenAIMessage] = []
-        if let systemSegments = extractInstructionSegments(from: session) {
-            messages.append(
-                OpenAIMessage(role: .system, content: .blocks(convertSegmentsToOpenAIBlocks(systemSegments)))
-            )
-        }
-        let userSegments = extractPromptSegments(from: session, fallbackText: prompt.description)
-        messages.append(OpenAIMessage(role: .user, content: .blocks(convertSegmentsToOpenAIBlocks(userSegments))))
-
         // Convert tools if any are available in the session
         let openAITools: [OpenAITool]? = {
             guard !session.tools.isEmpty else { return nil }
@@ -643,7 +625,7 @@ public struct OpenAILanguageModel: LanguageModel {
         case .responses:
             let params = Responses.createRequestBody(
                 model: model,
-                messages: messages,
+                messages: session.transcript.toOpenAIMessages(),
                 tools: openAITools,
                 options: options,
                 stream: true
@@ -706,7 +688,7 @@ public struct OpenAILanguageModel: LanguageModel {
         case .chatCompletions:
             let params = ChatCompletions.createRequestBody(
                 model: model,
-                messages: messages,
+                messages: session.transcript.toOpenAIMessages(),
                 tools: openAITools,
                 options: options,
                 stream: true
@@ -934,7 +916,7 @@ private enum Responses {
                 case .imageURL(let url):
                     return .object([
                         "type": .string("input_image"),
-                        "image_url": .object(["url": .string(url)]),
+                        "image_url": .string(url),
                     ])
                 }
             }
@@ -1122,6 +1104,78 @@ private enum Responses {
 
 // MARK: - Supporting Types
 
+extension Transcript {
+    fileprivate func toOpenAIMessages() -> [OpenAIMessage] {
+        var messages = [OpenAIMessage]()
+        for item in self {
+            switch item {
+            case .instructions(let instructions):
+                messages.append(
+                    .init(
+                        role: .system,
+                        content: .blocks(convertSegmentsToOpenAIBlocks(instructions.segments))
+                    )
+                )
+            case .prompt(let prompt):
+                messages.append(
+                    .init(
+                        role: .user,
+                        content: .blocks(convertSegmentsToOpenAIBlocks(prompt.segments))
+                    )
+                )
+            case .response(let response):
+                messages.append(
+                    .init(
+                        role: .assistant,
+                        content: .blocks(convertSegmentsToOpenAIBlocks(response.segments))
+                    )
+                )
+            case .toolCalls(let toolCalls):
+                // Add assistant message with tool calls
+                let openAIToolCalls: [JSONValue] = toolCalls.map { call in
+                    let argumentsJSON: String
+                    if let data = try? JSONEncoder().encode(call.arguments),
+                        let jsonString = String(data: data, encoding: .utf8) {
+                        argumentsJSON = jsonString
+                    } else {
+                        argumentsJSON = "{}"
+                    }
+
+                    return .object([
+                        "id": .string(call.id),
+                        "type": .string("function"),
+                        "function": .object([
+                            "name": .string(call.toolName),
+                            "arguments": .string(argumentsJSON)
+                        ])
+                    ])
+                }
+
+                let rawMessage: JSONValue = .object([
+                    "role": .string("assistant"),
+                    "content": .null,
+                    "tool_calls": .array(openAIToolCalls)
+                ])
+
+                messages.append(
+                    .init(
+                        role: .raw(rawContent: rawMessage),
+                        content: .text("")
+                    )
+                )
+            case .toolOutput(let toolOutput):
+                messages.append(
+                    .init(
+                        role: .tool(id: toolOutput.id),
+                        content: .blocks(convertSegmentsToOpenAIBlocks(toolOutput.segments))
+                    )
+                )
+            }
+        }
+        return messages
+    }
+}
+
 private struct OpenAIMessage: Hashable, Codable, Sendable {
     enum Role: Hashable, Codable, Sendable {
         case system, user, assistant, raw(rawContent: JSONValue), tool(id: String)
@@ -1165,7 +1219,6 @@ private struct OpenAIMessage: Hashable, Codable, Sendable {
     }
 
     func jsonValue(for apiVariant: OpenAILanguageModel.APIVariant) -> JSONValue {
-
         switch role {
         case .raw(rawContent: let rawContent):
             return rawContent
@@ -1254,29 +1307,6 @@ private func convertSegmentsToOpenAIBlocks(_ segments: [Transcript.Segment]) ->
     return blocks
 }
 
-private func extractPromptSegments(from session: LanguageModelSession, fallbackText: String) -> [Transcript.Segment] {
-    // Prefer the most recent Transcript.Prompt entry if present
-    for entry in session.transcript.reversed() {
-        if case .prompt(let p) = entry {
-            return p.segments
-        }
-    }
-    return [.text(.init(content: fallbackText))]
-}
-
-private func extractInstructionSegments(from session: LanguageModelSession) -> [Transcript.Segment]? {
-    // Prefer the first Transcript.Instructions entry if present
-    for entry in session.transcript {
-        if case .instructions(let i) = entry {
-            return i.segments
-        }
-    }
-    if let instructions = session.instructions?.description, !instructions.isEmpty {
-        return [.text(.init(content: instructions))]
-    }
-    return nil
-}
-
 private struct OpenAITool: Hashable, Codable, Sendable {
     let type: String
     let function: OpenAIFunction
diff --git a/Tests/AnyLanguageModelTests/AnthropicLanguageModelTests.swift b/Tests/AnyLanguageModelTests/AnthropicLanguageModelTests.swift
index 2a03db3..66f263f 100644
--- a/Tests/AnyLanguageModelTests/AnthropicLanguageModelTests.swift
+++ b/Tests/AnyLanguageModelTests/AnthropicLanguageModelTests.swift
@@ -9,7 +9,7 @@ private let anthropicAPIKey: String? = ProcessInfo.processInfo.environment["ANTH
 struct AnthropicLanguageModelTests {
     let model = AnthropicLanguageModel(
         apiKey: anthropicAPIKey!,
-        model: "claude-sonnet-4-5-20250929"
+        model: "claude-sonnet-4-5"
     )
 
     @Test func customHost() throws {
@@ -83,7 +83,7 @@ struct AnthropicLanguageModelTests {
         #expect(!firstResponse.content.isEmpty)
 
         let secondResponse = try await session.respond(to: "What did I just tell you?")
-        #expect(!secondResponse.content.isEmpty)
+        #expect(secondResponse.content.contains("color"))
     }
 
     @Test func withTools() async throws {
@@ -101,30 +101,20 @@ struct AnthropicLanguageModelTests {
     }
 
     @Test func multimodalWithImageURL() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(url: testImageURL)),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(url: testImageURL)
+        )
         #expect(!response.content.isEmpty)
     }
 
     @Test func multimodalWithImageData() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(data: testImageData, mimeType: "image/png")),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(data: testImageData, mimeType: "image/png")
+        )
         #expect(!response.content.isEmpty)
     }
 }
diff --git a/Tests/AnyLanguageModelTests/GeminiLanguageModelTests.swift b/Tests/AnyLanguageModelTests/GeminiLanguageModelTests.swift
index cc06246..bbd64ad 100644
--- a/Tests/AnyLanguageModelTests/GeminiLanguageModelTests.swift
+++ b/Tests/AnyLanguageModelTests/GeminiLanguageModelTests.swift
@@ -85,7 +85,7 @@ struct GeminiLanguageModelTests {
         #expect(!firstResponse.content.isEmpty)
 
         let secondResponse = try await session.respond(to: "What did I just tell you?")
-        #expect(!secondResponse.content.isEmpty)
+        #expect(secondResponse.content.contains("color"))
     }
 
     @Test func withClientTools() async throws {
@@ -118,30 +118,20 @@ struct GeminiLanguageModelTests {
     }
 
     @Test func multimodalWithImageURL() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(url: testImageURL)),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(url: testImageURL)
+        )
         #expect(!response.content.isEmpty)
     }
 
     @Test func multimodalWithImageData() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(data: testImageData, mimeType: "image/png")),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(data: testImageData, mimeType: "image/png")
+        )
         #expect(!response.content.isEmpty)
     }
 }
diff --git a/Tests/AnyLanguageModelTests/OpenAILanguageModelTests.swift b/Tests/AnyLanguageModelTests/OpenAILanguageModelTests.swift
index d6dca28..12f15d3 100644
--- a/Tests/AnyLanguageModelTests/OpenAILanguageModelTests.swift
+++ b/Tests/AnyLanguageModelTests/OpenAILanguageModelTests.swift
@@ -110,30 +110,20 @@ struct OpenAILanguageModelTests {
     }
 
     @Test func multimodalWithImageURL() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(url: testImageURL)),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(url: testImageURL)
+        )
         #expect(!response.content.isEmpty)
     }
 
     @Test func multimodalWithImageData() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(data: testImageData, mimeType: "image/png")),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(data: testImageData, mimeType: "image/png")
+        )
         #expect(!response.content.isEmpty)
     }
 
@@ -144,7 +134,7 @@ struct OpenAILanguageModelTests {
         #expect(!firstResponse.content.isEmpty)
 
         let secondResponse = try await session.respond(to: "What did I just tell you?")
-        #expect(!secondResponse.content.isEmpty)
+        #expect(secondResponse.content.contains("color"))
     }
 
     @Test func withTools() async throws {
@@ -250,30 +240,20 @@ struct OpenAILanguageModelTests {
     }
 
     @Test func multimodalWithImageURL() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(url: testImageURL)),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(url: testImageURL)
+        )
         #expect(!response.content.isEmpty)
     }
 
     @Test func multimodalWithImageData() async throws {
-        let transcript = Transcript(entries: [
-            .prompt(
-                Transcript.Prompt(segments: [
-                    .text(.init(content: "Describe this image")),
-                    .image(.init(data: testImageData, mimeType: "image/png")),
-                ])
-            )
-        ])
-        let session = LanguageModelSession(model: model, transcript: transcript)
-        let response = try await session.respond(to: "")
+        let session = LanguageModelSession(model: model)
+        let response = try await session.respond(
+            to: "Describe this image",
+            image: .init(data: testImageData, mimeType: "image/png")
+        )
         #expect(!response.content.isEmpty)
     }
 
@@ -284,7 +264,7 @@ struct OpenAILanguageModelTests {
         #expect(!firstResponse.content.isEmpty)
 
         let secondResponse = try await session.respond(to: "What did I just tell you?")
-        #expect(!secondResponse.content.isEmpty)
+        #expect(secondResponse.content.contains("color"))
     }
 
     @Test func withTools() async throws {
diff --git a/Tests/AnyLanguageModelTests/SystemLanguageModelTests.swift b/Tests/AnyLanguageModelTests/SystemLanguageModelTests.swift
index 9acb400..05d2319 100644
--- a/Tests/AnyLanguageModelTests/SystemLanguageModelTests.swift
+++ b/Tests/AnyLanguageModelTests/SystemLanguageModelTests.swift
@@ -106,5 +106,17 @@ import AnyLanguageModel
         #expect(content.contains("San Francisco"))
         #expect(content.contains("72°F"))
     }
+
+    @available(macOS 26.0, *)
+    @Test func conversationContext() async throws {
+        let model: SystemLanguageModel = SystemLanguageModel()
+        let session = LanguageModelSession(model: model)
+
+        let firstResponse = try await session.respond(to: "My favorite color is blue")
+        #expect(!firstResponse.content.isEmpty)
+
+        let secondResponse = try await session.respond(to: "What did I just tell you?")
+        #expect(secondResponse.content.contains("color"))
+    }
 }
 #endif