diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift index 9c1601505..5ed4d59ed 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift @@ -13,6 +13,18 @@ extension RunnerTests { return (gestureStartUptimeMs, currentUptimeMs()) } + private func unsupportedResponse(for outcome: RunnerInteractionOutcome) -> Response? { + switch outcome { + case .performed: + return nil + case .unsupported(let message): + return Response( + ok: false, + error: ErrorPayload(code: "UNSUPPORTED_OPERATION", message: message) + ) + } + } + func execute(command: Command) throws -> Response { if Thread.isMainThread { return try executeOnMainSafely(command: command) @@ -231,11 +243,15 @@ extension RunnerTests { case .tap: if let text = command.text { if let element = findElement(app: activeApp, text: text) { + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - element.tap() + outcome = activateElement(app: activeApp, element: element, action: "tap by text") } } + if let response = unsupportedResponse(for: outcome) { + return response + } return Response( ok: true, data: DataPayload( @@ -249,11 +265,15 @@ extension RunnerTests { } if let x = command.x, let y = command.y { let touchFrame = resolvedTouchVisualizationFrame(app: activeApp, x: x, y: y) + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - tapAt(app: activeApp, x: x, y: y) + outcome = tapAt(app: activeApp, x: x, y: y) } } + if let response = unsupportedResponse(for: outcome) { + return response + } return Response( ok: true, data: DataPayload( @@ -309,13 +329,19 @@ extension RunnerTests { let doubleTap = command.doubleTap ?? false let touchFrame = resolvedTouchVisualizationFrame(app: activeApp, x: x, y: y) if doubleTap { + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { runSeries(count: count, pauseMs: intervalMs) { _ in - doubleTapAt(app: activeApp, x: x, y: y) + if case .performed = outcome { + outcome = doubleTapAt(app: activeApp, x: x, y: y) + } } } } + if let response = unsupportedResponse(for: outcome) { + return response + } return Response( ok: true, data: DataPayload( @@ -329,13 +355,19 @@ extension RunnerTests { ) ) } + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { runSeries(count: count, pauseMs: intervalMs) { _ in - tapAt(app: activeApp, x: x, y: y) + if case .performed = outcome { + outcome = tapAt(app: activeApp, x: x, y: y) + } } } } + if let response = unsupportedResponse(for: outcome) { + return response + } return Response( ok: true, data: DataPayload( @@ -354,11 +386,15 @@ extension RunnerTests { } let duration = (command.durationMs ?? 800) / 1000.0 let touchFrame = resolvedTouchVisualizationFrame(app: activeApp, x: x, y: y) + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - longPressAt(app: activeApp, x: x, y: y, duration: duration) + outcome = longPressAt(app: activeApp, x: x, y: y, duration: duration) } } + if let response = unsupportedResponse(for: outcome) { + return response + } return Response( ok: true, data: DataPayload( @@ -377,11 +413,15 @@ extension RunnerTests { } let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0) let dragFrame = resolvedDragVisualizationFrame(app: activeApp, x: x, y: y, x2: x2, y2: y2) + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { - dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + outcome = dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) } } + if let response = unsupportedResponse(for: outcome) { + return response + } return Response( ok: true, data: DataPayload( @@ -407,18 +447,25 @@ extension RunnerTests { return Response(ok: false, error: ErrorPayload(message: "dragSeries pattern must be one-way or ping-pong")) } let holdDuration = min(max((command.durationMs ?? 60) / 1000.0, 0.016), 10.0) + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { withTemporaryScrollIdleTimeoutIfSupported(activeApp) { runSeries(count: count, pauseMs: pauseMs) { idx in + guard case .performed = outcome else { + return + } let reverse = pattern == "ping-pong" && (idx % 2 == 1) if reverse { - dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration) + outcome = dragAt(app: activeApp, x: x2, y: y2, x2: x, y2: y, holdDuration: holdDuration) } else { - dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) + outcome = dragAt(app: activeApp, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) } } } } + if let response = unsupportedResponse(for: outcome) { + return response + } return Response( ok: true, data: DataPayload( @@ -427,6 +474,18 @@ extension RunnerTests { gestureEndUptimeMs: timing.gestureEndUptimeMs ) ) + case .remotePress: + guard let button = tvRemoteButton(from: command.remoteButton) else { + return Response(ok: false, error: ErrorPayload(message: "remotePress requires remoteButton")) + } + let duration = (command.durationMs ?? 0) / 1000.0 + guard pressTvRemote(button, duration: duration) else { + return Response( + ok: false, + error: ErrorPayload(code: "UNSUPPORTED_OPERATION", message: "remotePress is only supported on tvOS") + ) + } + return Response(ok: true, data: DataPayload(message: "remote pressed")) case .type: guard let text = command.text else { return Response(ok: false, error: ErrorPayload(message: "type requires text")) @@ -633,13 +692,23 @@ extension RunnerTests { return Response(ok: false, error: ErrorPayload(message: "alert not found")) } if action == "accept" { - let button = alert.buttons.allElementsBoundByIndex.first - button?.tap() + guard let button = alert.buttons.allElementsBoundByIndex.first else { + return Response(ok: false, error: ErrorPayload(message: "alert accept button not found")) + } + let outcome = activateElement(app: activeApp, element: button, action: "alert accept") + if let response = unsupportedResponse(for: outcome) { + return response + } return Response(ok: true, data: DataPayload(message: "accepted")) } if action == "dismiss" { - let button = alert.buttons.allElementsBoundByIndex.last - button?.tap() + guard let button = alert.buttons.allElementsBoundByIndex.last else { + return Response(ok: false, error: ErrorPayload(message: "alert dismiss button not found")) + } + let outcome = activateElement(app: activeApp, element: button, action: "alert dismiss") + if let response = unsupportedResponse(for: outcome) { + return response + } return Response(ok: true, data: DataPayload(message: "dismissed")) } let buttonLabels = alert.buttons.allElementsBoundByIndex.map { $0.label } @@ -648,8 +717,12 @@ extension RunnerTests { guard let scale = command.scale, scale > 0 else { return Response(ok: false, error: ErrorPayload(message: "pinch requires scale > 0")) } + var outcome = RunnerInteractionOutcome.performed let timing = measureGesture { - pinch(app: activeApp, scale: scale, x: command.x, y: command.y) + outcome = pinch(app: activeApp, scale: scale, x: command.x, y: command.y) + } + if let response = unsupportedResponse(for: outcome) { + return response } return Response( ok: true, diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift index 25bd36e69..34bf72f4b 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift @@ -26,6 +26,9 @@ extension RunnerTests { return true } return false +#elseif os(tvOS) + _ = pressTvRemote(.menu) + return true #else let buttons = app.navigationBars.buttons.allElementsBoundByIndex if let back = buttons.first(where: { $0.isHittable }) { @@ -37,20 +40,26 @@ extension RunnerTests { } func performBackGesture(app: XCUIApplication) { - if pressTvRemoteMenuIfAvailable() { + if pressTvRemote(.menu) { return } + performCoordinateBackGesture(app: app) + } + + private func performCoordinateBackGesture(app: XCUIApplication) { +#if !os(tvOS) let target = app.windows.firstMatch.exists ? app.windows.firstMatch : app let start = target.coordinate(withNormalizedOffset: CGVector(dx: 0.05, dy: 0.5)) let end = target.coordinate(withNormalizedOffset: CGVector(dx: 0.8, dy: 0.5)) start.press(forDuration: 0.05, thenDragTo: end) +#endif } func performSystemBackAction(app: XCUIApplication) -> Bool { #if os(macOS) return false #else - if pressTvRemoteMenuIfAvailable() { + if pressTvRemote(.menu) { return true } performBackGesture(app: app) @@ -59,20 +68,28 @@ extension RunnerTests { } func performAppSwitcherGesture(app: XCUIApplication) { - if performTvRemoteAppSwitcherIfAvailable() { + if pressTvRemote(.home) { + sleepFor(resolveTvRemoteDoublePressDelay()) + _ = pressTvRemote(.home) return } + performCoordinateAppSwitcherGesture(app: app) + } + + private func performCoordinateAppSwitcherGesture(app: XCUIApplication) { +#if !os(tvOS) let target = app.windows.firstMatch.exists ? app.windows.firstMatch : app let start = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.99)) let end = target.coordinate(withNormalizedOffset: CGVector(dx: 0.5, dy: 0.7)) start.press(forDuration: 0.6, thenDragTo: end) +#endif } func pressHomeButton() { #if os(macOS) return #else - if pressTvRemoteHomeIfAvailable() { + if pressTvRemote(.home) { return } XCUIDevice.shared.press(.home) @@ -80,7 +97,7 @@ extension RunnerTests { } func rotateDevice(to orientationName: String) -> Bool { -#if os(macOS) +#if os(macOS) || os(tvOS) return false #else switch orientationName { @@ -100,48 +117,6 @@ extension RunnerTests { #endif } - private func pressTvRemoteMenuIfAvailable() -> Bool { -#if os(tvOS) - XCUIRemote.shared.press(.menu) - return true -#else - return false -#endif - } - - private func pressTvRemoteHomeIfAvailable() -> Bool { -#if os(tvOS) - XCUIRemote.shared.press(.home) - return true -#else - return false -#endif - } - - private func performTvRemoteAppSwitcherIfAvailable() -> Bool { -#if os(tvOS) - XCUIRemote.shared.press(.home) - sleepFor(resolveTvRemoteDoublePressDelay()) - XCUIRemote.shared.press(.home) - return true -#else - return false -#endif - } - - private func resolveTvRemoteDoublePressDelay() -> TimeInterval { - guard - let raw = ProcessInfo.processInfo.environment["AGENT_DEVICE_TV_REMOTE_DOUBLE_PRESS_DELAY_MS"], - !raw.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty - else { - return tvRemoteDoublePressDelayDefault - } - guard let parsedMs = Double(raw), parsedMs >= 0 else { - return tvRemoteDoublePressDelayDefault - } - return min(parsedMs, 1000) / 1000.0 - } - func findElement(app: XCUIApplication, text: String) -> XCUIElement? { let predicate = NSPredicate(format: "label CONTAINS[c] %@ OR identifier CONTAINS[c] %@ OR value CONTAINS[c] %@", text, text, text) let element = app.descendants(matching: .any).matching(predicate).firstMatch @@ -183,7 +158,9 @@ extension RunnerTests { } func clearTextInput(_ element: XCUIElement) { +#if !os(tvOS) moveCaretToEnd(element: element) +#endif let count = estimatedDeleteCount(for: element) let deletes = String(repeating: XCUIKeyboardKey.delete.rawValue, count: count) element.typeText(deletes) @@ -268,6 +245,12 @@ extension RunnerTests { return (wasVisible: false, dismissed: false, visible: false) } +#if os(tvOS) + _ = pressTvRemote(.menu) + sleepFor(0.2) + let visible = isKeyboardVisible(app: app) + return (wasVisible: true, dismissed: !visible, visible: visible) +#else let keyboard = app.keyboards.firstMatch keyboard.swipeDown() sleepFor(0.2) @@ -282,9 +265,13 @@ extension RunnerTests { } return (wasVisible: true, dismissed: false, visible: isKeyboardVisible(app: app)) +#endif } private func tapKeyboardDismissControl(app: XCUIApplication) -> Bool { +#if os(tvOS) + return false +#else let keyboardFrame = app.keyboards.firstMatch.frame for label in ["Hide keyboard", "Dismiss keyboard", "Done"] { let candidates = [ @@ -313,6 +300,7 @@ extension RunnerTests { } } return false +#endif } private func isKeyboardAccessoryControl(_ element: XCUIElement, keyboardFrame: CGRect) -> Bool { @@ -324,6 +312,9 @@ extension RunnerTests { } private func moveCaretToEnd(element: XCUIElement) { +#if os(tvOS) + return +#else let frame = element.frame guard !frame.isEmpty else { element.tap() @@ -334,6 +325,7 @@ extension RunnerTests { CGVector(dx: max(2, frame.width - 4), dy: max(2, frame.height / 2)) ) target.tap() +#endif } private func estimatedDeleteCount(for element: XCUIElement) -> Int { @@ -379,49 +371,64 @@ extension RunnerTests { return element.exists ? element : nil } - func tapAt(app: XCUIApplication, x: Double, y: Double) { - let coordinate = interactionCoordinate(app: app, x: x, y: y) - coordinate.tap() + func tapAt(app: XCUIApplication, x: Double, y: Double) -> RunnerInteractionOutcome { + if let outcome = selectFocusedTvElement(app: app, point: CGPoint(x: x, y: y), action: "tap") { + return outcome + } + return performCoordinateTap(app: app, x: x, y: y) } func mouseClickAt(app: XCUIApplication, x: Double, y: Double, button: String) throws { +#if os(macOS) let coordinate = interactionCoordinate(app: app, x: x, y: y) - #if os(macOS) - switch button { - case "primary": - coordinate.tap() - case "secondary": - coordinate.rightClick() - case "middle": - throw NSError( - domain: "AgentDeviceRunner", - code: 1, - userInfo: [NSLocalizedDescriptionKey: "middle mouse button is not supported"] - ) - default: - throw NSError( - domain: "AgentDeviceRunner", - code: 1, - userInfo: [NSLocalizedDescriptionKey: "unsupported mouse button: \(button)"] - ) - } - #else + switch button { + case "primary": + coordinate.tap() + case "secondary": + coordinate.rightClick() + case "middle": + throw NSError( + domain: "AgentDeviceRunner", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "middle mouse button is not supported"] + ) + default: throw NSError( domain: "AgentDeviceRunner", code: 1, - userInfo: [NSLocalizedDescriptionKey: "mouseClick is only supported on macOS"] + userInfo: [NSLocalizedDescriptionKey: "unsupported mouse button: \(button)"] ) - #endif + } +#elseif os(tvOS) + throw NSError( + domain: "AgentDeviceRunner", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "mouseClick is not supported on tvOS"] + ) +#else + throw NSError( + domain: "AgentDeviceRunner", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "mouseClick is only supported on macOS"] + ) +#endif } - func doubleTapAt(app: XCUIApplication, x: Double, y: Double) { - let coordinate = interactionCoordinate(app: app, x: x, y: y) - coordinate.doubleTap() + func doubleTapAt(app: XCUIApplication, x: Double, y: Double) -> RunnerInteractionOutcome { + if let outcome = selectFocusedTvElement(app: app, point: CGPoint(x: x, y: y), action: "double tap") { + guard case .performed = outcome else { return outcome } + sleepFor(0.1) + _ = pressTvRemote(.select) + return .performed + } + return performCoordinateDoubleTap(app: app, x: x, y: y) } - func longPressAt(app: XCUIApplication, x: Double, y: Double, duration: TimeInterval) { - let coordinate = interactionCoordinate(app: app, x: x, y: y) - coordinate.press(forDuration: duration) + func longPressAt(app: XCUIApplication, x: Double, y: Double, duration: TimeInterval) -> RunnerInteractionOutcome { + if let outcome = longSelectFocusedTvElement(app: app, point: CGPoint(x: x, y: y), duration: duration) { + return outcome + } + return performCoordinateLongPress(app: app, x: x, y: y, duration: duration) } func dragAt( @@ -431,10 +438,17 @@ extension RunnerTests { x2: Double, y2: Double, holdDuration: TimeInterval - ) { - let start = interactionCoordinate(app: app, x: x, y: y) - let end = interactionCoordinate(app: app, x: x2, y: y2) - start.press(forDuration: holdDuration, thenDragTo: end) + ) -> RunnerInteractionOutcome { + // tvOS has no coordinate drag. Preserve the direction as a focus move. + let dx = x2 - x + let dy = y2 - y + let button: TvRemoteButton = abs(dx) > abs(dy) + ? (dx > 0 ? .right : .left) + : (dy > 0 ? .down : .up) + if pressTvRemote(button) { + return .performed + } + return performCoordinateDrag(app: app, x: x, y: y, x2: x2, y2: y2, holdDuration: holdDuration) } func resolvedTouchVisualizationFrame(app: XCUIApplication, x: Double, y: Double) -> TouchVisualizationFrame { @@ -510,26 +524,28 @@ extension RunnerTests { } private func performTvRemoteSwipeIfAvailable(direction: String) -> Bool { -#if os(tvOS) switch direction { case "up": - XCUIRemote.shared.press(.up) + return pressTvRemote(.up) case "down": - XCUIRemote.shared.press(.down) + return pressTvRemote(.down) case "left": - XCUIRemote.shared.press(.left) + return pressTvRemote(.left) case "right": - XCUIRemote.shared.press(.right) + return pressTvRemote(.right) default: return false } - return true -#else - return false -#endif } - func pinch(app: XCUIApplication, scale: Double, x: Double?, y: Double?) { + func pinch(app: XCUIApplication, scale: Double, x: Double?, y: Double?) -> RunnerInteractionOutcome { + return performCoordinatePinch(app: app, scale: scale, x: x, y: y) + } + + private func performCoordinatePinch(app: XCUIApplication, scale: Double, x: Double?, y: Double?) -> RunnerInteractionOutcome { +#if os(tvOS) + return .unsupported("pinch is not supported on tvOS") +#else let target = app.windows.firstMatch.exists ? app.windows.firstMatch : app // Use double-tap + drag gesture for reliable map zoom @@ -560,6 +576,8 @@ extension RunnerTests { // Immediately press and drag (second tap + drag) center.press(forDuration: 0.05, thenDragTo: endPoint) + return .performed +#endif } private func interactionRoot(app: XCUIApplication) -> XCUIElement { @@ -570,6 +588,52 @@ extension RunnerTests { return app } + private func performCoordinateTap(app: XCUIApplication, x: Double, y: Double) -> RunnerInteractionOutcome { +#if os(tvOS) + return .unsupported("coordinate tap is not supported on tvOS; move focus with swipe or scroll, then select the focused element") +#else + interactionCoordinate(app: app, x: x, y: y).tap() + return .performed +#endif + } + + private func performCoordinateDoubleTap(app: XCUIApplication, x: Double, y: Double) -> RunnerInteractionOutcome { +#if os(tvOS) + return .unsupported("coordinate double tap is not supported on tvOS; move focus with swipe or scroll, then select the focused element") +#else + interactionCoordinate(app: app, x: x, y: y).doubleTap() + return .performed +#endif + } + + private func performCoordinateLongPress(app: XCUIApplication, x: Double, y: Double, duration: TimeInterval) -> RunnerInteractionOutcome { +#if os(tvOS) + return .unsupported("coordinate long press is not supported on tvOS; move focus with swipe or scroll, then long-select the focused element") +#else + interactionCoordinate(app: app, x: x, y: y).press(forDuration: duration) + return .performed +#endif + } + + private func performCoordinateDrag( + app: XCUIApplication, + x: Double, + y: Double, + x2: Double, + y2: Double, + holdDuration: TimeInterval + ) -> RunnerInteractionOutcome { +#if os(tvOS) + return .unsupported("coordinate drag is not supported on tvOS") +#else + let start = interactionCoordinate(app: app, x: x, y: y) + let end = interactionCoordinate(app: app, x: x2, y: y2) + start.press(forDuration: holdDuration, thenDragTo: end) + return .performed +#endif + } + +#if !os(tvOS) private func interactionCoordinate(app: XCUIApplication, x: Double, y: Double) -> XCUICoordinate { let root = interactionRoot(app: app) let origin = root.coordinate(withNormalizedOffset: CGVector(dx: 0, dy: 0)) @@ -578,14 +642,17 @@ extension RunnerTests { let offsetY = y - Double(rootFrame.origin.y) return origin.withOffset(CGVector(dx: offsetX, dy: offsetY)) } +#endif private func tapElementCenter(app: XCUIApplication, element: XCUIElement) { let frame = element.frame if !frame.isEmpty { - tapAt(app: app, x: frame.midX, y: frame.midY) + _ = tapAt(app: app, x: frame.midX, y: frame.midY) return } +#if !os(tvOS) element.tap() +#endif } private func macOSNavigationBackElement(app: XCUIApplication) -> XCUIElement? { diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift index 833ce5a10..adc247b26 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Lifecycle.swift @@ -184,6 +184,7 @@ extension RunnerTests { .tap, .longPress, .drag, + .remotePress, .type, .swipe, .back, diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift index 3f8cf72f7..d9c9aef36 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift @@ -8,6 +8,7 @@ enum CommandType: String, Codable { case interactionFrame case drag case dragSeries + case remotePress case type case swipe case findText @@ -39,6 +40,7 @@ struct Command: Codable { let x: Double? let y: Double? let button: String? + let remoteButton: String? let count: Double? let intervalMs: Double? let doubleTap: Bool? @@ -162,6 +164,7 @@ struct SnapshotNode: Codable { let value: String? let rect: SnapshotRect let enabled: Bool + let focused: Bool? let hittable: Bool let depth: Int let parentIndex: Int? diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index e78209b6f..4a2a1b3b2 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -28,6 +28,7 @@ extension RunnerTests { let identifier: String let valueText: String? let hittable: Bool + let focused: Bool let visible: Bool } @@ -341,6 +342,7 @@ extension RunnerTests { identifier: identifier, valueText: valueText, hittable: computedSnapshotHittable(snapshot, viewport: context.viewport, laterNodes: laterNodes), + focused: snapshotHasFocus(snapshot), visible: isVisibleInViewport(snapshot.frame, context.viewport) ) } @@ -360,6 +362,7 @@ extension RunnerTests { value: evaluation.valueText, rect: snapshotRect(from: snapshot.frame), enabled: snapshot.isEnabled, + focused: evaluation.focused ? true : nil, hittable: evaluation.hittable, depth: depth, parentIndex: parentIndex, @@ -525,6 +528,7 @@ extension RunnerTests { value: node.value, rect: node.rect, enabled: node.enabled, + focused: node.focused, hittable: node.hittable, depth: depth, parentIndex: parentIndex, @@ -575,6 +579,7 @@ extension RunnerTests { value: valueText, rect: snapshotRect(from: frame), enabled: element.isEnabled, + focused: elementHasFocus(element) ? true : nil, hittable: element.isHittable, depth: 0, parentIndex: nil, @@ -592,6 +597,16 @@ extension RunnerTests { return node } + private func snapshotHasFocus(_ snapshot: XCUIElementSnapshot) -> Bool { + var focused = false + _ = RunnerObjCExceptionCatcher.catchException({ + if let value = (snapshot as! NSObject).value(forKey: "hasFocus") as? Bool { + focused = value + } + }) + return focused + } + private func shouldExpandCollapsedTabContainer(_ snapshot: XCUIElementSnapshot) -> Bool { let frame = snapshot.frame if frame.isNull || frame.isEmpty { return false } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift index 01611125e..3737e2fbd 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SystemModal.swift @@ -186,6 +186,7 @@ extension RunnerTests { value: nil, rect: snapshotRect(from: element.frame), enabled: element.isEnabled, + focused: elementHasFocus(element) ? true : nil, hittable: hittableOverride ?? element.isHittable, depth: depth, parentIndex: nil, diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+TvRemote.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+TvRemote.swift new file mode 100644 index 000000000..abc771d84 --- /dev/null +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+TvRemote.swift @@ -0,0 +1,185 @@ +import XCTest + +enum RunnerInteractionOutcome { + case performed + case unsupported(String) +} + +enum TvRemoteButton { + case select + case menu + case home + case up + case down + case left + case right +} + +extension RunnerTests { + func resolveTvRemoteDoublePressDelay() -> TimeInterval { + guard + let raw = ProcessInfo.processInfo.environment["AGENT_DEVICE_TV_REMOTE_DOUBLE_PRESS_DELAY_MS"], + !raw.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + else { + return tvRemoteDoublePressDelayDefault + } + guard let parsedMs = Double(raw), parsedMs >= 0 else { + return tvRemoteDoublePressDelayDefault + } + return min(parsedMs, 1000) / 1000.0 + } + + @discardableResult + func pressTvRemote(_ button: TvRemoteButton, duration: TimeInterval? = nil) -> Bool { +#if os(tvOS) + let remoteButton = xcuiRemoteButton(button) + if let duration, duration > 0 { + XCUIRemote.shared.press(remoteButton, forDuration: duration) + } else { + XCUIRemote.shared.press(remoteButton) + } + return true +#else + return false +#endif + } + + func tvRemoteButton(from raw: String?) -> TvRemoteButton? { + switch raw?.lowercased() { + case "select": + return .select + case "menu": + return .menu + case "home": + return .home + case "up": + return .up + case "down": + return .down + case "left": + return .left + case "right": + return .right + default: + return nil + } + } + + func elementHasFocus(_ element: XCUIElement) -> Bool { + var focused = false + _ = RunnerObjCExceptionCatcher.catchException({ + if let value = (element as NSObject).value(forKey: "hasFocus") as? Bool { + focused = value + } + }) + return focused + } + + func activateElement(app: XCUIApplication, element: XCUIElement, action: String) -> RunnerInteractionOutcome { + if let outcome = selectFocusedTvElement(app: app, element: element, action: action) { + return outcome + } + return performElementTap(element) + } + + func selectFocusedTvElement(app: XCUIApplication, point: CGPoint, action: String) -> RunnerInteractionOutcome? { +#if os(tvOS) + guard let focused = focusedTvElement(app: app), !focused.frame.isEmpty, focused.frame.contains(point) else { + return .unsupported("\(action) is supported on tvOS only when the requested point is inside the focused element") + } + _ = pressTvRemote(.select) + return .performed +#else + return nil +#endif + } + + func longSelectFocusedTvElement(app: XCUIApplication, point: CGPoint, duration: TimeInterval) -> RunnerInteractionOutcome? { +#if os(tvOS) + guard let focused = focusedTvElement(app: app), !focused.frame.isEmpty, focused.frame.contains(point) else { + return .unsupported("long press is supported on tvOS only when the requested point is inside the focused element") + } + _ = pressTvRemote(.select, duration: duration) + return .performed +#else + return nil +#endif + } + + private func performElementTap(_ element: XCUIElement) -> RunnerInteractionOutcome { +#if os(tvOS) + return .unsupported("element tap is not supported on tvOS; move focus with swipe or scroll, then select the focused element") +#else + element.tap() + return .performed +#endif + } + + private func selectFocusedTvElement(app: XCUIApplication, element: XCUIElement, action: String) -> RunnerInteractionOutcome? { +#if os(tvOS) + guard tvFocusedElementMatches(app: app, target: element) else { + return .unsupported("\(action) is supported on tvOS only when the requested element is focused") + } + _ = pressTvRemote(.select) + return .performed +#else + return nil +#endif + } + + private func tvFocusedElementMatches(app: XCUIApplication, target: XCUIElement) -> Bool { +#if os(tvOS) + if target.hasFocus { + return true + } + guard let focused = focusedTvElement(app: app) else { + return false + } + let targetFrame = target.frame + let focusedFrame = focused.frame + guard !targetFrame.isEmpty && !focusedFrame.isEmpty else { + return false + } + let focusedCenter = CGPoint(x: focusedFrame.midX, y: focusedFrame.midY) + let targetCenter = CGPoint(x: targetFrame.midX, y: targetFrame.midY) + return targetFrame.contains(focusedCenter) + || focusedFrame.contains(targetCenter) + || targetFrame.intersects(focusedFrame) +#else + return false +#endif + } + + private func focusedTvElement(app: XCUIApplication) -> XCUIElement? { +#if os(tvOS) + let focused = app + .descendants(matching: .any) + .matching(NSPredicate(format: "hasFocus == true")) + .firstMatch + return focused.exists ? focused : nil +#else + return nil +#endif + } + +#if os(tvOS) + private func xcuiRemoteButton(_ button: TvRemoteButton) -> XCUIRemote.Button { + switch button { + case .select: + return .select + case .menu: + return .menu + case .home: + return .home + case .up: + return .up + case .down: + return .down + case .left: + return .left + case .right: + return .right + } + } +#endif +} diff --git a/src/core/__tests__/capabilities.test.ts b/src/core/__tests__/capabilities.test.ts index 14d3d32d6..3003f1710 100644 --- a/src/core/__tests__/capabilities.test.ts +++ b/src/core/__tests__/capabilities.test.ts @@ -250,9 +250,14 @@ test('tvOS follows iOS capability matrix by device kind', () => { [{ device: tvOsSimulator, expected: true, label: 'on tvOS' }], ); assertCommandSupport( - ['pinch', 'push', 'settings', 'alert'], + ['push', 'settings', 'alert'], [{ device: tvOsSimulator, expected: true, label: 'on tvOS simulator' }], ); + assert.equal( + isCommandSupportedOnDevice('pinch', tvOsSimulator), + false, + 'pinch on tvOS simulator', + ); assert.equal( isCommandSupportedOnDevice('keyboard', tvOsSimulator), false, diff --git a/src/core/__tests__/dispatch-pinch.test.ts b/src/core/__tests__/dispatch-pinch.test.ts index 452c42ae0..f28caf131 100644 --- a/src/core/__tests__/dispatch-pinch.test.ts +++ b/src/core/__tests__/dispatch-pinch.test.ts @@ -2,7 +2,7 @@ import { test } from 'vitest'; import assert from 'node:assert/strict'; import { dispatchCommand } from '../dispatch.ts'; import { AppError } from '../../utils/errors.ts'; -import { MACOS_DEVICE } from '../../__tests__/test-utils/device-fixtures.ts'; +import { MACOS_DEVICE, TVOS_SIMULATOR } from '../../__tests__/test-utils/device-fixtures.ts'; test('dispatch pinch rejects helper-backed macOS surfaces', async () => { await assert.rejects( @@ -13,3 +13,13 @@ test('dispatch pinch rejects helper-backed macOS surfaces', async () => { /macOS app sessions/i.test(error.message), ); }); + +test('dispatch pinch rejects tvOS before runner call', async () => { + await assert.rejects( + () => dispatchCommand(TVOS_SIMULATOR, 'pinch', ['1.5']), + (error: unknown) => + error instanceof AppError && + error.code === 'UNSUPPORTED_OPERATION' && + /pinch is not supported on tvOS/i.test(error.message), + ); +}); diff --git a/src/core/capabilities.ts b/src/core/capabilities.ts index 18e74d485..822d34586 100644 --- a/src/core/capabilities.ts +++ b/src/core/capabilities.ts @@ -17,6 +17,8 @@ type CommandCapability = { const isNotMacOs = (device: DeviceInfo): boolean => device.platform !== 'macos'; const isMacOsOrAppleSimulator = (device: DeviceInfo): boolean => device.platform === 'macos' || device.kind === 'simulator'; +const isMacOsOrMobileAppleSimulator = (device: DeviceInfo): boolean => + device.platform === 'macos' || (device.kind === 'simulator' && device.target !== 'tv'); // Linux desktop supports these commands via xdotool/ydotool + AT-SPI2. // Linux device kind is always 'device' (local desktop). @@ -39,7 +41,7 @@ const COMMAND_CAPABILITY_MATRIX: Record = { apple: { simulator: true, device: true }, android: {}, linux: LINUX_NONE, - supports: isMacOsOrAppleSimulator, + supports: isMacOsOrMobileAppleSimulator, }, 'app-switcher': { apple: { simulator: true, device: true }, diff --git a/src/core/dispatch.ts b/src/core/dispatch.ts index 245261209..a999fc85c 100644 --- a/src/core/dispatch.ts +++ b/src/core/dispatch.ts @@ -39,7 +39,7 @@ import { import { readNotificationPayload } from './dispatch-payload.ts'; import { parseDeviceRotation } from './device-rotation.ts'; -export { resolveTargetDevice, withResolveTargetDeviceCacheScope } from './dispatch-resolve.ts'; +export { resolveTargetDevice } from './dispatch-resolve.ts'; export { shouldUseIosTapSeries, shouldUseIosDragSeries }; export type BatchStep = { @@ -568,6 +568,9 @@ async function handlePinchCommand( 'Android pinch is not supported in current adb backend; requires instrumentation-based backend.', ); } + if (device.target === 'tv') { + throw new AppError('UNSUPPORTED_OPERATION', 'pinch is not supported on tvOS'); + } if (device.platform === 'macos' && context?.surface && context.surface !== 'app') { throw new AppError( 'UNSUPPORTED_OPERATION', diff --git a/src/core/interactors.ts b/src/core/interactors.ts index b3d9901b2..b8909e2f3 100644 --- a/src/core/interactors.ts +++ b/src/core/interactors.ts @@ -32,6 +32,7 @@ import { import { runMacOsScreenshotAction } from '../platforms/ios/macos-helper.ts'; import { runIosRunnerCommand } from '../platforms/ios/runner-client.ts'; import { + appleRemotePressCommand, iosRunnerOverrides, resolveAppleBackRunnerCommand, } from '../platforms/ios/interactions.ts'; @@ -132,6 +133,14 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): await screenshotIos(device, outPath, options?.appBundleId, options?.fullscreen); }, back: async (mode) => { + if (device.target === 'tv') { + await runIosRunnerCommand( + device, + appleRemotePressCommand('menu', runnerContext.appBundleId), + runnerOpts, + ); + return; + } await runIosRunnerCommand( device, { @@ -142,6 +151,14 @@ export function getInteractor(device: DeviceInfo, runnerContext: RunnerContext): ); }, home: async () => { + if (device.target === 'tv') { + await runIosRunnerCommand( + device, + appleRemotePressCommand('home', runnerContext.appBundleId), + runnerOpts, + ); + return; + } await runIosRunnerCommand( device, { command: 'home', appBundleId: runnerContext.appBundleId }, diff --git a/src/daemon/handlers/snapshot-capture.ts b/src/daemon/handlers/snapshot-capture.ts index 73397c814..0641759f4 100644 --- a/src/daemon/handlers/snapshot-capture.ts +++ b/src/daemon/handlers/snapshot-capture.ts @@ -340,6 +340,7 @@ function reindexSnapshotNodes(nodes: RawSnapshotNode[], depthOffset = 0): RawSna } function isInteractiveSnapshotNode(node: RawSnapshotNode): boolean { + if (node.focused) return true; if (node.hittable) return true; if (node.rect) return true; const role = `${node.type ?? ''} ${node.role ?? ''} ${node.subrole ?? ''}`.toLowerCase(); diff --git a/src/platforms/ios/__tests__/runner-client.test.ts b/src/platforms/ios/__tests__/runner-client.test.ts index 174437ffa..ecf51c572 100644 --- a/src/platforms/ios/__tests__/runner-client.test.ts +++ b/src/platforms/ios/__tests__/runner-client.test.ts @@ -113,6 +113,7 @@ const runnerProtocolCommandFixtures: Record; type RunIosRunnerCommand = typeof runIosRunnerCommand; type RunnerOpts = { verbose?: boolean; @@ -135,21 +137,17 @@ export function iosRunnerOverrides( }; } -function invertScrollDirection(direction: ScrollDirection): ScrollDirection { - switch (direction) { - case 'up': - return 'down'; - case 'down': - return 'up'; - case 'left': - return 'right'; - case 'right': - return 'left'; - default: { - const _exhaustive: never = direction; - return _exhaustive; - } - } +export function appleRemotePressCommand( + remoteButton: AppleRemoteButton, + appBundleId?: string, + durationMs?: number, +): Parameters[1] { + return { + command: 'remotePress', + remoteButton, + ...(durationMs !== undefined ? { durationMs } : {}), + ...(appBundleId !== undefined ? { appBundleId } : {}), + }; } async function runAppleScroll( @@ -164,11 +162,7 @@ async function runAppleScroll( if (device.target === 'tv') { const runnerResult = await runRunnerCommand( device, - { - command: 'swipe', - direction: invertScrollDirection(direction), - appBundleId: ctx.appBundleId, - }, + appleRemotePressCommand(direction, ctx.appBundleId), runnerOpts, ); return normalizeIosScrollResult(runnerResult, options); diff --git a/src/platforms/ios/runner-contract.ts b/src/platforms/ios/runner-contract.ts index f4397c1b3..0246288e0 100644 --- a/src/platforms/ios/runner-contract.ts +++ b/src/platforms/ios/runner-contract.ts @@ -14,6 +14,7 @@ export type RunnerCommand = { | 'interactionFrame' | 'drag' | 'dragSeries' + | 'remotePress' | 'type' | 'swipe' | 'findText' @@ -40,6 +41,7 @@ export type RunnerCommand = { x?: number; y?: number; button?: ClickButton; + remoteButton?: 'select' | 'menu' | 'home' | 'up' | 'down' | 'left' | 'right'; count?: number; intervalMs?: number; doubleTap?: boolean; diff --git a/src/utils/__tests__/interactors.test.ts b/src/utils/__tests__/interactors.test.ts index 97103b855..0e346f2ac 100644 --- a/src/utils/__tests__/interactors.test.ts +++ b/src/utils/__tests__/interactors.test.ts @@ -20,6 +20,15 @@ const iosSimulator: DeviceInfo = { booted: true, }; +const tvosSimulator: DeviceInfo = { + platform: 'ios', + id: 'tv-sim-1', + name: 'Apple TV', + kind: 'simulator', + target: 'tv', + booted: true, +}; + const mockRunIosRunnerCommand = vi.mocked(runIosRunnerCommand); beforeEach(() => { @@ -66,6 +75,21 @@ test('ios scroll reports planned pixels without recomputing from runner coordina assert.equal(pixels, 120); }); +test('tvos scroll preserves remote focus direction', async () => { + const commands: RunnerCommand[] = []; + mockRunIosRunnerCommand.mockImplementation(async (_device, command) => { + commands.push(command); + return {}; + }); + const interactor = getInteractor(tvosSimulator, { appBundleId: 'com.example.app' }); + + await interactor.scroll('down'); + + assert.deepEqual(commands, [ + { command: 'remotePress', remoteButton: 'down', appBundleId: 'com.example.app' }, + ]); +}); + test('ios fill clears the focused field after tapping the target coordinates', async () => { const commands: RunnerCommand[] = []; mockRunIosRunnerCommand.mockImplementation(async (_device, command) => { diff --git a/src/utils/__tests__/snapshot-lines-focused.test.ts b/src/utils/__tests__/snapshot-lines-focused.test.ts new file mode 100644 index 000000000..9bdee6078 --- /dev/null +++ b/src/utils/__tests__/snapshot-lines-focused.test.ts @@ -0,0 +1,23 @@ +import assert from 'node:assert/strict'; +import { test } from 'vitest'; +import { formatSnapshotLine } from '../snapshot-lines.ts'; + +test('formatSnapshotLine marks focused nodes', () => { + const line = formatSnapshotLine( + { + ref: 'e1', + index: 0, + depth: 0, + type: 'Button', + label: 'General', + enabled: true, + focused: true, + }, + 0, + false, + undefined, + { summarizeTextSurfaces: true }, + ); + + assert.match(line, /\[focused\]/); +}); diff --git a/src/utils/command-schema.ts b/src/utils/command-schema.ts index 24cd4e106..927c54d5e 100644 --- a/src/utils/command-schema.ts +++ b/src/utils/command-schema.ts @@ -264,6 +264,7 @@ Snapshots and refs: Snapshot legend: @e12 [button] label="Add to cart" id="add-cart" enabled hittable -> press @e12 or press 'id="add-cart"'. @e13 [textinput] label="Notes" preview="Leave at side..." truncated -> snapshot -s @e13 before reading. + @e14 [cell] label="Profiles" focused -> tvOS focus is currently on this row. [off-screen below] 4 items: "Privacy", "About" -> scroll down, then snapshot -i; those are hints, not refs. Re-snapshot after navigation, submit, modal/list/reload/dynamic changes. Off-screen summaries are scroll hints; use scroll, not swipe, then snapshot -i. diff --git a/src/utils/snapshot-lines.ts b/src/utils/snapshot-lines.ts index 3d786a881..4a6fa0647 100644 --- a/src/utils/snapshot-lines.ts +++ b/src/utils/snapshot-lines.ts @@ -200,6 +200,7 @@ function buildLineMetadata( return metadata; } if (node.selected === true) metadata.push('selected'); + if (node.focused === true) metadata.push('focused'); if (isEditableRole(type)) metadata.push('editable'); if (looksScrollable(node, type)) metadata.push('scrollable'); if (!textSurface.shouldSummarize) { diff --git a/src/utils/snapshot.ts b/src/utils/snapshot.ts index 4d7c474a9..50de6a0b9 100644 --- a/src/utils/snapshot.ts +++ b/src/utils/snapshot.ts @@ -29,6 +29,7 @@ export type RawSnapshotNode = { rect?: Rect; enabled?: boolean; selected?: boolean; + focused?: boolean; hittable?: boolean; depth?: number; parentIndex?: number;