diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift index e8b857252..82e20cf61 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift @@ -635,6 +635,84 @@ extension RunnerTests { synthesized: false, message: "scrolled" ) + case .desktopScroll: + guard let direction = command.direction, + direction == "up" || direction == "down" || direction == "left" || direction == "right" + else { + return Response( + ok: false, + error: ErrorPayload( + code: "INVALID_ARGS", + message: "desktopScroll requires direction up|down|left|right" + ) + ) + } + let appFrame = activeApp.frame + let frame = resolvedTouchReferenceFrame(app: activeApp, appFrame: appFrame) + guard frame.width > 0, frame.height > 0 else { + return Response( + ok: false, + error: ErrorPayload(message: "desktopScroll could not resolve a usable interaction frame") + ) + } + guard let plan = runnerScrollGesturePlan( + direction: direction, + amount: command.amount, + pixels: command.pixels, + referenceWidth: frame.width, + referenceHeight: frame.height + ) else { + return Response( + ok: false, + error: ErrorPayload( + code: "INVALID_ARGS", + message: "desktopScroll could not compute a wheel plan" + ) + ) + } + let x = frame.midX + let y = frame.midY + let localX = x - (appFrame.isEmpty ? frame.minX : appFrame.minX) + let localY = y - (appFrame.isEmpty ? frame.minY : appFrame.minY) + if let durationMs = command.durationMs, + durationMs.isFinite == false || durationMs < 0 || durationMs > 10000 + { + return Response( + ok: false, + error: ErrorPayload( + code: "INVALID_ARGS", + message: "desktopScroll durationMs must be between 0 and 10000" + ) + ) + } + let touchFrame = resolvedTouchVisualizationFrame( + app: activeApp, + x: localX, + y: localY + ) + do { + var scrollError: Error? + let timing = measureGesture { + do { + try desktopScrollAt( + app: activeApp, + x: x, + y: y, + direction: direction, + pixels: plan.travelPixels, + durationMs: command.durationMs + ) + } catch { + scrollError = error + } + } + if let scrollError { + throw scrollError + } + return gestureResponse(message: "scrolled", timing: timing, frame: .touch(touchFrame)) + } catch { + return Response(ok: false, error: ErrorPayload(message: error.localizedDescription)) + } case .remotePress: guard let button = tvRemoteButton(from: command.remoteButton) else { return Response(ok: false, error: ErrorPayload(message: "remotePress requires remoteButton")) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift index 117276622..2c28dd10e 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandJournal.swift @@ -144,7 +144,7 @@ final class RunnerCommandJournal { case .snapshot, .screenshot: return false case .tap, .mouseClick, .longPress, .drag, - .remotePress, .type, .swipe, .scroll, .findText, .querySelector, .readText, .back, + .remotePress, .type, .swipe, .scroll, .desktopScroll, .findText, .querySelector, .readText, .back, .backInApp, .backSystem, .home, .rotate, .appSwitcher, .keyboardDismiss, .keyboardReturn, .alert, .pinch, .sequence, .rotateGesture, .transformGesture, .recordStart, .recordStop, .status, .uptime, .shutdown: diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift index d17e004c1..93e666ef8 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Interaction.swift @@ -1,5 +1,9 @@ import XCTest +#if os(macOS) +import CoreGraphics +#endif + private enum RunnerInterfaceOrientation { static let unknown = 0 static let portrait = 1 @@ -605,6 +609,110 @@ extension RunnerTests { #endif } + func desktopScrollAt( + app: XCUIApplication, + x: Double, + y: Double, + direction: String, + pixels: Double, + durationMs: Double? + ) throws { +#if os(macOS) + guard let events = desktopScrollWheelDeltaEvents( + direction: direction, + pixels: pixels, + durationMs: durationMs + ) else { + throw NSError( + domain: "AgentDeviceRunner", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "unsupported desktop scroll direction: \(direction)"] + ) + } + + let coordinate = interactionCoordinate(app: app, x: x, y: y) + let interval = desktopScrollEventIntervalSeconds(durationMs: durationMs, eventCount: events.count) + for (index, deltas) in events.enumerated() { + // Keep desktop scrolling on XCTest's coordinate API so macOS owns wheel synthesis, natural + // scrolling preference handling, and cursor placement instead of posting raw CGEvents. + coordinate.scroll( + byDeltaX: CGFloat(deltas.horizontal), + deltaY: CGFloat(deltas.vertical) + ) + if interval > 0 && index < events.count - 1 { + Thread.sleep(forTimeInterval: interval) + } + } +#elseif os(tvOS) + throw NSError( + domain: "AgentDeviceRunner", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "desktopScroll is not supported on tvOS"] + ) +#else + throw NSError( + domain: "AgentDeviceRunner", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "desktopScroll is only supported on macOS"] + ) +#endif + } + + func desktopScrollWheelDeltas(direction: String, pixels: Double) -> (vertical: Int32, horizontal: Int32)? { + let magnitude = Int32(max(1, min(Double(Int32.max), pixels.rounded()))) + switch direction { + case "up": + return (vertical: magnitude, horizontal: 0) + case "down": + return (vertical: -magnitude, horizontal: 0) + case "left": + return (vertical: 0, horizontal: magnitude) + case "right": + return (vertical: 0, horizontal: -magnitude) + default: + return nil + } + } + + func desktopScrollWheelDeltaEvents( + direction: String, + pixels: Double, + durationMs: Double? + ) -> [(vertical: Int32, horizontal: Int32)]? { + guard let totalDeltas = desktopScrollWheelDeltas(direction: direction, pixels: pixels) else { + return nil + } + let magnitude = max(abs(Int(totalDeltas.vertical)), abs(Int(totalDeltas.horizontal))) + let duration = max(0, durationMs ?? 0) + let requestedEventCount = duration > 0 ? Int(ceil(duration / 16.0)) : 1 + let eventCount = max(1, min(magnitude, requestedEventCount)) + guard eventCount > 1 else { + return [totalDeltas] + } + + if totalDeltas.vertical != 0 { + return distributeDesktopScrollDelta(totalDeltas.vertical, eventCount: eventCount) + .map { (vertical: $0, horizontal: 0) } + } + return distributeDesktopScrollDelta(totalDeltas.horizontal, eventCount: eventCount) + .map { (vertical: 0, horizontal: $0) } + } + + func desktopScrollEventIntervalSeconds(durationMs: Double?, eventCount: Int) -> TimeInterval { + guard let durationMs, durationMs > 0, eventCount > 1 else { return 0 } + return (durationMs / 1000.0) / Double(eventCount - 1) + } + + private func distributeDesktopScrollDelta(_ delta: Int32, eventCount: Int) -> [Int32] { + let sign: Int32 = delta < 0 ? -1 : 1 + let magnitude = abs(Int(delta)) + let base = magnitude / eventCount + let remainder = magnitude % eventCount + return (0.. RunnerInteractionOutcome { if let outcome = selectFocusedTvElement(app: app, point: CGPoint(x: x, y: y), action: "double tap") { guard case .performed = outcome else { return outcome } @@ -1199,4 +1307,26 @@ extension RunnerTests { XCTAssertEqual(vector.dy, expected.dy, "dy interfaceOrientation \(orientation)") } } + + func testDesktopScrollWheelDeltasMapDirections() throws { + XCTAssertEqual(try XCTUnwrap(desktopScrollWheelDeltas(direction: "up", pixels: 120)).vertical, 120) + XCTAssertEqual(try XCTUnwrap(desktopScrollWheelDeltas(direction: "down", pixels: 120)).vertical, -120) + XCTAssertEqual(try XCTUnwrap(desktopScrollWheelDeltas(direction: "left", pixels: 120)).horizontal, 120) + XCTAssertEqual(try XCTUnwrap(desktopScrollWheelDeltas(direction: "right", pixels: 120)).horizontal, -120) + XCTAssertNil(desktopScrollWheelDeltas(direction: "diagonal", pixels: 120)) + } + + func testDesktopScrollWheelDeltaEventsHonorDurationAndPreservePixels() throws { + let events = try XCTUnwrap(desktopScrollWheelDeltaEvents(direction: "down", pixels: 200, durationMs: 50)) + XCTAssertEqual(events.count, 4) + XCTAssertEqual(events.map(\.vertical).reduce(0, +), -200) + XCTAssertEqual(events.map(\.horizontal).reduce(0, +), 0) + XCTAssertEqual(desktopScrollEventIntervalSeconds(durationMs: 50, eventCount: events.count), 0.05 / 3.0) + } + + func testDesktopScrollWheelDeltaEventsKeepInstantScrollSingleEvent() throws { + let events = try XCTUnwrap(desktopScrollWheelDeltaEvents(direction: "down", pixels: 200, durationMs: 0)) + XCTAssertEqual(events.count, 1) + XCTAssertEqual(events.first?.vertical, -200) + } } diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift index 26e376cb1..ac8a8919d 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Models.swift @@ -9,6 +9,7 @@ enum CommandType: String, Codable { case type case swipe case scroll + case desktopScroll case findText case querySelector case readText @@ -69,8 +70,9 @@ extension CommandType { // Interaction commands: require the foreground-guard + stabilization preflight. // keyboardReturn is the sibling of keyboardDismiss (missing from the historical switch — // drift the table now prevents). .scroll is the fused frame-resolve + drag scroll; same - // classification as .drag. .sequence is the fused multi-step gesture batch. - case .tap, .longPress, .drag, .remotePress, .type, .swipe, .scroll, + // classification as .drag. .desktopScroll is the macOS frame-resolve + wheel event sibling. + // .sequence is the fused multi-step gesture batch. + case .tap, .longPress, .drag, .remotePress, .type, .swipe, .scroll, .desktopScroll, .back, .backInApp, .backSystem, .rotate, .appSwitcher, .keyboardDismiss, .keyboardReturn, .pinch, .sequence, .rotateGesture, .transformGesture: return CommandTraits(isInteraction: true, readOnly: .never, isLifecycle: false) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScrollGesture.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScrollGesture.swift index 12f0d4d80..085a859d8 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScrollGesture.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+ScrollGesture.swift @@ -119,6 +119,24 @@ extension RunnerTests { XCTAssertEqual(plan.travelPixels, 720) } + func testRunnerScrollGesturePlanClampsExplicitPixelsVertically() throws { + // 400x800, down, pixels 1000 clamps travel to the safe band (720): (200,760)->(200,40). + let plan = try XCTUnwrap( + runnerScrollGesturePlan( + direction: "down", + amount: nil, + pixels: 1000, + referenceWidth: 400, + referenceHeight: 800 + ) + ) + XCTAssertEqual(plan.x1, 200) + XCTAssertEqual(plan.y1, 760) + XCTAssertEqual(plan.x2, 200) + XCTAssertEqual(plan.y2, 40) + XCTAssertEqual(plan.travelPixels, 720) + } + func testRunnerScrollGesturePlanFloorsTinyFrames() throws { // 2x2, down, pixels 10 engages every max(1, ...) floor and the .5 rounding cases the two // ports must agree on (halfTravel 0.5 -> 1, center 1 from 2/2): (1,2)->(1,0), travel 1. diff --git a/scripts/integration-progress-model.ts b/scripts/integration-progress-model.ts index 73f37ef17..b0aab6246 100644 --- a/scripts/integration-progress-model.ts +++ b/scripts/integration-progress-model.ts @@ -19,7 +19,9 @@ export function buildIntegrationProgressModel({ root = process.cwd() } = {}) { const handlerTests = listFiles(handlerTestDir, (file) => file.endsWith('.test.ts')); const providerScenarioTests = listFiles(providerScenarioDir, (file) => file.endsWith('.test.ts')); const providerScenarioSources = listFiles(providerScenarioDir, (file) => file.endsWith('.ts')); - const providerScenarioSupportSources = providerScenarioSources.filter((file) => !file.endsWith('.test.ts')); + const providerScenarioSupportSources = providerScenarioSources.filter( + (file) => !file.endsWith('.test.ts'), + ); const handlerStats = summarizeFiles(handlerTests); const providerScenarioStats = summarizeFiles(providerScenarioTests); const providerScenarioSupportStats = summarizeFiles(providerScenarioSupportSources); @@ -28,7 +30,10 @@ export function buildIntegrationProgressModel({ root = process.cwd() } = {}) { ); const mockHeavyHandlerRows = summarizeMockHeavyHandlerFiles(root, mockHeavyHandlerFiles); const providerPressureRows = summarizeProviderPressure(providerScenarioSources); - const publicCommandRows = summarizePublicCommandCoverage(providerScenarioTests, clientCommandMethods); + const publicCommandRows = summarizePublicCommandCoverage( + providerScenarioTests, + clientCommandMethods, + ); const missingPublicCommands = publicCommandRows.filter((command) => command.references === 0); const flagCoverageRows = summarizeProviderScenarioFlagCoverage(providerScenarioTests); const missingFlagRows = flagCoverageRows.filter((flag) => flag.references === 0); @@ -57,12 +62,18 @@ export function buildIntegrationProgressModel({ root = process.cwd() } = {}) { 'Public commands covered by provider-backed integration', `${publicCommandRows.length - missingPublicCommands.length}/${publicCommandRows.length}`, ], - ['Public commands missing provider-backed integration coverage', String(missingPublicCommands.length)], + [ + 'Public commands missing provider-backed integration coverage', + String(missingPublicCommands.length), + ], [ 'Device-observable workflow flags covered by provider-backed integration', `${flagCoverageRows.length - missingFlagRows.length}/${flagCoverageRows.length}`, ], - ['Device-observable workflow flags missing provider-backed integration coverage', String(missingFlagRows.length)], + [ + 'Device-observable workflow flags missing provider-backed integration coverage', + String(missingFlagRows.length), + ], [ 'Public CLI flags intentionally outside provider-backed integration', String(excludedFlagRows.reduce((sum, group) => sum + group.keys.length, 0)), @@ -140,6 +151,7 @@ function summarizeProviderScenarioFlagCoverage(files) { ['hideTouches', 'recording without touch overlays'], ['intervalMs', 'repeated press interval'], ['delayMs', 'typing/fill delay'], + ['durationMs', 'scroll and gesture duration'], ['holdMs', 'press hold duration'], ['jitterPx', 'press jitter'], ['pixels', 'scroll distance'], @@ -338,7 +350,8 @@ function summarizeProviderPressure(files) { const surfaces = [ { name: 'Android ADB provider', - pattern: /\bAndroidAdbProvider\b|\bandroidAdbProvider\b|\badbProvider\b|\badb\.(?:exec|installer|puller|portReverse)\b/g, + pattern: + /\bAndroidAdbProvider\b|\bandroidAdbProvider\b|\badbProvider\b|\badb\.(?:exec|installer|puller|portReverse)\b/g, }, { name: 'Apple runner provider', @@ -346,8 +359,7 @@ function summarizeProviderPressure(files) { }, { name: 'Apple simctl/devicectl provider', - pattern: - /\bsimctl\b|\bdevicectl\b|\brunXcrun\b|\bsimctl\s*:|\bdevicectl\s*:/g, + pattern: /\bsimctl\b|\bdevicectl\b|\brunXcrun\b|\bsimctl\s*:|\bdevicectl\s*:/g, }, { name: 'Apple macOS helper provider', @@ -451,7 +463,9 @@ function readClientCommandMethods(commandContractFiles) { for (const file of commandContractFiles) { const text = fs.readFileSync(file, 'utf8'); for (const block of readCommandContractBlocks(text)) { - for (const method of block.source.matchAll(/\bclient\.([A-Za-z0-9_]+)\.([A-Za-z0-9_]+)\s*\(/g)) { + for (const method of block.source.matchAll( + /\bclient\.([A-Za-z0-9_]+)\.([A-Za-z0-9_]+)\s*\(/g, + )) { commands.set(`${method[1]}.${method[2]}`, block.name); } } @@ -513,7 +527,9 @@ function extractProviderScenarioCommandReferences(text, clientCommandMethods) { function extractLiteralCommandReferences(text) { const commands = []; - for (const match of text.matchAll(/\bcommand:\s*['"]([^'"]+)['"]|\.callCommand\(\s*['"]([^'"]+)['"]/g)) { + for (const match of text.matchAll( + /\bcommand:\s*['"]([^'"]+)['"]|\.callCommand\(\s*['"]([^'"]+)['"]/g, + )) { commands.push(match[1] ?? match[2]); } return commands; diff --git a/src/backend.ts b/src/backend.ts index 0ad0502f3..1902dcb9b 100644 --- a/src/backend.ts +++ b/src/backend.ts @@ -168,6 +168,7 @@ export type BackendScrollOptions = { direction: ScrollDirection; amount?: number; pixels?: number; + durationMs?: number; }; export type BackendPinchOptions = { diff --git a/src/client-normalizers.ts b/src/client-normalizers.ts index 402a3c0d0..0424edcb0 100644 --- a/src/client-normalizers.ts +++ b/src/client-normalizers.ts @@ -309,6 +309,7 @@ export function buildFlags(options: InternalRequestOptions): CommandFlags { hideTouches: options.hideTouches, intervalMs: options.intervalMs, delayMs: options.delayMs, + durationMs: options.durationMs, holdMs: options.holdMs, jitterPx: options.jitterPx, pixels: options.pixels, diff --git a/src/client-types.ts b/src/client-types.ts index 210d9c449..9f86b2a0a 100644 --- a/src/client-types.ts +++ b/src/client-types.ts @@ -653,6 +653,7 @@ export type ScrollOptions = DeviceCommandBaseOptions & { direction: ScrollInputDirection; amount?: number; pixels?: number; + durationMs?: number; }; export type PinchOptions = DeviceCommandBaseOptions & { @@ -859,6 +860,7 @@ type CommandExecutionOptions = Partial & { hideTouches?: boolean; intervalMs?: number; delayMs?: number; + durationMs?: number; holdMs?: number; jitterPx?: number; pixels?: number; diff --git a/src/commands/batch/cli.test.ts b/src/commands/batch/cli.test.ts index 0eb957d1f..5a1761285 100644 --- a/src/commands/batch/cli.test.ts +++ b/src/commands/batch/cli.test.ts @@ -93,6 +93,21 @@ test('batch rejects invalid structured step input before daemon projection', asy assert.doesNotMatch(result.stderr, /undefined/); }); +test('batch rejects structured scroll duration above the shared cap before daemon projection', async () => { + const result = await runCliCapture([ + 'batch', + '--steps', + '[{"command":"scroll","input":{"direction":"down","pixels":200,"durationMs":10001}}]', + ]); + + assert.equal(result.code, 1); + assert.equal(result.calls.length, 0); + assert.match( + result.stderr, + /Batch step 1 scroll input is invalid: Expected durationMs to be at most 10000\./, + ); +}); + test('batch rejects structured replay steps before daemon dispatch', async () => { const result = await runCliCapture([ 'batch', diff --git a/src/commands/interaction/index.ts b/src/commands/interaction/index.ts index f3d0682c4..c02876a91 100644 --- a/src/commands/interaction/index.ts +++ b/src/commands/interaction/index.ts @@ -117,11 +117,11 @@ const interactionCliSchemas = { allowedFlags: [...SELECTOR_SNAPSHOT_FLAGS, 'delayMs'], }, scroll: { - usageOverride: 'scroll [amount] [--pixels ]', + usageOverride: 'scroll [amount] [--pixels ] [--duration-ms ]', helpDescription: 'Scroll in a direction, or toward the top/bottom edge of scrollable content.', summary: 'Scroll in a direction or to an edge', positionalArgs: ['directionOrEdge', 'amount?'], - allowedFlags: ['pixels'], + allowedFlags: ['pixels', 'durationMs'], }, } as const satisfies Record; diff --git a/src/commands/interaction/interactions.ts b/src/commands/interaction/interactions.ts index 23ce1e674..f6a3e9eaf 100644 --- a/src/commands/interaction/interactions.ts +++ b/src/commands/interaction/interactions.ts @@ -87,6 +87,7 @@ export const interactionCliReaders = { direction: readScrollDirection(positionals[0]), amount: optionalCliNumber(positionals[1]), pixels: flags.pixels, + durationMs: flags.durationMs, }), get: (positionals, flags) => ({ ...commonInputFromFlags(flags), diff --git a/src/commands/interaction/metadata.ts b/src/commands/interaction/metadata.ts index 8fac7575a..b37296440 100644 --- a/src/commands/interaction/metadata.ts +++ b/src/commands/interaction/metadata.ts @@ -28,6 +28,7 @@ import { import { defineFieldCommandMetadata } from '../field-command-contract.ts'; import { CLICK_BUTTONS } from '../../core/click-button.ts'; import { + SCROLL_DURATION_MAX_MS, SCROLL_DIRECTIONS, SWIPE_PATTERNS, SWIPE_PRESETS, @@ -114,6 +115,10 @@ const scrollFields = { direction: requiredField(enumField(SCROLL_INPUT_DIRECTIONS)), amount: numberField('Platform scroll amount.'), pixels: integerField('Pixel scroll amount.', { min: 0 }), + durationMs: integerField('Desktop scroll duration in milliseconds.', { + min: 0, + max: SCROLL_DURATION_MAX_MS, + }), }; const getFields = { diff --git a/src/commands/interaction/runtime/gestures.ts b/src/commands/interaction/runtime/gestures.ts index ec7da2da9..9dd38d465 100644 --- a/src/commands/interaction/runtime/gestures.ts +++ b/src/commands/interaction/runtime/gestures.ts @@ -4,6 +4,7 @@ import { centerOfRect } from '../../../utils/snapshot.ts'; import { buildSwipePresetGesturePlan, parseSwipePreset, + SCROLL_DURATION_MAX_MS, type GestureReferenceFrame, type ScrollDirection, type SwipePreset, @@ -64,6 +65,7 @@ export type ScrollCommandOptions = CommandContext & { direction: ScrollInputDirection; amount?: number; pixels?: number; + durationMs?: number; }; export type ScrollCommandResult = @@ -74,6 +76,7 @@ export type ScrollCommandResult = passes?: number; amount?: number; pixels?: number; + durationMs?: number; }> | BackendResultVariant< ResolvedInteractionTarget & { @@ -82,6 +85,7 @@ export type ScrollCommandResult = passes?: number; amount?: number; pixels?: number; + durationMs?: number; } >; @@ -182,6 +186,11 @@ export const scrollCommand: RuntimeCommand>> | undefined; let completedPasses = 0; @@ -216,12 +226,14 @@ export const scrollCommand: RuntimeCommand | undefined, +): number | undefined { + return typeof backendResult?.durationMs === 'number' ? backendResult.durationMs : undefined; +} + export const swipeCommand: RuntimeCommand = async ( runtime, options, @@ -516,6 +534,21 @@ function normalizeOptionalPositiveInteger( return value; } +function normalizeOptionalNonNegativeInteger( + value: number | undefined, + field: string, + max?: number, +): number | undefined { + if (value === undefined) return undefined; + if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) { + throw new AppError('INVALID_ARGS', `${field} must be a non-negative integer`); + } + if (max !== undefined && value > max) { + throw new AppError('INVALID_ARGS', `${field} must be at most ${max}`); + } + return value; +} + function resolveSnapshotViewport(nodes: SnapshotState['nodes']): Rect { const visibleRects = nodes .filter((node) => isNodeVisibleInEffectiveViewport(node, nodes)) diff --git a/src/commands/interaction/runtime/interactions.test.ts b/src/commands/interaction/runtime/interactions.test.ts index 5d716aaa0..e53055202 100644 --- a/src/commands/interaction/runtime/interactions.test.ts +++ b/src/commands/interaction/runtime/interactions.test.ts @@ -516,6 +516,7 @@ test('runtime scroll resolves selector targets before calling the backend primit target: selector('label=Continue'), direction: 'down', pixels: 120, + durationMs: 50, }); const viewportResult = await device.interactions.scroll({ direction: 'up', @@ -523,11 +524,12 @@ test('runtime scroll resolves selector targets before calling the backend primit }); assert.equal(selectorResult.kind, 'selector'); + assert.equal(selectorResult.durationMs, undefined); assert.equal(viewportResult.kind, 'viewport'); assert.deepEqual(calls, [ { target: { kind: 'point', point: { x: 60, y: 40 } }, - options: { direction: 'down', pixels: 120 }, + options: { direction: 'down', pixels: 120, durationMs: 50 }, }, { target: { kind: 'viewport' }, @@ -536,6 +538,42 @@ test('runtime scroll resolves selector targets before calling the backend primit ]); }); +test('runtime scroll reports duration only when the backend honored it', async () => { + const device = createInteractionDevice(selectorSnapshot(), { + scroll: async (_context, _target, options) => ({ durationMs: options?.durationMs }), + }); + + const result = await device.interactions.scroll({ + direction: 'down', + pixels: 120, + durationMs: 50, + }); + + assert.equal(result.durationMs, 50); + assert.deepEqual(result.backendResult, { durationMs: 50 }); +}); + +test('runtime scroll rejects duration above the shared cap', async () => { + const device = createInteractionDevice(selectorSnapshot(), { + scroll: async () => { + throw new Error('scroll should be rejected before backend call'); + }, + }); + + await assert.rejects( + () => + device.interactions.scroll({ + direction: 'down', + pixels: 120, + durationMs: 10_001, + }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + /durationMs.*at most 10000/i.test(error.message), + ); +}); + test('runtime scroll bottom rejects blind scrolling without snapshot support', async () => { const calls: unknown[] = []; const device = createInteractionDevice(selectorSnapshot(), { diff --git a/src/core/__tests__/dispatch-scroll.test.ts b/src/core/__tests__/dispatch-scroll.test.ts index 497992911..82bee78d8 100644 --- a/src/core/__tests__/dispatch-scroll.test.ts +++ b/src/core/__tests__/dispatch-scroll.test.ts @@ -16,6 +16,60 @@ test('dispatch scroll rejects mixing amount and --pixels', async () => { ); }); +test('dispatch scroll forwards pixels and duration without reporting ignored duration', async () => { + const calls: Array<{ direction: string; options: unknown }> = []; + const interactor = { + scroll: async (direction: any, options: unknown) => { + calls.push({ direction, options }); + return { ok: true }; + }, + } as unknown as Interactor; + + const result = await handleScrollCommand(interactor, ['down'], { + pixels: 200, + durationMs: 50, + }); + + assert.deepEqual(calls, [ + { + direction: 'down', + options: { amount: undefined, pixels: 200, durationMs: 50 }, + }, + ]); + assert.equal(result.pixels, 200); + assert.equal(result.durationMs, undefined); +}); + +test('dispatch scroll reports duration when the interactor honored it', async () => { + const interactor = { + scroll: async () => ({ pixels: 200, durationMs: 50 }), + } as unknown as Interactor; + + const result = await handleScrollCommand(interactor, ['down'], { + pixels: 200, + durationMs: 50, + }); + + assert.equal(result.pixels, 200); + assert.equal(result.durationMs, 50); +}); + +test('dispatch scroll rejects duration above the shared cap', async () => { + const interactor = { + scroll: async () => { + throw new Error('scroll should be rejected before backend call'); + }, + } as unknown as Interactor; + + await assert.rejects( + () => handleScrollCommand(interactor, ['down'], { pixels: 200, durationMs: 10_001 }), + (error: unknown) => + error instanceof AppError && + error.code === 'INVALID_ARGS' && + /durationMs.*at most 10000/i.test(error.message), + ); +}); + test('dispatch scroll bottom rejects blind scrolling without snapshot support', async () => { const calls: Array<{ direction: string; options: unknown }> = []; const interactor = { @@ -93,7 +147,7 @@ test('dispatch scroll bottom scrolls only while scoped snapshot confirms hidden assert.equal(calls.length, 1); assert.deepEqual(calls[0], { direction: 'down', - options: { amount: undefined, pixels: undefined }, + options: { amount: undefined, pixels: undefined, durationMs: undefined }, }); assert.equal(result.passes, 1); assert.equal(result.lastPass, 1); diff --git a/src/core/__tests__/scroll-gesture.test.ts b/src/core/__tests__/scroll-gesture.test.ts index 3b5fa639a..e1606f4cc 100644 --- a/src/core/__tests__/scroll-gesture.test.ts +++ b/src/core/__tests__/scroll-gesture.test.ts @@ -75,6 +75,27 @@ test('buildScrollGesturePlan clamps amounts above 1 to the safe gesture band', ( }); }); +test('buildScrollGesturePlan clamps explicit pixel travel to the vertical safe gesture band', () => { + const plan = buildScrollGesturePlan({ + direction: 'down', + pixels: 1000, + referenceWidth: 400, + referenceHeight: 800, + }); + + assert.deepEqual(plan, { + direction: 'down', + x1: 200, + y1: 760, + x2: 200, + y2: 40, + referenceWidth: 400, + referenceHeight: 800, + amount: undefined, + pixels: 720, + }); +}); + test('buildScrollGesturePlan floors padding and travel on tiny frames', () => { // 2x2 engages every max(1, ...) floor and the .5 rounding cases the two ports must agree on // (halfTravel 0.5 -> 1, center 1 from 2/2). diff --git a/src/core/dispatch-context.ts b/src/core/dispatch-context.ts index 021ba1f46..1077543e9 100644 --- a/src/core/dispatch-context.ts +++ b/src/core/dispatch-context.ts @@ -51,6 +51,7 @@ export type DispatchContext = ScreenshotDispatchFlags & { count?: number; intervalMs?: number; delayMs?: number; + durationMs?: number; holdMs?: number; jitterPx?: number; pixels?: number; diff --git a/src/core/dispatch-interactions.ts b/src/core/dispatch-interactions.ts index 468a513e6..578c4185b 100644 --- a/src/core/dispatch-interactions.ts +++ b/src/core/dispatch-interactions.ts @@ -7,6 +7,7 @@ import { inferGestureReferenceFrame, parseScrollDirection, parseSwipePreset, + SCROLL_DURATION_MAX_MS, SCROLL_DIRECTIONS, SWIPE_PATTERNS, type ScrollDirection, @@ -44,6 +45,17 @@ import type { RunnerSequenceStep } from '../platforms/ios/runner-contract.ts'; import type { DispatchContext } from './dispatch-context.ts'; import type { Interactor, RunnerCallOptions } from './interactor-types.ts'; +type ScrollTarget = { + direction: ScrollDirection; + edge?: ScrollEdge; +}; + +type ScrollCommandOptions = { + amount?: number; + pixels?: number; + durationMs?: number; +}; + export async function handleLongPressCommand( interactor: Interactor, positionals: string[], @@ -735,50 +747,106 @@ export async function handleScrollCommand( const directionInput = positionals[0]; const amount = positionals[1] ? Number(positionals[1]) : undefined; const pixels = context?.pixels; + const durationMs = context?.durationMs; if (!directionInput) throw new AppError('INVALID_ARGS', 'scroll requires direction'); + assertScrollCommandInputs(amount, pixels, durationMs); + + const target = parseScrollTarget(directionInput); + const options = { amount, pixels, durationMs }; + const { interactionResult, completedPasses } = await runDispatchedScroll( + interactor, + context, + target, + options, + ); + + const result = buildDispatchedScrollResult(target, options, completedPasses, interactionResult); + return withSuccessText( + result, + formatScrollEdgeMessage(target.direction, target.edge, completedPasses, amount, pixels), + ); +} + +function assertScrollCommandInputs( + amount: number | undefined, + pixels: number | undefined, + durationMs: number | undefined, +): void { + assertScrollAmountInput(amount); + assertScrollDurationInput(durationMs); + assertExclusiveScrollDistanceInputs(amount, pixels); +} + +function assertScrollAmountInput(amount: number | undefined): void { if (amount !== undefined && !Number.isFinite(amount)) { throw new AppError('INVALID_ARGS', 'scroll amount must be a number'); } +} + +function assertScrollDurationInput(durationMs: number | undefined): void { + if (durationMs === undefined) return; + if (!Number.isFinite(durationMs) || !Number.isInteger(durationMs) || durationMs < 0) { + throw new AppError('INVALID_ARGS', 'scroll durationMs must be a non-negative integer'); + } + if (durationMs > SCROLL_DURATION_MAX_MS) { + throw new AppError( + 'INVALID_ARGS', + `scroll durationMs must be a non-negative integer at most ${SCROLL_DURATION_MAX_MS}`, + ); + } +} + +function assertExclusiveScrollDistanceInputs( + amount: number | undefined, + pixels: number | undefined, +): void { if (amount !== undefined && pixels !== undefined) { throw new AppError( 'INVALID_ARGS', 'scroll accepts either a relative amount or --pixels, not both', ); } - const target = parseScrollTarget(directionInput); - let interactionResult: Record | void = {}; - let completedPasses = 0; +} +async function runDispatchedScroll( + interactor: Interactor, + context: DispatchContext | undefined, + target: ScrollTarget, + options: ScrollCommandOptions, +): Promise<{ interactionResult: Record; completedPasses: number }> { if (target.edge) { const edge = target.edge; const edgeResult = await runScrollEdgePasses({ edge, captureState: async (scope) => await captureVerifiedScrollEdgeState(interactor, context, edge, scope), - scroll: async () => await interactor.scroll(target.direction, { amount, pixels }), + scroll: async () => await interactor.scroll(target.direction, options), }); - interactionResult = edgeResult.result ?? {}; - completedPasses = edgeResult.passes; - } else { - interactionResult = await interactor.scroll(target.direction, { amount, pixels }); - completedPasses = 1; + return { + interactionResult: edgeResult.result ?? {}, + completedPasses: edgeResult.passes, + }; } - return withSuccessText( - { - direction: target.direction, - ...(target.edge - ? { - edge: target.edge, - passes: completedPasses, - } - : {}), - ...(amount !== undefined ? { amount } : {}), - ...(pixels !== undefined ? { pixels } : {}), - ...interactionResult, - }, - formatScrollEdgeMessage(target.direction, target.edge, completedPasses, amount, pixels), - ); + return { + interactionResult: (await interactor.scroll(target.direction, options)) ?? {}, + completedPasses: 1, + }; +} + +function buildDispatchedScrollResult( + target: ScrollTarget, + options: ScrollCommandOptions, + completedPasses: number, + interactionResult: Record, +): Record { + return { + direction: target.direction, + ...(target.edge ? { edge: target.edge, passes: completedPasses } : {}), + ...(options.amount !== undefined ? { amount: options.amount } : {}), + ...(options.pixels !== undefined ? { pixels: options.pixels } : {}), + ...interactionResult, + }; } async function captureVerifiedScrollEdgeState( diff --git a/src/core/interactor-types.ts b/src/core/interactor-types.ts index a7d6fb7df..8cf38c977 100644 --- a/src/core/interactor-types.ts +++ b/src/core/interactor-types.ts @@ -114,7 +114,7 @@ export type Interactor = { ): Promise | void>; scroll( direction: ScrollDirection, - options?: { amount?: number; pixels?: number }, + options?: { amount?: number; pixels?: number; durationMs?: number }, ): Promise | void>; pinch(scale: number, x?: number, y?: number): Promise | void>; screenshot(outPath: string, options?: ScreenshotOptions): Promise; diff --git a/src/core/scroll-gesture.ts b/src/core/scroll-gesture.ts index 80f0d67de..62802c05b 100644 --- a/src/core/scroll-gesture.ts +++ b/src/core/scroll-gesture.ts @@ -4,6 +4,7 @@ import type { Rect, SnapshotNode } from '../utils/snapshot.ts'; export const SCROLL_DIRECTIONS = ['up', 'down', 'left', 'right'] as const; export type ScrollDirection = (typeof SCROLL_DIRECTIONS)[number]; +export const SCROLL_DURATION_MAX_MS = 10_000; export const SWIPE_PRESETS = ['left', 'right', 'left-edge', 'right-edge'] as const; export type SwipePreset = (typeof SWIPE_PRESETS)[number]; export const SWIPE_PATTERNS = ['one-way', 'ping-pong'] as const; diff --git a/src/daemon/context.ts b/src/daemon/context.ts index a4c577106..ffd99cfd3 100644 --- a/src/daemon/context.ts +++ b/src/daemon/context.ts @@ -40,6 +40,7 @@ export function contextFromFlags( count: flags?.count, intervalMs: flags?.intervalMs, delayMs: flags?.delayMs, + durationMs: flags?.durationMs, holdMs: flags?.holdMs, jitterPx: flags?.jitterPx, pixels: flags?.pixels, diff --git a/src/platforms/android/input-actions.ts b/src/platforms/android/input-actions.ts index 6036cde20..94e17ddc5 100644 --- a/src/platforms/android/input-actions.ts +++ b/src/platforms/android/input-actions.ts @@ -204,7 +204,7 @@ function throwAndroidFillFailure( export async function scrollAndroid( device: DeviceInfo, direction: ScrollDirection, - options?: { amount?: number; pixels?: number }, + options?: { amount?: number; pixels?: number; durationMs?: number }, ): Promise> { const size = await getAndroidScreenSize(device); const plan = buildScrollGesturePlan({ diff --git a/src/platforms/ios/__tests__/index.test.ts b/src/platforms/ios/__tests__/index.test.ts index a6318c213..522e87651 100644 --- a/src/platforms/ios/__tests__/index.test.ts +++ b/src/platforms/ios/__tests__/index.test.ts @@ -341,37 +341,104 @@ for (const [name, device] of [ }); } -for (const [name, device] of [ - ['iOS', IOS_TEST_SIMULATOR], - ['macOS', MACOS_TEST_DEVICE], -] as const) { - test(`iosRunnerOverrides maps ${name} scroll to a single fused scroll command`, async () => { - // The fused scroll resolves the frame and performs the drag in one runner lifecycle command; - // no separate interactionFrame request and no durationMs (the runner pins the non-synthesized - // drag path that ignores it). - mockRunIosRunnerCommand.mockResolvedValueOnce({ - x: 200, - y: 640, - x2: 200, - y2: 160, - referenceWidth: 400, - referenceHeight: 800, - }); +test('iosRunnerOverrides maps iOS scroll to a single fused scroll command', async () => { + // The fused scroll resolves the frame and performs the drag in one runner lifecycle command; + // no separate interactionFrame request and no durationMs (the runner pins the non-synthesized + // drag path that ignores it). + mockRunIosRunnerCommand.mockResolvedValueOnce({ + x: 200, + y: 640, + x2: 200, + y2: 160, + referenceWidth: 400, + referenceHeight: 800, + }); - const { overrides } = iosRunnerOverrides(device, { - appBundleId: 'com.example.App', - }); + const { overrides } = iosRunnerOverrides(IOS_TEST_SIMULATOR, { + appBundleId: 'com.example.App', + }); - await overrides.scroll('down'); + const result = await overrides.scroll('down', { durationMs: 50 }); - assert.equal(mockRunIosRunnerCommand.mock.calls.length, 1); - assert.deepEqual(mockRunIosRunnerCommand.mock.calls[0]?.[1], { - command: 'scroll', - direction: 'down', - appBundleId: 'com.example.App', - }); + assert.equal(mockRunIosRunnerCommand.mock.calls.length, 1); + assert.deepEqual(mockRunIosRunnerCommand.mock.calls[0]?.[1], { + command: 'scroll', + direction: 'down', + appBundleId: 'com.example.App', }); -} + assert.deepEqual(result, { + x1: 200, + y1: 640, + x2: 200, + y2: 160, + referenceWidth: 400, + referenceHeight: 800, + pixels: 480, + }); +}); + +test('iosRunnerOverrides does not report duration for tvOS remote scroll', async () => { + mockRunIosRunnerCommand.mockResolvedValueOnce({ + ok: true, + }); + + const { overrides } = iosRunnerOverrides(TVOS_TEST_SIMULATOR, { + appBundleId: 'com.example.App', + }); + + const result = await overrides.scroll('down', { durationMs: 50 }); + + assert.equal(mockRunIosRunnerCommand.mock.calls.length, 1); + assert.deepEqual(mockRunIosRunnerCommand.mock.calls[0]?.[1], { + command: 'remotePress', + remoteButton: 'down', + appBundleId: 'com.example.App', + }); + assert.deepEqual(result, {}); +}); + +test('iosRunnerOverrides maps macOS desktop scroll to a desktop wheel command', async () => { + mockRunIosRunnerCommand.mockResolvedValueOnce({ + x: 737.5, + y: 476.5, + referenceWidth: 400, + referenceHeight: 800, + }); + + const { overrides } = iosRunnerOverrides(MACOS_TEST_DEVICE, { + appBundleId: 'com.example.App', + }); + + const result = await overrides.scroll('down', { pixels: 200, durationMs: 50 }); + + assert.equal(mockRunIosRunnerCommand.mock.calls.length, 1); + assert.deepEqual(mockRunIosRunnerCommand.mock.calls[0]?.[1], { + command: 'desktopScroll', + direction: 'down', + pixels: 200, + durationMs: 50, + appBundleId: 'com.example.App', + }); + assert.deepEqual(result, { + x1: 737.5, + y1: 476.5, + referenceWidth: 400, + referenceHeight: 800, + pixels: 200, + durationMs: 50, + }); +}); + +test('iosRunnerOverrides rejects macOS desktop scroll duration above the shared cap', async () => { + const { overrides } = iosRunnerOverrides(MACOS_TEST_DEVICE, { + appBundleId: 'com.example.App', + }); + + await assert.rejects(() => overrides.scroll('down', { pixels: 200, durationMs: 10_001 }), { + code: 'INVALID_ARGS', + }); + assert.equal(mockRunIosRunnerCommand.mock.calls.length, 0); +}); test('AGENT_DEVICE_MACOS_HELPER_BIN rejects relative override paths', async () => { const previousHelperPath = process.env.AGENT_DEVICE_MACOS_HELPER_BIN; diff --git a/src/platforms/ios/__tests__/runner-client.test.ts b/src/platforms/ios/__tests__/runner-client.test.ts index 971f7609a..0bc0ee8e4 100644 --- a/src/platforms/ios/__tests__/runner-client.test.ts +++ b/src/platforms/ios/__tests__/runner-client.test.ts @@ -113,6 +113,13 @@ const runnerProtocolCommandFixtures: Record { assert.match(command.commandId ?? '', /^runner-/); }); +test('desktopScroll is a mutating, command-id-tracked runner command', () => { + assert.equal(isReadOnlyRunnerCommand('desktopScroll'), false); + + const command = withRunnerCommandId({ + command: 'desktopScroll', + direction: 'down', + pixels: 120, + }); + assert.match(command.commandId ?? '', /^runner-/); +}); + test('withRunnerCommandId does not add command ids to status probes', () => { const command = withRunnerCommandId({ command: 'status', diff --git a/src/platforms/ios/__tests__/runner-command-traits.test.ts b/src/platforms/ios/__tests__/runner-command-traits.test.ts index 8248c6021..e4eeb3efb 100644 --- a/src/platforms/ios/__tests__/runner-command-traits.test.ts +++ b/src/platforms/ios/__tests__/runner-command-traits.test.ts @@ -18,6 +18,7 @@ const EXPECTED_RUNNER_COMMAND_TRAITS = { type: defaults(), swipe: hotMutation(), scroll: hotMutation(), + desktopScroll: hotMutation(), findText: readOnly(), querySelector: readOnly(), readText: readOnly(), diff --git a/src/platforms/ios/__tests__/runner-session.test.ts b/src/platforms/ios/__tests__/runner-session.test.ts index bdb590c34..eb8a34d68 100644 --- a/src/platforms/ios/__tests__/runner-session.test.ts +++ b/src/platforms/ios/__tests__/runner-session.test.ts @@ -958,6 +958,7 @@ const ALLOWLISTED_MUTATIONS: { name: string; command: Record }[ { name: 'drag', command: { command: 'drag', x: 1, y: 2, x2: 3, y2: 4 } }, { name: 'swipe', command: { command: 'swipe', x: 1, y: 2, x2: 3, y2: 4 } }, { name: 'scroll', command: { command: 'scroll', direction: 'down' } }, + { name: 'desktopScroll', command: { command: 'desktopScroll', direction: 'down' } }, { name: 'sequence', command: { command: 'sequence', steps: [{ kind: 'tap', x: 120, y: 240 }] }, diff --git a/src/platforms/ios/interactions.ts b/src/platforms/ios/interactions.ts index 4939ee45e..60346eee5 100644 --- a/src/platforms/ios/interactions.ts +++ b/src/platforms/ios/interactions.ts @@ -2,8 +2,10 @@ import type { DeviceInfo } from '../../utils/device.ts'; import { assertScrollGestureInput, buildScrollGesturePlan, + SCROLL_DURATION_MAX_MS, type ScrollDirection, } from '../../core/scroll-gesture.ts'; +import { AppError } from '../../utils/errors.ts'; import { runIosRunnerCommand } from './runner-client.ts'; import { buildRunnerSequenceCommand, parseRunnerSequenceResult } from './runner-sequence.ts'; import type { RunnerCommand } from './runner-contract.ts'; @@ -26,9 +28,12 @@ const IOS_SWIPE_MAX_DURATION_MS = 10_000; type NormalizedScrollOptions = { amount?: number; pixels?: number; + durationMs?: number; preferProvidedPixels?: boolean; }; +type AppleScrollOptions = Omit; + type IosDragCommandOptions = { defaultDurationMs: number; legacyDefaultDurationMs?: number; @@ -322,7 +327,7 @@ async function runAppleScroll( ctx: RunnerContext, runnerOpts: RunnerOpts, direction: ScrollDirection, - options?: { amount?: number; pixels?: number }, + options?: AppleScrollOptions, ): Promise> { if (device.target === 'tv') { const runnerResult = await runRunnerCommand( @@ -330,13 +335,30 @@ async function runAppleScroll( appleRemotePressCommand(direction, ctx.appBundleId), runnerOpts, ); - return normalizeIosScrollResult(runnerResult, options); + return normalizeIosScrollResult(runnerResult, { amount: options?.amount }); } // Validate amount/pixels up front so bad inputs throw INVALID_ARGS before any runner command // is sent (previously validation ran between the frame request and the drag, so a bad amount // could cost one runner request first). assertScrollGestureInput(options ?? {}); + assertScrollDurationInput(options?.durationMs); + + if (device.platform === 'macos') { + const runnerResult = await runRunnerCommand( + device, + { + command: 'desktopScroll', + direction, + ...scrollRunnerFields(options, { includeDuration: true }), + appBundleId: ctx.appBundleId, + }, + runnerOpts, + ); + return normalizeScrollResultWithResolvedFrame(runnerResult, direction, options, { + includeDuration: true, + }); + } // Single fused lifecycle command: the runner resolves the interaction frame and runs the drag. // durationMs is intentionally not sent — scroll's drag used 250ms today, but the runner's @@ -347,8 +369,7 @@ async function runAppleScroll( { command: 'scroll', direction, - ...(options?.amount !== undefined ? { amount: options.amount } : {}), - ...(options?.pixels !== undefined ? { pixels: options.pixels } : {}), + ...scrollRunnerFields(options), appBundleId: ctx.appBundleId, }, runnerOpts, @@ -356,26 +377,70 @@ async function runAppleScroll( const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); - if (referenceWidth !== undefined && referenceHeight !== undefined) { - // Recompute the plan from the runner's resolved frame so reported pixels match the planned - // travel (TS keeps buildScrollGesturePlan for Android and recording anyway). - const plan = buildScrollGesturePlan({ - direction, - amount: options?.amount, - pixels: options?.pixels, - referenceWidth, - referenceHeight, - }); - return normalizeIosScrollResult(runnerResult, { - amount: options?.amount, - pixels: plan.pixels, - preferProvidedPixels: true, - }); - } + if (referenceWidth !== undefined && referenceHeight !== undefined) + return normalizeScrollResultWithResolvedFrame(runnerResult, direction, options); + // Missing frame dims: derive pixels from endpoint travel instead of throwing. return normalizeIosScrollResult(runnerResult, { amount: options?.amount }); } +function assertScrollDurationInput(durationMs: number | undefined): void { + if (durationMs === undefined) return; + if ( + !Number.isFinite(durationMs) || + !Number.isInteger(durationMs) || + durationMs < 0 || + durationMs > SCROLL_DURATION_MAX_MS + ) { + throw new AppError( + 'INVALID_ARGS', + `scroll durationMs must be a non-negative integer at most ${SCROLL_DURATION_MAX_MS}`, + ); + } +} + +function normalizeScrollResultWithResolvedFrame( + runnerResult: Record, + direction: ScrollDirection, + options?: AppleScrollOptions, + config?: { includeDuration?: boolean }, +): Record { + const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); + const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); + if (referenceWidth === undefined || referenceHeight === undefined) { + return normalizeIosScrollResult(runnerResult, { amount: options?.amount }); + } + + // Recompute the plan from the runner's resolved frame so reported pixels match the planned + // travel (TS keeps buildScrollGesturePlan for Android and recording anyway). + const plan = buildScrollGesturePlan({ + direction, + amount: options?.amount, + pixels: options?.pixels, + referenceWidth, + referenceHeight, + }); + return normalizeIosScrollResult(runnerResult, { + amount: options?.amount, + pixels: plan.pixels, + durationMs: config?.includeDuration ? options?.durationMs : undefined, + preferProvidedPixels: true, + }); +} + +function scrollRunnerFields( + options: AppleScrollOptions | undefined, + config?: { includeDuration?: boolean }, +): Record { + return { + ...(options?.amount !== undefined ? { amount: options.amount } : {}), + ...(options?.pixels !== undefined ? { pixels: options.pixels } : {}), + ...(config?.includeDuration && options?.durationMs !== undefined + ? { durationMs: options.durationMs } + : {}), + }; +} + function readFiniteNumber(value: unknown): number | undefined { return typeof value === 'number' && Number.isFinite(value) ? value : undefined; } @@ -391,25 +456,38 @@ function normalizeIosScrollResult( x1 !== undefined && x2 !== undefined ? Math.round(Math.abs(x2 - x1)) : undefined; const verticalTravel = y1 !== undefined && y2 !== undefined ? Math.round(Math.abs(y2 - y1)) : undefined; - const travelPixels = - options?.preferProvidedPixels && options.pixels !== undefined - ? options.pixels - : horizontalTravel && horizontalTravel > 0 - ? horizontalTravel - : verticalTravel && verticalTravel > 0 - ? verticalTravel - : undefined; + const travelPixels = selectScrollTravelPixels(options, horizontalTravel, verticalTravel); - return { - ...(x1 !== undefined ? { x1 } : {}), - ...(y1 !== undefined ? { y1 } : {}), - ...(x2 !== undefined ? { x2 } : {}), - ...(y2 !== undefined ? { y2 } : {}), - ...(referenceWidth !== undefined ? { referenceWidth } : {}), - ...(referenceHeight !== undefined ? { referenceHeight } : {}), - ...(options?.amount !== undefined ? { amount: options.amount } : {}), - ...(travelPixels !== undefined ? { pixels: travelPixels } : {}), - }; + const result: Record = {}; + setDefinedNumber(result, 'x1', x1); + setDefinedNumber(result, 'y1', y1); + setDefinedNumber(result, 'x2', x2); + setDefinedNumber(result, 'y2', y2); + setDefinedNumber(result, 'referenceWidth', referenceWidth); + setDefinedNumber(result, 'referenceHeight', referenceHeight); + setDefinedNumber(result, 'amount', options?.amount); + setDefinedNumber(result, 'pixels', travelPixels); + setDefinedNumber(result, 'durationMs', options?.durationMs); + return result; +} + +function setDefinedNumber( + result: Record, + key: string, + value: number | undefined, +): void { + if (value !== undefined) result[key] = value; +} + +function selectScrollTravelPixels( + options: NormalizedScrollOptions | undefined, + horizontalTravel: number | undefined, + verticalTravel: number | undefined, +): number | undefined { + if (options?.preferProvidedPixels && options.pixels !== undefined) return options.pixels; + if (horizontalTravel !== undefined && horizontalTravel > 0) return horizontalTravel; + if (verticalTravel !== undefined && verticalTravel > 0) return verticalTravel; + return undefined; } function remapRunnerCoordinates(runnerResult: Record): { diff --git a/src/platforms/ios/runner-command-traits.ts b/src/platforms/ios/runner-command-traits.ts index af955ef8e..09c6210c4 100644 --- a/src/platforms/ios/runner-command-traits.ts +++ b/src/platforms/ios/runner-command-traits.ts @@ -42,6 +42,7 @@ const RUNNER_COMMAND_TRAITS = { type: DEFAULT_TRAITS, swipe: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, scroll: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, + desktopScroll: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, findText: READ_ONLY_TRAITS, querySelector: READ_ONLY_TRAITS, readText: READ_ONLY_TRAITS, diff --git a/src/platforms/ios/runner-contract.ts b/src/platforms/ios/runner-contract.ts index 3cdddfe74..8d33f9733 100644 --- a/src/platforms/ios/runner-contract.ts +++ b/src/platforms/ios/runner-contract.ts @@ -24,6 +24,9 @@ export type RunnerCommand = { // traits so it routes through single-send, command-id tracking, and lost-response status // recovery like other gestures. | 'scroll' + // macOS-only frame-resolve + desktop wheel scroll. Kept distinct from `scroll` so mobile + // touch drag semantics remain stable. + | 'desktopScroll' | 'findText' | 'querySelector' | 'readText' diff --git a/src/platforms/linux/input-actions.ts b/src/platforms/linux/input-actions.ts index f3e73f956..b26e83b33 100644 --- a/src/platforms/linux/input-actions.ts +++ b/src/platforms/linux/input-actions.ts @@ -177,7 +177,7 @@ const DEFAULT_SCROLL_CLICKS = 5; export async function scrollLinux( direction: ScrollDirection, - options?: { amount?: number; pixels?: number }, + options?: { amount?: number; pixels?: number; durationMs?: number }, ): Promise { const provider = resolveLinuxInputProvider(); if (provider) { diff --git a/src/platforms/linux/tool-provider.ts b/src/platforms/linux/tool-provider.ts index d8dadb4e1..eef88b7a2 100644 --- a/src/platforms/linux/tool-provider.ts +++ b/src/platforms/linux/tool-provider.ts @@ -53,7 +53,10 @@ export type LinuxInputProvider = { doubleClick(x: number, y: number): Promise; longPress(x: number, y: number, durationMs: number): Promise; drag(x1: number, y1: number, x2: number, y2: number, durationMs: number): Promise; - scroll(direction: ScrollDirection, options?: { amount?: number; pixels?: number }): Promise; + scroll( + direction: ScrollDirection, + options?: { amount?: number; pixels?: number; durationMs?: number }, + ): Promise; typeText(text: string, options?: { delayMs?: number }): Promise; key(combo: string, scancodes: string[]): Promise; }; diff --git a/src/platforms/web/provider.ts b/src/platforms/web/provider.ts index 07077a0be..3d14674df 100644 --- a/src/platforms/web/provider.ts +++ b/src/platforms/web/provider.ts @@ -39,7 +39,10 @@ export type WebProvider = { fill(x: number, y: number, text: string, options?: { delayMs?: number }): Promise; fillRef?(ref: string, text: string, options?: { delayMs?: number }): Promise; typeText(text: string, options?: { delayMs?: number }): Promise; - scroll(direction: ScrollDirection, options?: { amount?: number; pixels?: number }): Promise; + scroll( + direction: ScrollDirection, + options?: { amount?: number; pixels?: number; durationMs?: number }, + ): Promise; readText?(x: number, y: number): Promise; dumpNetwork?(options?: BackendDumpNetworkOptions): Promise; }; diff --git a/src/utils/__tests__/args.test.ts b/src/utils/__tests__/args.test.ts index 7598b9a39..ce539d750 100644 --- a/src/utils/__tests__/args.test.ts +++ b/src/utils/__tests__/args.test.ts @@ -511,11 +511,14 @@ test('parseArgs accepts keyboard subcommands', () => { assert.deepEqual(enter.positionals, ['enter']); }); -test('parseArgs accepts scroll pixel distance flag', () => { - const parsed = parseArgs(['scroll', 'down', '--pixels', '240'], { strictFlags: true }); +test('parseArgs accepts scroll pixel distance and duration flags', () => { + const parsed = parseArgs(['scroll', 'down', '--pixels', '240', '--duration-ms', '50'], { + strictFlags: true, + }); assert.equal(parsed.command, 'scroll'); assert.deepEqual(parsed.positionals, ['down']); assert.equal(parsed.flags.pixels, 240); + assert.equal(parsed.flags.durationMs, 50); }); test('parseArgs recognizes --debug alias for verbose mode', () => { diff --git a/src/utils/cli-flags.ts b/src/utils/cli-flags.ts index e8c62fee2..8d0438f5e 100644 --- a/src/utils/cli-flags.ts +++ b/src/utils/cli-flags.ts @@ -83,6 +83,7 @@ export type CliFlags = RemoteConfigMetroOptions & hideTouches?: boolean; intervalMs?: number; delayMs?: number; + durationMs?: number; holdMs?: number; jitterPx?: number; pixels?: number; @@ -677,6 +678,15 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ usageLabel: '--delay-ms ', usageDescription: 'Delay between typed characters', }, + { + key: 'durationMs', + names: ['--duration-ms'], + type: 'int', + min: 0, + max: 10_000, + usageLabel: '--duration-ms ', + usageDescription: 'Scroll: spread desktop wheel events over this duration', + }, { key: 'holdMs', names: ['--hold-ms'], diff --git a/src/utils/cli-help.ts b/src/utils/cli-help.ts index 22679fc58..5ea22267f 100644 --- a/src/utils/cli-help.ts +++ b/src/utils/cli-help.ts @@ -194,6 +194,9 @@ Read-only and waits: Navigation and gestures: Use scroll for lists; swipe for coordinate gestures/carousels; gesture pan for deliberate drags; gesture fling for fast directional throws. + For fast macOS desktop list traversal, prefer fixed pixel wheel steps and batch them when no snapshot is needed between passes: + agent-device scroll down --pixels 200 --duration-ms 50 --platform macos + agent-device batch --steps '[{"command":"scroll","input":{"direction":"down","pixels":200,"durationMs":50}},{"command":"scroll","input":{"direction":"down","pixels":200,"durationMs":50}}]' --platform macos For raw coordinate gestures, run snapshot -i first and choose a point near the center of the intended app-owned target. Avoid screen edges, tab bars, navigation bars, and home indicators because those areas can trigger system or app navigation instead of the gesture under test. If app-owned back is ambiguous or has just misrouted, prefer a visible nav/back button ref, tab-bar ref, or deep link over repeated back/system back. App-owned action sheets, menus, and camera/scan screens are normal UI. After opening one, run snapshot -i or wait for the option, press by label/ref, handle visible permission sheets through UI or platform-supported native alerts, then wait for a concrete result before returning to chat/form state. diff --git a/test/integration/provider-scenarios/macos-desktop.test.ts b/test/integration/provider-scenarios/macos-desktop.test.ts index 4537d7fdf..59f14720f 100644 --- a/test/integration/provider-scenarios/macos-desktop.test.ts +++ b/test/integration/provider-scenarios/macos-desktop.test.ts @@ -63,6 +63,19 @@ test('Provider-backed integration macOS desktop flow uses semantic host and help request: { command: 'uptime' }, result: { uptimeMs: 84 }, }, + { + command: 'macos.runner.desktopScroll', + deviceId: PROVIDER_SCENARIO_MACOS.id, + platform: 'macos', + request: { + command: 'desktopScroll', + direction: 'down', + pixels: 200, + durationMs: 50, + appBundleId: 'com.apple.systempreferences', + }, + result: { x: 737.5, y: 476.5, referenceWidth: 400, referenceHeight: 800 }, + }, ]); const appleRunnerProvider = createAppleRunnerProviderFromTranscript( runnerTranscript, @@ -124,6 +137,20 @@ test('Provider-backed integration macOS desktop flow uses semantic host and help surface: 'app', }, }, + { + name: 'scroll app session with desktop wheel event', + command: 'scroll', + positionals: ['down'], + flags: { pixels: 200, durationMs: 50 }, + expectData: { + x1: 737.5, + y1: 476.5, + referenceWidth: 400, + referenceHeight: 800, + pixels: 200, + durationMs: 50, + }, + }, { name: 'read logs path', command: 'logs',