From dea24f857191384cd5b0c72def4832ab4c28f4e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Wed, 24 Jun 2026 21:31:49 +0200 Subject: [PATCH 1/3] refactor: modularize scroll command plumbing --- src/client.ts | 13 +- src/commands/__tests__/command-flags.test.ts | 23 +++ src/commands/batch/projection.ts | 4 +- src/commands/cli-grammar/types.ts | 2 + src/commands/command-flags.ts | 40 ++++ src/commands/command-projection.ts | 22 ++- src/commands/interaction/metadata.ts | 2 +- src/commands/interaction/runtime/gestures.ts | 38 +--- src/core/dispatch-interactions.ts | 44 +---- src/core/scroll-command.ts | 46 +++++ src/core/scroll-gesture.ts | 1 - .../__tests__/runner-command-traits.test.ts | 88 +++++---- src/platforms/ios/desktop-scroll.ts | 38 ++++ src/platforms/ios/interactions.ts | 174 ++---------------- src/platforms/ios/runner-command-manifest.ts | 42 +++++ src/platforms/ios/runner-command-traits.ts | 57 +++--- src/platforms/ios/scroll.ts | 111 +++++++++++ 17 files changed, 449 insertions(+), 296 deletions(-) create mode 100644 src/commands/__tests__/command-flags.test.ts create mode 100644 src/commands/command-flags.ts create mode 100644 src/core/scroll-command.ts create mode 100644 src/platforms/ios/desktop-scroll.ts create mode 100644 src/platforms/ios/runner-command-manifest.ts create mode 100644 src/platforms/ios/scroll.ts diff --git a/src/client.ts b/src/client.ts index 99ec9a678..efca6d455 100644 --- a/src/client.ts +++ b/src/client.ts @@ -7,9 +7,9 @@ import { prepareDaemonCommandRequest, type DaemonCommandName, } from './commands/command-projection.ts'; +import { buildRequestFlags } from './commands/command-flags.ts'; import { throwDaemonError } from './daemon-error.ts'; import { - buildFlags, buildMeta, normalizeDeployResult, normalizeDevice, @@ -45,6 +45,7 @@ import type { } from './client-types.ts'; import { readSerializedSnapshotCaptureAnnotations } from './snapshot-capture-annotations.ts'; import { readSnapshotDiagnosticsSummary } from './snapshot-diagnostics.ts'; +import type { CommandFlags } from './core/dispatch-context.ts'; export function createAgentDeviceClient( config: AgentDeviceClientConfig = {}, @@ -56,13 +57,14 @@ export function createAgentDeviceClient( command: string, positionals: string[] = [], options: InternalRequestOptions = {}, + metadataFlags?: Partial, ): Promise> => { const merged = mergeClientOptions(config, options); const response = await transport({ session: resolveSessionName(merged.session), command, positionals, - flags: buildFlags(merged), + flags: buildRequestFlags(merged, metadataFlags), runtime: merged.runtime, meta: buildMeta(merged), }); @@ -83,7 +85,12 @@ export function createAgentDeviceClient( options: InternalRequestOptions = {}, ): Promise => { const request = prepareDaemonCommandRequest(command, options); - return (await execute(request.command, request.positionals, request.options)) as T; + return (await execute( + request.command, + request.positionals, + request.options, + request.metadataFlags, + )) as T; }; const resolveRequestSession = (options: InternalRequestOptions = {}) => diff --git a/src/commands/__tests__/command-flags.test.ts b/src/commands/__tests__/command-flags.test.ts new file mode 100644 index 000000000..ee32b99c9 --- /dev/null +++ b/src/commands/__tests__/command-flags.test.ts @@ -0,0 +1,23 @@ +import assert from 'node:assert/strict'; +import { test } from 'vitest'; + +import type { InternalRequestOptions } from '../../client-types.ts'; +import { findCommandMetadata } from '../command-metadata.ts'; +import { readMetadataCommandFlags } from '../command-flags.ts'; + +test('readMetadataCommandFlags projects CLI-backed command fields and skips positionals', () => { + const metadata = findCommandMetadata('scroll'); + assert.ok(metadata); + + const flags = readMetadataCommandFlags(metadata, { + direction: 'down', + amount: 0.4, + pixels: 200, + durationMs: 50, + } as InternalRequestOptions); + + assert.deepEqual(flags, { + pixels: 200, + durationMs: 50, + }); +}); diff --git a/src/commands/batch/projection.ts b/src/commands/batch/projection.ts index c4e7cd819..b124b7c15 100644 --- a/src/commands/batch/projection.ts +++ b/src/commands/batch/projection.ts @@ -3,11 +3,11 @@ import { STRUCTURED_BATCH_COMMAND_NAMES, readStructuredBatchCommandName, } from '../../batch-policy.ts'; -import { buildFlags } from '../../client-normalizers.ts'; import type { DaemonBatchStep } from '../../core/batch.ts'; import { AppError } from '../../utils/errors.ts'; import { request } from '../cli-grammar/common.ts'; import type { CommandInput, DaemonCommandRequest, DaemonWriter } from '../cli-grammar/types.ts'; +import { buildRequestFlags } from '../command-flags.ts'; import type { DaemonCommandName } from '../command-projection.ts'; const batchCommandNames = STRUCTURED_BATCH_COMMAND_NAMES satisfies readonly DaemonCommandName[]; @@ -57,7 +57,7 @@ function readBatchDaemonStep( return { command: prepared.command, positionals: prepared.positionals, - flags: buildFlags(prepared.options), + flags: buildRequestFlags(prepared.options, prepared.metadataFlags), runtime: runtime ?? prepared.options.runtime, }; } diff --git a/src/commands/cli-grammar/types.ts b/src/commands/cli-grammar/types.ts index 447167031..a7623f8d7 100644 --- a/src/commands/cli-grammar/types.ts +++ b/src/commands/cli-grammar/types.ts @@ -1,4 +1,5 @@ import type { InteractionTarget, InternalRequestOptions } from '../../client-types.ts'; +import type { CommandFlags } from '../../core/dispatch-context.ts'; import type { CliFlags } from '../../utils/cli-flags.ts'; import type { ClickButton } from '../../core/click-button.ts'; import type { DecodedFillTarget } from '../../core/interaction-positionals.ts'; @@ -8,6 +9,7 @@ export type DaemonCommandRequest = { command: string; positionals: string[]; options: InternalRequestOptions; + metadataFlags?: Partial; }; type PointInput = { diff --git a/src/commands/command-flags.ts b/src/commands/command-flags.ts new file mode 100644 index 000000000..5097c2ff7 --- /dev/null +++ b/src/commands/command-flags.ts @@ -0,0 +1,40 @@ +import { buildFlags } from '../client-normalizers.ts'; +import type { CommandFlags } from '../core/dispatch-context.ts'; +import { getFlagDefinitions } from '../utils/cli-flags.ts'; +import type { InternalRequestOptions } from '../client-types.ts'; +import type { CommandMetadata } from './command-contract.ts'; + +const CLI_FLAG_KEYS: ReadonlySet = new Set( + getFlagDefinitions().map((definition) => definition.key), +); + +export function buildRequestFlags( + options: InternalRequestOptions, + metadataFlags: Partial | undefined, +): CommandFlags { + return { + ...buildFlags(options), + ...metadataFlags, + }; +} + +export function readMetadataCommandFlags( + metadata: Pick, 'inputSchema'>, + options: InternalRequestOptions, +): Partial { + const properties = metadata.inputSchema.properties; + if (!properties) return {}; + + const flags: Record = {}; + const record = options as Record; + for (const key of Object.keys(properties)) { + if (!CLI_FLAG_KEYS.has(key)) continue; + const value = record[key]; + if (isMetadataFlagValue(value)) flags[key] = value; + } + return flags as Partial; +} + +function isMetadataFlagValue(value: unknown): value is boolean | number | string { + return typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string'; +} diff --git a/src/commands/command-projection.ts b/src/commands/command-projection.ts index 803d30296..97f2736e7 100644 --- a/src/commands/command-projection.ts +++ b/src/commands/command-projection.ts @@ -1,6 +1,7 @@ import { createBatchDaemonWriter, type BatchCommandName } from './batch/index.ts'; import type { CommandInput, DaemonCommandRequest, DaemonWriter } from './cli-grammar/types.ts'; import { findCommandMetadata } from './command-metadata.ts'; +import { readMetadataCommandFlags } from './command-flags.ts'; import { listCommandFamilyDaemonWriters } from './family/registry.ts'; import { AppError } from '../utils/errors.ts'; @@ -27,7 +28,11 @@ function prepareBatchDaemonCommandRequest( throw new Error(`Missing command metadata for batch command: ${command}`); } try { - return writer(metadata.readInput(input) as CommandInput); + return prepareRequestWithMetadataFlags( + writer, + metadata, + metadata.readInput(input) as CommandInput, + ); } catch (err) { const message = err instanceof Error ? err.message : String(err); throw new AppError( @@ -47,5 +52,18 @@ export function prepareDaemonCommandRequest( if (!writer) { throw new Error(`Missing daemon writer for command: ${command}`); } - return writer(input); + const metadata = findCommandMetadata(command); + return prepareRequestWithMetadataFlags(writer, metadata, input); +} + +function prepareRequestWithMetadataFlags( + writer: DaemonWriter, + metadata: ReturnType, + input: CommandInput, +): DaemonCommandRequest { + const request = writer(input); + return { + ...request, + ...(metadata ? { metadataFlags: readMetadataCommandFlags(metadata, request.options) } : {}), + }; } diff --git a/src/commands/interaction/metadata.ts b/src/commands/interaction/metadata.ts index b37296440..3b216d5dc 100644 --- a/src/commands/interaction/metadata.ts +++ b/src/commands/interaction/metadata.ts @@ -27,8 +27,8 @@ import { } from '../command-input.ts'; import { defineFieldCommandMetadata } from '../field-command-contract.ts'; import { CLICK_BUTTONS } from '../../core/click-button.ts'; +import { SCROLL_DURATION_MAX_MS } from '../../core/scroll-command.ts'; import { - SCROLL_DURATION_MAX_MS, SCROLL_DIRECTIONS, SWIPE_PATTERNS, SWIPE_PRESETS, diff --git a/src/commands/interaction/runtime/gestures.ts b/src/commands/interaction/runtime/gestures.ts index 9dd38d465..0a27492bc 100644 --- a/src/commands/interaction/runtime/gestures.ts +++ b/src/commands/interaction/runtime/gestures.ts @@ -4,11 +4,15 @@ import { centerOfRect } from '../../../utils/snapshot.ts'; import { buildSwipePresetGesturePlan, parseSwipePreset, - SCROLL_DURATION_MAX_MS, type GestureReferenceFrame, type ScrollDirection, type SwipePreset, } from '../../../core/scroll-gesture.ts'; +import { + assertExclusiveScrollDistanceInputs, + honoredScrollDurationMs, + normalizeScrollDurationMs, +} from '../../../core/scroll-command.ts'; import type { AgentDeviceRuntime, CommandContext } from '../../../runtime-contract.ts'; import { requireIntInRange } from '../../../utils/validation.ts'; import { successText } from '../../../utils/success-text.ts'; @@ -186,14 +190,11 @@ export const scrollCommand: RuntimeCommand | undefined, -): number | undefined { - return typeof backendResult?.durationMs === 'number' ? backendResult.durationMs : undefined; -} - export const swipeCommand: RuntimeCommand = async ( runtime, options, @@ -534,21 +529,6 @@ function normalizeOptionalPositiveInteger( return value; } -function normalizeOptionalNonNegativeInteger( - value: number | undefined, - field: string, - max?: number, -): number | undefined { - if (value === undefined) return undefined; - if (!Number.isFinite(value) || !Number.isInteger(value) || value < 0) { - throw new AppError('INVALID_ARGS', `${field} must be a non-negative integer`); - } - if (max !== undefined && value > max) { - throw new AppError('INVALID_ARGS', `${field} must be at most ${max}`); - } - return value; -} - function resolveSnapshotViewport(nodes: SnapshotState['nodes']): Rect { const visibleRects = nodes .filter((node) => isNodeVisibleInEffectiveViewport(node, nodes)) diff --git a/src/core/dispatch-interactions.ts b/src/core/dispatch-interactions.ts index 578c4185b..484e85893 100644 --- a/src/core/dispatch-interactions.ts +++ b/src/core/dispatch-interactions.ts @@ -7,7 +7,6 @@ import { inferGestureReferenceFrame, parseScrollDirection, parseSwipePreset, - SCROLL_DURATION_MAX_MS, SCROLL_DIRECTIONS, SWIPE_PATTERNS, type ScrollDirection, @@ -15,6 +14,12 @@ import { type SwipePreset, type TransformGestureParams, } from './scroll-gesture.ts'; +import { + assertExclusiveScrollDistanceInputs, + honoredScrollDurationMs, + normalizeScrollDurationMs, + type ScrollCommandOptions, +} from './scroll-command.ts'; import { isStringMember, parseStringMember } from '../utils/string-enum.ts'; import { getClickButtonValidationError, @@ -50,12 +55,6 @@ type ScrollTarget = { edge?: ScrollEdge; }; -type ScrollCommandOptions = { - amount?: number; - pixels?: number; - durationMs?: number; -}; - export async function handleLongPressCommand( interactor: Interactor, positionals: string[], @@ -773,8 +772,8 @@ function assertScrollCommandInputs( durationMs: number | undefined, ): void { assertScrollAmountInput(amount); - assertScrollDurationInput(durationMs); - assertExclusiveScrollDistanceInputs(amount, pixels); + normalizeScrollDurationMs(durationMs); + assertExclusiveScrollDistanceInputs({ amount, pixels }); } function assertScrollAmountInput(amount: number | undefined): void { @@ -783,31 +782,6 @@ function assertScrollAmountInput(amount: number | undefined): void { } } -function assertScrollDurationInput(durationMs: number | undefined): void { - if (durationMs === undefined) return; - if (!Number.isFinite(durationMs) || !Number.isInteger(durationMs) || durationMs < 0) { - throw new AppError('INVALID_ARGS', 'scroll durationMs must be a non-negative integer'); - } - if (durationMs > SCROLL_DURATION_MAX_MS) { - throw new AppError( - 'INVALID_ARGS', - `scroll durationMs must be a non-negative integer at most ${SCROLL_DURATION_MAX_MS}`, - ); - } -} - -function assertExclusiveScrollDistanceInputs( - amount: number | undefined, - pixels: number | undefined, -): void { - if (amount !== undefined && pixels !== undefined) { - throw new AppError( - 'INVALID_ARGS', - 'scroll accepts either a relative amount or --pixels, not both', - ); - } -} - async function runDispatchedScroll( interactor: Interactor, context: DispatchContext | undefined, @@ -840,11 +814,13 @@ function buildDispatchedScrollResult( completedPasses: number, interactionResult: Record, ): Record { + const durationMs = honoredScrollDurationMs(interactionResult); return { direction: target.direction, ...(target.edge ? { edge: target.edge, passes: completedPasses } : {}), ...(options.amount !== undefined ? { amount: options.amount } : {}), ...(options.pixels !== undefined ? { pixels: options.pixels } : {}), + ...(durationMs !== undefined ? { durationMs } : {}), ...interactionResult, }; } diff --git a/src/core/scroll-command.ts b/src/core/scroll-command.ts new file mode 100644 index 000000000..adac62f75 --- /dev/null +++ b/src/core/scroll-command.ts @@ -0,0 +1,46 @@ +import { AppError } from '../utils/errors.ts'; + +export const SCROLL_DURATION_MAX_MS = 10_000; + +export type ScrollDistanceOptions = { + amount?: number; + pixels?: number; +}; + +export type ScrollTimingOptions = { + durationMs?: number; +}; + +export type ScrollCommandOptions = ScrollDistanceOptions & ScrollTimingOptions; + +export function assertExclusiveScrollDistanceInputs( + options: ScrollDistanceOptions, + message = 'scroll accepts either a relative amount or --pixels, not both', +): void { + if (options.amount !== undefined && options.pixels !== undefined) { + throw new AppError('INVALID_ARGS', message); + } +} + +export function normalizeScrollDurationMs( + durationMs: number | undefined, + options: { field?: string; invalidMessage?: string; max?: number } = {}, +): number | undefined { + if (durationMs === undefined) return undefined; + const field = options.field ?? 'scroll durationMs'; + const max = options.max ?? SCROLL_DURATION_MAX_MS; + const invalidMessage = options.invalidMessage ?? `${field} must be a non-negative integer`; + if (!Number.isFinite(durationMs) || !Number.isInteger(durationMs) || durationMs < 0) { + throw new AppError('INVALID_ARGS', invalidMessage); + } + if (durationMs > max) { + throw new AppError('INVALID_ARGS', `${field} must be a non-negative integer at most ${max}`); + } + return durationMs; +} + +export function honoredScrollDurationMs( + result: Record | undefined, +): number | undefined { + return typeof result?.durationMs === 'number' ? result.durationMs : undefined; +} diff --git a/src/core/scroll-gesture.ts b/src/core/scroll-gesture.ts index 62802c05b..80f0d67de 100644 --- a/src/core/scroll-gesture.ts +++ b/src/core/scroll-gesture.ts @@ -4,7 +4,6 @@ import type { Rect, SnapshotNode } from '../utils/snapshot.ts'; export const SCROLL_DIRECTIONS = ['up', 'down', 'left', 'right'] as const; export type ScrollDirection = (typeof SCROLL_DIRECTIONS)[number]; -export const SCROLL_DURATION_MAX_MS = 10_000; export const SWIPE_PRESETS = ['left', 'right', 'left-edge', 'right-edge'] as const; export type SwipePreset = (typeof SWIPE_PRESETS)[number]; export const SWIPE_PATTERNS = ['one-way', 'ping-pong'] as const; diff --git a/src/platforms/ios/__tests__/runner-command-traits.test.ts b/src/platforms/ios/__tests__/runner-command-traits.test.ts index e4eeb3efb..731a17a3d 100644 --- a/src/platforms/ios/__tests__/runner-command-traits.test.ts +++ b/src/platforms/ios/__tests__/runner-command-traits.test.ts @@ -8,43 +8,16 @@ import { readRunnerCommandTraits, type RunnerCommandTraits, } from '../runner-command-traits.ts'; +import { RUNNER_COMMAND_TRAIT_MANIFEST } from '../runner-command-manifest.ts'; -const EXPECTED_RUNNER_COMMAND_TRAITS = { - tap: hotMutation(), - mouseClick: defaults(), - longPress: hotMutation(), - drag: hotMutation(), - remotePress: defaults(), - type: defaults(), - swipe: hotMutation(), - scroll: hotMutation(), - desktopScroll: hotMutation(), - findText: readOnly(), - querySelector: readOnly(), - readText: readOnly(), - snapshot: readOnly(), - screenshot: readOnly(), - back: defaults(), - backInApp: defaults(), - backSystem: defaults(), - home: defaults(), - rotate: defaults(), - rotateGesture: defaults(), - transformGesture: defaults(), - appSwitcher: defaults(), - keyboardDismiss: defaults(), - keyboardReturn: defaults(), - alert: readOnly(), - pinch: defaults(), - sequence: hotMutation(), - recordStart: defaults(), - recordStop: defaults(), - status: readOnlyReadinessProbe(), - uptime: readOnlyReadinessProbe(), - shutdown: defaults(), -} satisfies Record; +const EXPECTED_RUNNER_COMMAND_TRAITS = Object.fromEntries( + Object.entries(RUNNER_COMMAND_TRAIT_MANIFEST).map(([command, traitClass]) => [ + command, + expectedTraitsForClass(traitClass), + ]), +) as Record; -test('runner command traits classify every runner command in one table', () => { +test('runner command traits are derived from the runner command manifest', () => { for (const [command, expectedTraits] of Object.entries(EXPECTED_RUNNER_COMMAND_TRAITS) as Array< [RunnerCommand['command'], RunnerCommandTraits] >) { @@ -52,6 +25,27 @@ test('runner command traits classify every runner command in one table', () => { } }); +test('runner command manifest pins lifecycle-sensitive command groups', () => { + assert.deepEqual(commandsForClass('preflightSkippableTouchMutation'), [ + 'desktopScroll', + 'drag', + 'longPress', + 'scroll', + 'sequence', + 'swipe', + 'tap', + ]); + assert.deepEqual(commandsForClass('readOnly'), [ + 'alert', + 'findText', + 'querySelector', + 'readText', + 'screenshot', + 'snapshot', + ]); + assert.deepEqual(commandsForClass('readOnlyReadinessProbe'), ['status', 'uptime']); +}); + test('runner command trait helpers read from the shared trait table', () => { for (const command of Object.keys(EXPECTED_RUNNER_COMMAND_TRAITS) as Array< RunnerCommand['command'] @@ -67,6 +61,30 @@ test('runner command trait helpers read from the shared trait table', () => { } }); +function commandsForClass( + traitClass: (typeof RUNNER_COMMAND_TRAIT_MANIFEST)[RunnerCommand['command']], +): RunnerCommand['command'][] { + return Object.entries(RUNNER_COMMAND_TRAIT_MANIFEST) + .filter((entry) => entry[1] === traitClass) + .map((entry) => entry[0] as RunnerCommand['command']) + .sort(); +} + +function expectedTraitsForClass( + traitClass: (typeof RUNNER_COMMAND_TRAIT_MANIFEST)[RunnerCommand['command']], +): RunnerCommandTraits { + switch (traitClass) { + case 'default': + return defaults(); + case 'readOnly': + return readOnly(); + case 'readOnlyReadinessProbe': + return readOnlyReadinessProbe(); + case 'preflightSkippableTouchMutation': + return hotMutation(); + } +} + function defaults(): RunnerCommandTraits { return { readOnly: false, diff --git a/src/platforms/ios/desktop-scroll.ts b/src/platforms/ios/desktop-scroll.ts new file mode 100644 index 000000000..82abdf06c --- /dev/null +++ b/src/platforms/ios/desktop-scroll.ts @@ -0,0 +1,38 @@ +import type { DeviceInfo } from '../../utils/device.ts'; +import type { RunnerCallOptions, RunnerContext } from '../../core/interactor-types.ts'; +import type { ScrollDirection } from '../../core/scroll-gesture.ts'; +import type { RunnerCommand } from './runner-contract.ts'; +import { + normalizeAppleScrollResultWithResolvedFrame, + scrollRunnerFields, + type AppleScrollOptions, +} from './scroll.ts'; + +type RunRunnerCommand = ( + device: DeviceInfo, + command: RunnerCommand, + options?: RunnerCallOptions, +) => Promise>; + +export async function runMacosDesktopScroll( + runRunnerCommand: RunRunnerCommand, + device: DeviceInfo, + ctx: RunnerContext, + runnerOpts: RunnerCallOptions, + direction: ScrollDirection, + options?: AppleScrollOptions, +): Promise> { + const runnerResult = await runRunnerCommand( + device, + { + command: 'desktopScroll', + direction, + ...scrollRunnerFields(options, { includeDuration: true }), + appBundleId: ctx.appBundleId, + }, + runnerOpts, + ); + return normalizeAppleScrollResultWithResolvedFrame(runnerResult, direction, options, { + includeDuration: true, + }); +} diff --git a/src/platforms/ios/interactions.ts b/src/platforms/ios/interactions.ts index 60346eee5..212441e3d 100644 --- a/src/platforms/ios/interactions.ts +++ b/src/platforms/ios/interactions.ts @@ -1,14 +1,16 @@ import type { DeviceInfo } from '../../utils/device.ts'; -import { - assertScrollGestureInput, - buildScrollGesturePlan, - SCROLL_DURATION_MAX_MS, - type ScrollDirection, -} from '../../core/scroll-gesture.ts'; -import { AppError } from '../../utils/errors.ts'; +import { assertScrollGestureInput, type ScrollDirection } from '../../core/scroll-gesture.ts'; +import { normalizeScrollDurationMs, SCROLL_DURATION_MAX_MS } from '../../core/scroll-command.ts'; import { runIosRunnerCommand } from './runner-client.ts'; import { buildRunnerSequenceCommand, parseRunnerSequenceResult } from './runner-sequence.ts'; import type { RunnerCommand } from './runner-contract.ts'; +import { runMacosDesktopScroll } from './desktop-scroll.ts'; +import { + normalizeAppleScrollResult, + normalizeAppleScrollResultWithResolvedFrame, + scrollRunnerFields, + type AppleScrollOptions, +} from './scroll.ts'; import type { BackMode, Interactor, @@ -25,15 +27,6 @@ const IOS_SWIPE_DEFAULT_DURATION_MS = 250; const IOS_SWIPE_MIN_DURATION_MS = 16; const IOS_SWIPE_MAX_DURATION_MS = 10_000; -type NormalizedScrollOptions = { - amount?: number; - pixels?: number; - durationMs?: number; - preferProvidedPixels?: boolean; -}; - -type AppleScrollOptions = Omit; - type IosDragCommandOptions = { defaultDurationMs: number; legacyDefaultDurationMs?: number; @@ -335,29 +328,26 @@ async function runAppleScroll( appleRemotePressCommand(direction, ctx.appBundleId), runnerOpts, ); - return normalizeIosScrollResult(runnerResult, { amount: options?.amount }); + return normalizeAppleScrollResult(runnerResult, { amount: options?.amount }); } // Validate amount/pixels up front so bad inputs throw INVALID_ARGS before any runner command // is sent (previously validation ran between the frame request and the drag, so a bad amount // could cost one runner request first). assertScrollGestureInput(options ?? {}); - assertScrollDurationInput(options?.durationMs); + normalizeScrollDurationMs(options?.durationMs, { + invalidMessage: `scroll durationMs must be a non-negative integer at most ${SCROLL_DURATION_MAX_MS}`, + }); if (device.platform === 'macos') { - const runnerResult = await runRunnerCommand( + return await runMacosDesktopScroll( + runRunnerCommand, device, - { - command: 'desktopScroll', - direction, - ...scrollRunnerFields(options, { includeDuration: true }), - appBundleId: ctx.appBundleId, - }, + ctx, runnerOpts, + direction, + options, ); - return normalizeScrollResultWithResolvedFrame(runnerResult, direction, options, { - includeDuration: true, - }); } // Single fused lifecycle command: the runner resolves the interaction frame and runs the drag. @@ -375,131 +365,5 @@ async function runAppleScroll( runnerOpts, ); - const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); - const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); - if (referenceWidth !== undefined && referenceHeight !== undefined) - return normalizeScrollResultWithResolvedFrame(runnerResult, direction, options); - - // Missing frame dims: derive pixels from endpoint travel instead of throwing. - return normalizeIosScrollResult(runnerResult, { amount: options?.amount }); -} - -function assertScrollDurationInput(durationMs: number | undefined): void { - if (durationMs === undefined) return; - if ( - !Number.isFinite(durationMs) || - !Number.isInteger(durationMs) || - durationMs < 0 || - durationMs > SCROLL_DURATION_MAX_MS - ) { - throw new AppError( - 'INVALID_ARGS', - `scroll durationMs must be a non-negative integer at most ${SCROLL_DURATION_MAX_MS}`, - ); - } -} - -function normalizeScrollResultWithResolvedFrame( - runnerResult: Record, - direction: ScrollDirection, - options?: AppleScrollOptions, - config?: { includeDuration?: boolean }, -): Record { - const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); - const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); - if (referenceWidth === undefined || referenceHeight === undefined) { - return normalizeIosScrollResult(runnerResult, { amount: options?.amount }); - } - - // Recompute the plan from the runner's resolved frame so reported pixels match the planned - // travel (TS keeps buildScrollGesturePlan for Android and recording anyway). - const plan = buildScrollGesturePlan({ - direction, - amount: options?.amount, - pixels: options?.pixels, - referenceWidth, - referenceHeight, - }); - return normalizeIosScrollResult(runnerResult, { - amount: options?.amount, - pixels: plan.pixels, - durationMs: config?.includeDuration ? options?.durationMs : undefined, - preferProvidedPixels: true, - }); -} - -function scrollRunnerFields( - options: AppleScrollOptions | undefined, - config?: { includeDuration?: boolean }, -): Record { - return { - ...(options?.amount !== undefined ? { amount: options.amount } : {}), - ...(options?.pixels !== undefined ? { pixels: options.pixels } : {}), - ...(config?.includeDuration && options?.durationMs !== undefined - ? { durationMs: options.durationMs } - : {}), - }; -} - -function readFiniteNumber(value: unknown): number | undefined { - return typeof value === 'number' && Number.isFinite(value) ? value : undefined; -} - -function normalizeIosScrollResult( - runnerResult: Record, - options?: NormalizedScrollOptions, -): Record { - const { x1, y1, x2, y2 } = remapRunnerCoordinates(runnerResult); - const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); - const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); - const horizontalTravel = - x1 !== undefined && x2 !== undefined ? Math.round(Math.abs(x2 - x1)) : undefined; - const verticalTravel = - y1 !== undefined && y2 !== undefined ? Math.round(Math.abs(y2 - y1)) : undefined; - const travelPixels = selectScrollTravelPixels(options, horizontalTravel, verticalTravel); - - const result: Record = {}; - setDefinedNumber(result, 'x1', x1); - setDefinedNumber(result, 'y1', y1); - setDefinedNumber(result, 'x2', x2); - setDefinedNumber(result, 'y2', y2); - setDefinedNumber(result, 'referenceWidth', referenceWidth); - setDefinedNumber(result, 'referenceHeight', referenceHeight); - setDefinedNumber(result, 'amount', options?.amount); - setDefinedNumber(result, 'pixels', travelPixels); - setDefinedNumber(result, 'durationMs', options?.durationMs); - return result; -} - -function setDefinedNumber( - result: Record, - key: string, - value: number | undefined, -): void { - if (value !== undefined) result[key] = value; -} - -function selectScrollTravelPixels( - options: NormalizedScrollOptions | undefined, - horizontalTravel: number | undefined, - verticalTravel: number | undefined, -): number | undefined { - if (options?.preferProvidedPixels && options.pixels !== undefined) return options.pixels; - if (horizontalTravel !== undefined && horizontalTravel > 0) return horizontalTravel; - if (verticalTravel !== undefined && verticalTravel > 0) return verticalTravel; - return undefined; -} - -function remapRunnerCoordinates(runnerResult: Record): { - x1?: number; - y1?: number; - x2?: number; - y2?: number; -} { - return { - x1: readFiniteNumber(runnerResult.x), - y1: readFiniteNumber(runnerResult.y), - x2: readFiniteNumber(runnerResult.x2), - y2: readFiniteNumber(runnerResult.y2), - }; + return normalizeAppleScrollResultWithResolvedFrame(runnerResult, direction, options); } diff --git a/src/platforms/ios/runner-command-manifest.ts b/src/platforms/ios/runner-command-manifest.ts new file mode 100644 index 000000000..1b7a49aae --- /dev/null +++ b/src/platforms/ios/runner-command-manifest.ts @@ -0,0 +1,42 @@ +import type { RunnerCommand } from './runner-contract.ts'; + +export type RunnerCommandTraitClass = + | 'default' + | 'readOnly' + | 'readOnlyReadinessProbe' + | 'preflightSkippableTouchMutation'; + +export const RUNNER_COMMAND_TRAIT_MANIFEST = { + tap: 'preflightSkippableTouchMutation', + mouseClick: 'default', + longPress: 'preflightSkippableTouchMutation', + drag: 'preflightSkippableTouchMutation', + remotePress: 'default', + type: 'default', + swipe: 'preflightSkippableTouchMutation', + scroll: 'preflightSkippableTouchMutation', + desktopScroll: 'preflightSkippableTouchMutation', + findText: 'readOnly', + querySelector: 'readOnly', + readText: 'readOnly', + snapshot: 'readOnly', + screenshot: 'readOnly', + back: 'default', + backInApp: 'default', + backSystem: 'default', + home: 'default', + rotate: 'default', + rotateGesture: 'default', + transformGesture: 'default', + appSwitcher: 'default', + keyboardDismiss: 'default', + keyboardReturn: 'default', + alert: 'readOnly', + pinch: 'default', + sequence: 'preflightSkippableTouchMutation', + recordStart: 'default', + recordStop: 'default', + status: 'readOnlyReadinessProbe', + uptime: 'readOnlyReadinessProbe', + shutdown: 'default', +} as const satisfies Record; diff --git a/src/platforms/ios/runner-command-traits.ts b/src/platforms/ios/runner-command-traits.ts index 09c6210c4..8f83b78cb 100644 --- a/src/platforms/ios/runner-command-traits.ts +++ b/src/platforms/ios/runner-command-traits.ts @@ -1,4 +1,8 @@ import type { RunnerCommand } from './runner-contract.ts'; +import { + RUNNER_COMMAND_TRAIT_MANIFEST, + type RunnerCommandTraitClass, +} from './runner-command-manifest.ts'; export type RunnerCommandTraits = Readonly<{ readOnly: boolean; @@ -33,40 +37,12 @@ const PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS: RunnerCommandTraits = { readinessPreflightSkipEligibleAfterHealthyMutation: true, }; -const RUNNER_COMMAND_TRAITS = { - tap: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, - mouseClick: DEFAULT_TRAITS, - longPress: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, - drag: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, - remotePress: DEFAULT_TRAITS, - type: DEFAULT_TRAITS, - swipe: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, - scroll: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, - desktopScroll: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, - findText: READ_ONLY_TRAITS, - querySelector: READ_ONLY_TRAITS, - readText: READ_ONLY_TRAITS, - snapshot: READ_ONLY_TRAITS, - screenshot: READ_ONLY_TRAITS, - back: DEFAULT_TRAITS, - backInApp: DEFAULT_TRAITS, - backSystem: DEFAULT_TRAITS, - home: DEFAULT_TRAITS, - rotate: DEFAULT_TRAITS, - rotateGesture: DEFAULT_TRAITS, - transformGesture: DEFAULT_TRAITS, - appSwitcher: DEFAULT_TRAITS, - keyboardDismiss: DEFAULT_TRAITS, - keyboardReturn: DEFAULT_TRAITS, - alert: READ_ONLY_TRAITS, - pinch: DEFAULT_TRAITS, - sequence: PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS, - recordStart: DEFAULT_TRAITS, - recordStop: DEFAULT_TRAITS, - status: READ_ONLY_READINESS_PROBE_TRAITS, - uptime: READ_ONLY_READINESS_PROBE_TRAITS, - shutdown: DEFAULT_TRAITS, -} satisfies Record; +const RUNNER_COMMAND_TRAITS = Object.fromEntries( + Object.entries(RUNNER_COMMAND_TRAIT_MANIFEST).map(([command, traitClass]) => [ + command, + traitsForClass(traitClass), + ]), +) as Record; export function readRunnerCommandTraits(command: RunnerCommand['command']): RunnerCommandTraits { return RUNNER_COMMAND_TRAITS[command]; @@ -85,3 +61,16 @@ export function canSkipRunnerReadinessPreflightAfterHealthyMutation( ): boolean { return readRunnerCommandTraits(command).readinessPreflightSkipEligibleAfterHealthyMutation; } + +function traitsForClass(traitClass: RunnerCommandTraitClass): RunnerCommandTraits { + switch (traitClass) { + case 'default': + return DEFAULT_TRAITS; + case 'readOnly': + return READ_ONLY_TRAITS; + case 'readOnlyReadinessProbe': + return READ_ONLY_READINESS_PROBE_TRAITS; + case 'preflightSkippableTouchMutation': + return PREFLIGHT_SKIPPABLE_TOUCH_MUTATION_TRAITS; + } +} diff --git a/src/platforms/ios/scroll.ts b/src/platforms/ios/scroll.ts new file mode 100644 index 000000000..013b7daf6 --- /dev/null +++ b/src/platforms/ios/scroll.ts @@ -0,0 +1,111 @@ +import { buildScrollGesturePlan, type ScrollDirection } from '../../core/scroll-gesture.ts'; + +export type NormalizedScrollOptions = { + amount?: number; + pixels?: number; + durationMs?: number; + preferProvidedPixels?: boolean; +}; + +export type AppleScrollOptions = Omit; + +export function normalizeAppleScrollResultWithResolvedFrame( + runnerResult: Record, + direction: ScrollDirection, + options?: AppleScrollOptions, + config?: { includeDuration?: boolean }, +): Record { + const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); + const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); + if (referenceWidth === undefined || referenceHeight === undefined) { + return normalizeAppleScrollResult(runnerResult, { amount: options?.amount }); + } + + // Recompute the plan from the runner's resolved frame so reported pixels match the planned + // travel (TS keeps buildScrollGesturePlan for Android and recording anyway). + const plan = buildScrollGesturePlan({ + direction, + amount: options?.amount, + pixels: options?.pixels, + referenceWidth, + referenceHeight, + }); + return normalizeAppleScrollResult(runnerResult, { + amount: options?.amount, + pixels: plan.pixels, + durationMs: config?.includeDuration ? options?.durationMs : undefined, + preferProvidedPixels: true, + }); +} + +export function scrollRunnerFields( + options: AppleScrollOptions | undefined, + config?: { includeDuration?: boolean }, +): Record { + return { + ...(options?.amount !== undefined ? { amount: options.amount } : {}), + ...(options?.pixels !== undefined ? { pixels: options.pixels } : {}), + ...(config?.includeDuration && options?.durationMs !== undefined + ? { durationMs: options.durationMs } + : {}), + }; +} + +function readFiniteNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +export function normalizeAppleScrollResult( + runnerResult: Record, + options?: NormalizedScrollOptions, +): Record { + const { x1, y1, x2, y2 } = remapRunnerCoordinates(runnerResult); + const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); + const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); + const horizontalTravel = + x1 !== undefined && x2 !== undefined ? Math.round(Math.abs(x2 - x1)) : undefined; + const verticalTravel = + y1 !== undefined && y2 !== undefined ? Math.round(Math.abs(y2 - y1)) : undefined; + const travelPixels = selectScrollTravelPixels(options, horizontalTravel, verticalTravel); + + return { + ...(x1 !== undefined ? { x1 } : {}), + ...(y1 !== undefined ? { y1 } : {}), + ...(x2 !== undefined ? { x2 } : {}), + ...(y2 !== undefined ? { y2 } : {}), + ...(referenceWidth !== undefined ? { referenceWidth } : {}), + ...(referenceHeight !== undefined ? { referenceHeight } : {}), + ...(options?.amount !== undefined ? { amount: options.amount } : {}), + ...(travelPixels !== undefined ? { pixels: travelPixels } : {}), + ...(options?.durationMs !== undefined ? { durationMs: options.durationMs } : {}), + }; +} + +function selectScrollTravelPixels( + options: NormalizedScrollOptions | undefined, + horizontalTravel: number | undefined, + verticalTravel: number | undefined, +): number | undefined { + if (options?.preferProvidedPixels && options.pixels !== undefined) return options.pixels; + if (horizontalTravel !== undefined && horizontalTravel > 0) return horizontalTravel; + if (verticalTravel !== undefined && verticalTravel > 0) return verticalTravel; + return undefined; +} + +function remapRunnerCoordinates(runnerResult: Record): { + x1?: number; + y1?: number; + x2?: number; + y2?: number; +} { + const x = readFiniteNumber(runnerResult.x); + const y = readFiniteNumber(runnerResult.y); + const x2 = readFiniteNumber(runnerResult.x2); + const y2 = readFiniteNumber(runnerResult.y2); + return { + ...(x !== undefined ? { x1: x } : {}), + ...(y !== undefined ? { y1: y } : {}), + ...(x2 !== undefined ? { x2 } : {}), + ...(y2 !== undefined ? { y2 } : {}), + }; +} From ef2d240b1e0ac451c18bbcc475f7d71627933a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 25 Jun 2026 07:59:37 +0200 Subject: [PATCH 2/3] fix: honor scroll duration across platforms --- .../RunnerTests+CommandExecution.swift | 18 ++++++-- src/platforms/android/__tests__/index.test.ts | 5 +- src/platforms/android/input-actions.ts | 8 +++- src/platforms/ios/__tests__/index.test.ts | 12 +++-- src/platforms/ios/interactions.ts | 17 +++---- src/platforms/ios/scroll.ts | 4 +- .../linux/__tests__/input-actions.test.ts | 3 +- src/platforms/linux/input-actions.ts | 44 +++++++++++++++--- .../web/agent-browser-provider.test.ts | 3 ++ src/platforms/web/agent-browser-provider.ts | 46 ++++++++++++++++++- src/platforms/web/provider.ts | 2 +- 11 files changed, 128 insertions(+), 34 deletions(-) diff --git a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift index 82e20cf61..620dca958 100644 --- a/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +++ b/ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift @@ -625,14 +625,25 @@ extension RunnerTests { ) ) } + if let durationMs = command.durationMs, + durationMs.isFinite == false || durationMs < 0 || durationMs > 10000 + { + return Response( + ok: false, + error: ErrorPayload( + code: "INVALID_ARGS", + message: "scroll durationMs must be between 0 and 10000" + ) + ) + } return executeDragGesture( activeApp: activeApp, x: frame.minX + plan.x1, y: frame.minY + plan.y1, x2: frame.minX + plan.x2, y2: frame.minY + plan.y2, - durationMs: nil, - synthesized: false, + durationMs: command.durationMs, + synthesized: command.durationMs != nil, message: "scrolled" ) case .desktopScroll: @@ -988,8 +999,7 @@ extension RunnerTests { /// Shared drag execution for `.drag` and the fused `.scroll`. Mirrors the original `.drag` body /// exactly: keyboardAvoidingDragPoints -> resolvedDragVisualizationFrame -> synthesized branch /// (16-10000ms clamp) or non-synthesized dragAt with coordinateDragHoldDuration -> - /// gestureResponse(.drag). `.scroll` always passes synthesized: false, pinning the same - /// non-synthesized drag path scroll's drag used today. + /// gestureResponse(.drag). `.scroll` uses the synthesized path only when a duration is requested. private func executeDragGesture( activeApp: XCUIApplication, x: Double, diff --git a/src/platforms/android/__tests__/index.test.ts b/src/platforms/android/__tests__/index.test.ts index 095e51a93..c1f72ba8f 100644 --- a/src/platforms/android/__tests__/index.test.ts +++ b/src/platforms/android/__tests__/index.test.ts @@ -534,12 +534,13 @@ test('scrollAndroid supports explicit pixel travel distance', async () => { '', ].join('\n'), async ({ argsLogPath, device }) => { - const result = await scrollAndroid(device, 'down', { pixels: 240 }); + const result = await scrollAndroid(device, 'down', { pixels: 240, durationMs: 120 }); const args = await fs.readFile(argsLogPath, 'utf8'); - assert.match(args, /shell\ninput\nswipe\n540\n1080\n540\n840\n300\n/); + assert.match(args, /shell\ninput\nswipe\n540\n1080\n540\n840\n120\n/); assert.doesNotMatch(args, /uiautomator|dump/); assert.equal(result.pixels, 240); + assert.equal(result.durationMs, 120); assert.equal(result.referenceWidth, 1080); assert.equal(result.referenceHeight, 1920); }, diff --git a/src/platforms/android/input-actions.ts b/src/platforms/android/input-actions.ts index 94e17ddc5..f23e7263b 100644 --- a/src/platforms/android/input-actions.ts +++ b/src/platforms/android/input-actions.ts @@ -214,6 +214,7 @@ export async function scrollAndroid( referenceWidth: size.width, referenceHeight: size.height, }); + const durationMs = options?.durationMs ?? 300; await runAndroidAdb(device, [ 'shell', @@ -223,10 +224,13 @@ export async function scrollAndroid( String(plan.y1), String(plan.x2), String(plan.y2), - '300', + String(durationMs), ]); - return plan; + return { + ...plan, + ...(options?.durationMs !== undefined ? { durationMs: options.durationMs } : {}), + }; } function resolveAndroidUserRotation(orientation: DeviceRotation): string { diff --git a/src/platforms/ios/__tests__/index.test.ts b/src/platforms/ios/__tests__/index.test.ts index 522e87651..12b563c0e 100644 --- a/src/platforms/ios/__tests__/index.test.ts +++ b/src/platforms/ios/__tests__/index.test.ts @@ -342,9 +342,8 @@ for (const [name, device] of [ } test('iosRunnerOverrides maps iOS scroll to a single fused scroll command', async () => { - // The fused scroll resolves the frame and performs the drag in one runner lifecycle command; - // no separate interactionFrame request and no durationMs (the runner pins the non-synthesized - // drag path that ignores it). + // The fused scroll resolves the frame and performs the duration-aware drag in one runner + // lifecycle command; no separate interactionFrame request is needed. mockRunIosRunnerCommand.mockResolvedValueOnce({ x: 200, y: 640, @@ -364,6 +363,7 @@ test('iosRunnerOverrides maps iOS scroll to a single fused scroll command', asyn assert.deepEqual(mockRunIosRunnerCommand.mock.calls[0]?.[1], { command: 'scroll', direction: 'down', + durationMs: 50, appBundleId: 'com.example.App', }); assert.deepEqual(result, { @@ -374,10 +374,11 @@ test('iosRunnerOverrides maps iOS scroll to a single fused scroll command', asyn referenceWidth: 400, referenceHeight: 800, pixels: 480, + durationMs: 50, }); }); -test('iosRunnerOverrides does not report duration for tvOS remote scroll', async () => { +test('iosRunnerOverrides maps tvOS scroll duration to remote press hold duration', async () => { mockRunIosRunnerCommand.mockResolvedValueOnce({ ok: true, }); @@ -392,9 +393,10 @@ test('iosRunnerOverrides does not report duration for tvOS remote scroll', async assert.deepEqual(mockRunIosRunnerCommand.mock.calls[0]?.[1], { command: 'remotePress', remoteButton: 'down', + durationMs: 50, appBundleId: 'com.example.App', }); - assert.deepEqual(result, {}); + assert.deepEqual(result, { durationMs: 50 }); }); test('iosRunnerOverrides maps macOS desktop scroll to a desktop wheel command', async () => { diff --git a/src/platforms/ios/interactions.ts b/src/platforms/ios/interactions.ts index 212441e3d..cf6045a2f 100644 --- a/src/platforms/ios/interactions.ts +++ b/src/platforms/ios/interactions.ts @@ -322,22 +322,26 @@ async function runAppleScroll( direction: ScrollDirection, options?: AppleScrollOptions, ): Promise> { + normalizeScrollDurationMs(options?.durationMs, { + invalidMessage: `scroll durationMs must be a non-negative integer at most ${SCROLL_DURATION_MAX_MS}`, + }); + if (device.target === 'tv') { const runnerResult = await runRunnerCommand( device, - appleRemotePressCommand(direction, ctx.appBundleId), + appleRemotePressCommand(direction, ctx.appBundleId, options?.durationMs), runnerOpts, ); - return normalizeAppleScrollResult(runnerResult, { amount: options?.amount }); + return normalizeAppleScrollResult(runnerResult, { + amount: options?.amount, + durationMs: options?.durationMs, + }); } // Validate amount/pixels up front so bad inputs throw INVALID_ARGS before any runner command // is sent (previously validation ran between the frame request and the drag, so a bad amount // could cost one runner request first). assertScrollGestureInput(options ?? {}); - normalizeScrollDurationMs(options?.durationMs, { - invalidMessage: `scroll durationMs must be a non-negative integer at most ${SCROLL_DURATION_MAX_MS}`, - }); if (device.platform === 'macos') { return await runMacosDesktopScroll( @@ -351,9 +355,6 @@ async function runAppleScroll( } // Single fused lifecycle command: the runner resolves the interaction frame and runs the drag. - // durationMs is intentionally not sent — scroll's drag used 250ms today, but the runner's - // non-synthesized drag path ignores it (coordinateDragHoldDuration + XCTest default drag - // velocity), and the fused `scroll` handler pins that same non-synthesized path. const runnerResult = await runRunnerCommand( device, { diff --git a/src/platforms/ios/scroll.ts b/src/platforms/ios/scroll.ts index 013b7daf6..a002b9c68 100644 --- a/src/platforms/ios/scroll.ts +++ b/src/platforms/ios/scroll.ts @@ -13,7 +13,7 @@ export function normalizeAppleScrollResultWithResolvedFrame( runnerResult: Record, direction: ScrollDirection, options?: AppleScrollOptions, - config?: { includeDuration?: boolean }, + config: { includeDuration?: boolean } = { includeDuration: true }, ): Record { const referenceWidth = readFiniteNumber(runnerResult.referenceWidth); const referenceHeight = readFiniteNumber(runnerResult.referenceHeight); @@ -40,7 +40,7 @@ export function normalizeAppleScrollResultWithResolvedFrame( export function scrollRunnerFields( options: AppleScrollOptions | undefined, - config?: { includeDuration?: boolean }, + config: { includeDuration?: boolean } = { includeDuration: true }, ): Record { return { ...(options?.amount !== undefined ? { amount: options.amount } : {}), diff --git a/src/platforms/linux/__tests__/input-actions.test.ts b/src/platforms/linux/__tests__/input-actions.test.ts index 4d13c9a6e..978134518 100644 --- a/src/platforms/linux/__tests__/input-actions.test.ts +++ b/src/platforms/linux/__tests__/input-actions.test.ts @@ -98,7 +98,7 @@ test('typeLinux uses ydotool type', async () => { test('scrollLinux uses ydotool mousemove --wheel for vertical scroll', async () => { setupYdotool(); - await scrollLinux('up'); + const result = await scrollLinux('up', { pixels: 80, durationMs: 1 }); const c = calls(); assert.ok( c.some( @@ -109,4 +109,5 @@ test('scrollLinux uses ydotool mousemove --wheel for vertical scroll', async () args.includes('-y'), ), ); + assert.equal(result.durationMs, 1); }); diff --git a/src/platforms/linux/input-actions.ts b/src/platforms/linux/input-actions.ts index b26e83b33..2c93dcfed 100644 --- a/src/platforms/linux/input-actions.ts +++ b/src/platforms/linux/input-actions.ts @@ -178,11 +178,11 @@ const DEFAULT_SCROLL_CLICKS = 5; export async function scrollLinux( direction: ScrollDirection, options?: { amount?: number; pixels?: number; durationMs?: number }, -): Promise { +): Promise> { const provider = resolveLinuxInputProvider(); if (provider) { await provider.scroll(direction, options); - return; + return scrollDurationResult(options); } const { tool } = await ensureInputTool(); @@ -205,17 +205,47 @@ export async function scrollLinux( if (tool === 'xdotool') { const button = direction === 'up' ? '4' : direction === 'down' ? '5' : direction === 'left' ? '6' : '7'; - await xdotool('click', '--repeat', String(scrollCount), button); + await runPacedScrollSteps(scrollCount, options?.durationMs, async () => { + await xdotool('click', button); + }); } else { // ydotool: wheel events use positive/negative values if (direction === 'up' || direction === 'down') { - const value = direction === 'up' ? String(-scrollCount) : String(scrollCount); - await ydotool('mousemove', '--wheel', '-y', value); + await runPacedScrollSteps(scrollCount, options?.durationMs, async (stepCount) => { + const stepValue = direction === 'up' ? String(-stepCount) : String(stepCount); + await ydotool('mousemove', '--wheel', '-y', stepValue); + }); } else { - const value = direction === 'left' ? String(-scrollCount) : String(scrollCount); - await ydotool('mousemove', '--wheel', '-x', value); + await runPacedScrollSteps(scrollCount, options?.durationMs, async (stepCount) => { + const stepValue = direction === 'left' ? String(-stepCount) : String(stepCount); + await ydotool('mousemove', '--wheel', '-x', stepValue); + }); } } + return scrollDurationResult(options); +} + +async function runPacedScrollSteps( + totalCount: number, + durationMs: number | undefined, + runStep: (stepCount: number) => Promise, +): Promise { + if (durationMs === undefined || durationMs <= 0 || totalCount <= 1) { + await runStep(totalCount); + return; + } + + const intervalMs = durationMs / Math.max(1, totalCount - 1); + for (let index = 0; index < totalCount; index += 1) { + await runStep(1); + if (index < totalCount - 1) await sleep(intervalMs); + } +} + +function scrollDurationResult( + options: { durationMs?: number } | undefined, +): Record { + return options?.durationMs !== undefined ? { durationMs: options.durationMs } : {}; } // ── Keyboard actions ──────────────────────────────────────────────────── diff --git a/src/platforms/web/agent-browser-provider.test.ts b/src/platforms/web/agent-browser-provider.test.ts index 2f088e394..24ffc8b03 100644 --- a/src/platforms/web/agent-browser-provider.test.ts +++ b/src/platforms/web/agent-browser-provider.test.ts @@ -34,6 +34,8 @@ test('agent-browser provider maps supported operations to session-scoped JSON co await provider.fillRef?.('@e2', 'Grace'); await provider.typeText('hello'); await provider.scroll('down', { pixels: 400 }); + const scrollResult = await provider.scroll('up', { pixels: 100, durationMs: 1 }); + assert.deepEqual(scrollResult, { durationMs: 1 }); await provider.close(); }); @@ -55,6 +57,7 @@ test('agent-browser provider maps supported operations to session-scoped JSON co ['fill', '@e2', 'Grace', '--json', '--session', 'web-session'], ['keyboard', 'type', 'hello', '--json', '--session', 'web-session'], ['scroll', 'down', '400', '--json', '--session', 'web-session'], + ['scroll', 'up', '100', '--json', '--session', 'web-session'], ['close', '--json', '--session', 'web-session'], ], ); diff --git a/src/platforms/web/agent-browser-provider.ts b/src/platforms/web/agent-browser-provider.ts index 00fed4e6e..aad79932d 100644 --- a/src/platforms/web/agent-browser-provider.ts +++ b/src/platforms/web/agent-browser-provider.ts @@ -1,5 +1,6 @@ import { runCmd } from '../../utils/exec.ts'; import { AppError } from '../../utils/errors.ts'; +import { sleep } from '../../utils/timeouts.ts'; import type { Rect } from '../../utils/snapshot.ts'; import { normalizeAgentBrowserNetworkRequests } from './agent-browser-network.ts'; import { normalizeAgentBrowserSnapshot } from './agent-browser-snapshot.ts'; @@ -65,8 +66,10 @@ export function createAgentBrowserWebProvider( await runJson(['keyboard', 'type', text]); }, async scroll(direction, scrollOptions) { - const distance = scrollOptions?.pixels ?? scrollOptions?.amount; - await runJson(['scroll', direction, ...(distance === undefined ? [] : [String(distance)])]); + await runPacedScroll(runJson, direction, scrollOptions); + return scrollOptions?.durationMs !== undefined + ? { durationMs: scrollOptions.durationMs } + : {}; }, async dumpNetwork(options) { return normalizeAgentBrowserNetworkRequests(await runJson(['network', 'requests']), options); @@ -74,6 +77,45 @@ export function createAgentBrowserWebProvider( }; } +async function runPacedScroll( + runJson: (args: string[]) => Promise, + direction: string, + scrollOptions: { amount?: number; pixels?: number; durationMs?: number } | undefined, +): Promise { + const steps = buildPacedScrollSteps(scrollOptions); + for (const step of steps) { + await runJson(buildScrollArgs(direction, step.distance)); + if (step.delayAfterMs > 0) await sleep(step.delayAfterMs); + } +} + +type ScrollStep = { + distance?: number; + delayAfterMs: number; +}; + +function buildPacedScrollSteps( + scrollOptions: { amount?: number; pixels?: number; durationMs?: number } | undefined, +): ScrollStep[] { + const requestedDistance = scrollOptions?.pixels ?? scrollOptions?.amount; + const durationMs = scrollOptions?.durationMs; + if (durationMs === undefined || durationMs <= 0) { + return [{ distance: requestedDistance, delayAfterMs: 0 }]; + } + + const stepCount = Math.max(1, Math.min(20, Math.ceil(durationMs / 50))); + const stepDistance = (requestedDistance ?? 300) / stepCount; + const intervalMs = durationMs / Math.max(1, stepCount - 1); + return Array.from({ length: stepCount }, (_, index) => ({ + distance: stepDistance, + delayAfterMs: index < stepCount - 1 ? intervalMs : 0, + })); +} + +function buildScrollArgs(direction: string, distance: number | undefined): string[] { + return ['scroll', direction, ...(distance === undefined ? [] : [String(distance)])]; +} + async function clickCoordinates( runJson: (args: string[]) => Promise, x: number, diff --git a/src/platforms/web/provider.ts b/src/platforms/web/provider.ts index 796d6c7b1..42b9a1257 100644 --- a/src/platforms/web/provider.ts +++ b/src/platforms/web/provider.ts @@ -43,7 +43,7 @@ export type WebProvider = { scroll( direction: ScrollDirection, options?: { amount?: number; pixels?: number; durationMs?: number }, - ): Promise; + ): Promise | void>; readText?(x: number, y: number): Promise; dumpNetwork?(options?: BackendDumpNetworkOptions): Promise; }; From 5c0e3e850f608572adeff25a1832fb2fe31bc395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Thu, 25 Jun 2026 10:03:28 +0200 Subject: [PATCH 3/3] fix: address scroll duration review comments --- src/commands/interaction/metadata.ts | 2 +- .../linux/__tests__/input-actions.test.ts | 15 ++++++++++++ src/platforms/linux/input-actions.ts | 4 ++-- .../web/agent-browser-provider.test.ts | 8 ++++--- src/platforms/web/agent-browser-provider.ts | 24 ++++++++++++++++--- src/utils/cli-flags.ts | 2 +- 6 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/commands/interaction/metadata.ts b/src/commands/interaction/metadata.ts index 3b216d5dc..1f3f71455 100644 --- a/src/commands/interaction/metadata.ts +++ b/src/commands/interaction/metadata.ts @@ -115,7 +115,7 @@ const scrollFields = { direction: requiredField(enumField(SCROLL_INPUT_DIRECTIONS)), amount: numberField('Platform scroll amount.'), pixels: integerField('Pixel scroll amount.', { min: 0 }), - durationMs: integerField('Desktop scroll duration in milliseconds.', { + durationMs: integerField('Scroll duration in milliseconds when the backend supports pacing.', { min: 0, max: SCROLL_DURATION_MAX_MS, }), diff --git a/src/platforms/linux/__tests__/input-actions.test.ts b/src/platforms/linux/__tests__/input-actions.test.ts index 978134518..d9d665e14 100644 --- a/src/platforms/linux/__tests__/input-actions.test.ts +++ b/src/platforms/linux/__tests__/input-actions.test.ts @@ -61,6 +61,21 @@ test('typeLinux omits --delay when delayMs is 0', async () => { assert.ok(!typeCall[1].includes('--delay')); }); +test('scrollLinux preserves xdotool repeat count for non-paced scroll', async () => { + setupXdotool(); + await scrollLinux('down', { pixels: 75 }); + const c = calls(); + assert.ok( + c.some( + ([cmd, args]) => + cmd === 'xdotool' && + args.includes('click') && + args.includes('--repeat') && + args.includes('5'), + ), + ); +}); + // ── ydotool tests ──────────────────────────────────────────────────────── test('pressLinux uses ydotool mousemove + click on Wayland', async () => { diff --git a/src/platforms/linux/input-actions.ts b/src/platforms/linux/input-actions.ts index 2c93dcfed..cbba7bc1f 100644 --- a/src/platforms/linux/input-actions.ts +++ b/src/platforms/linux/input-actions.ts @@ -205,8 +205,8 @@ export async function scrollLinux( if (tool === 'xdotool') { const button = direction === 'up' ? '4' : direction === 'down' ? '5' : direction === 'left' ? '6' : '7'; - await runPacedScrollSteps(scrollCount, options?.durationMs, async () => { - await xdotool('click', button); + await runPacedScrollSteps(scrollCount, options?.durationMs, async (stepCount) => { + await xdotool('click', '--repeat', String(stepCount), button); }); } else { // ydotool: wheel events use positive/negative values diff --git a/src/platforms/web/agent-browser-provider.test.ts b/src/platforms/web/agent-browser-provider.test.ts index 24ffc8b03..7957e22bb 100644 --- a/src/platforms/web/agent-browser-provider.test.ts +++ b/src/platforms/web/agent-browser-provider.test.ts @@ -34,8 +34,8 @@ test('agent-browser provider maps supported operations to session-scoped JSON co await provider.fillRef?.('@e2', 'Grace'); await provider.typeText('hello'); await provider.scroll('down', { pixels: 400 }); - const scrollResult = await provider.scroll('up', { pixels: 100, durationMs: 1 }); - assert.deepEqual(scrollResult, { durationMs: 1 }); + const scrollResult = await provider.scroll('up', { pixels: 100, durationMs: 120 }); + assert.deepEqual(scrollResult, { durationMs: 120 }); await provider.close(); }); @@ -57,7 +57,9 @@ test('agent-browser provider maps supported operations to session-scoped JSON co ['fill', '@e2', 'Grace', '--json', '--session', 'web-session'], ['keyboard', 'type', 'hello', '--json', '--session', 'web-session'], ['scroll', 'down', '400', '--json', '--session', 'web-session'], - ['scroll', 'up', '100', '--json', '--session', 'web-session'], + ['scroll', 'up', '34', '--json', '--session', 'web-session'], + ['scroll', 'up', '33', '--json', '--session', 'web-session'], + ['scroll', 'up', '33', '--json', '--session', 'web-session'], ['close', '--json', '--session', 'web-session'], ], ); diff --git a/src/platforms/web/agent-browser-provider.ts b/src/platforms/web/agent-browser-provider.ts index aad79932d..111a67d38 100644 --- a/src/platforms/web/agent-browser-provider.ts +++ b/src/platforms/web/agent-browser-provider.ts @@ -104,14 +104,32 @@ function buildPacedScrollSteps( } const stepCount = Math.max(1, Math.min(20, Math.ceil(durationMs / 50))); - const stepDistance = (requestedDistance ?? 300) / stepCount; const intervalMs = durationMs / Math.max(1, stepCount - 1); - return Array.from({ length: stepCount }, (_, index) => ({ - distance: stepDistance, + return scrollStepDistances(scrollOptions, stepCount).map((distance, index) => ({ + distance, delayAfterMs: index < stepCount - 1 ? intervalMs : 0, })); } +function scrollStepDistances( + scrollOptions: { amount?: number; pixels?: number } | undefined, + stepCount: number, +): number[] { + const totalDistance = scrollOptions?.pixels ?? scrollOptions?.amount ?? 300; + if (scrollOptions?.amount !== undefined && scrollOptions.pixels === undefined) { + return Array.from({ length: stepCount }, () => totalDistance / stepCount); + } + return distributeIntegerDistance(Math.round(totalDistance), stepCount); +} + +function distributeIntegerDistance(totalDistance: number, stepCount: number): number[] { + const baseDistance = Math.floor(totalDistance / stepCount); + const remainder = totalDistance - baseDistance * stepCount; + return Array.from({ length: stepCount }, (_, index) => + index < remainder ? baseDistance + 1 : baseDistance, + ); +} + function buildScrollArgs(direction: string, distance: number | undefined): string[] { return ['scroll', direction, ...(distance === undefined ? [] : [String(distance)])]; } diff --git a/src/utils/cli-flags.ts b/src/utils/cli-flags.ts index 8d0438f5e..6a1f99703 100644 --- a/src/utils/cli-flags.ts +++ b/src/utils/cli-flags.ts @@ -685,7 +685,7 @@ const FLAG_DEFINITIONS: readonly FlagDefinition[] = [ min: 0, max: 10_000, usageLabel: '--duration-ms ', - usageDescription: 'Scroll: spread desktop wheel events over this duration', + usageDescription: 'Scroll: pace the gesture over this duration when supported', }, { key: 'holdMs',