From e6bedb94e522ecf449f6f45fdd99c787e38481dc Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Tue, 23 Jun 2026 12:45:30 +0800 Subject: [PATCH] Add canonical computer-use action schema (Anthropic/OpenAI -> AC_*) --- README/WHATS_NEW_zh-CN.md | 6 + README/WHATS_NEW_zh-TW.md | 6 + WHATS_NEW.md | 6 + .../doc/new_features/v151_features_doc.rst | 45 +++++++ docs/source/Eng/eng_index.rst | 1 + .../Zh/doc/new_features/v151_features_doc.rst | 39 ++++++ docs/source/Zh/zh_index.rst | 1 + je_auto_control/__init__.py | 8 ++ .../gui/script_builder/command_schema.py | 10 ++ je_auto_control/utils/cua_action/__init__.py | 7 + .../utils/cua_action/cua_action.py | 120 ++++++++++++++++++ .../utils/executor/action_executor.py | 16 +++ .../utils/mcp_server/tools/_factories.py | 22 +++- .../utils/mcp_server/tools/_handlers.py | 5 + .../headless/test_cua_action_batch.py | 84 ++++++++++++ 15 files changed, 374 insertions(+), 2 deletions(-) create mode 100644 docs/source/Eng/doc/new_features/v151_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v151_features_doc.rst create mode 100644 je_auto_control/utils/cua_action/__init__.py create mode 100644 je_auto_control/utils/cua_action/cua_action.py create mode 100644 test/unit_test/headless/test_cua_action_batch.py diff --git a/README/WHATS_NEW_zh-CN.md b/README/WHATS_NEW_zh-CN.md index 8aca9a54..aaeeffc5 100644 --- a/README/WHATS_NEW_zh-CN.md +++ b/README/WHATS_NEW_zh-CN.md @@ -1,5 +1,11 @@ # 本次更新 — AutoControl +## 本次更新 (2026-06-23) — 标准化 Computer-Use 动作结构 + +把 Anthropic / OpenAI agent 动作桥接到 AutoControl 命令。完整参考:[`docs/source/Zh/doc/new_features/v151_features_doc.rst`](../docs/source/Zh/doc/new_features/v151_features_doc.rst)。 + +- **`from_anthropic` / `from_openai_cua` / `to_ac_command` / `canonical_action`**(`AC_cua_command`):`tool_use_schema` 导出 AC_* 签章、`coordinate_space` 缩放——两者都不*正规化进来的动作载荷*。Anthropic 发出 `{action:"left_click", coordinate:[x,y]}`、OpenAI CUA 发出 `{type:"click", x, y, button}`;这些转接器把两者对应为标准动作再对应为可执行的 `[AC_*, 
params]`(含可选坐标空间 `scale`)。纯标准库、可无头测试;执行器命令对任一来源返回 `{canonical, command}`。 + ## 本次更新 (2026-06-23) — 窗口客户区几何 不论标题栏 / 边框,点击窗口*内部*。完整参考:[`docs/source/Zh/doc/new_features/v150_features_doc.rst`](../docs/source/Zh/doc/new_features/v150_features_doc.rst)。 diff --git a/README/WHATS_NEW_zh-TW.md b/README/WHATS_NEW_zh-TW.md index fb75f305..59675f4d 100644 --- a/README/WHATS_NEW_zh-TW.md +++ b/README/WHATS_NEW_zh-TW.md @@ -1,5 +1,11 @@ # 本次更新 — AutoControl +## 本次更新 (2026-06-23) — 標準化 Computer-Use 動作結構 + +把 Anthropic / OpenAI agent 動作橋接到 AutoControl 命令。完整參考:[`docs/source/Zh/doc/new_features/v151_features_doc.rst`](../docs/source/Zh/doc/new_features/v151_features_doc.rst)。 + +- **`from_anthropic` / `from_openai_cua` / `to_ac_command` / `canonical_action`**(`AC_cua_command`):`tool_use_schema` 匯出 AC_* 簽章、`coordinate_space` 縮放——兩者都不*正規化進來的動作酬載*。Anthropic 發出 `{action:"left_click", coordinate:[x,y]}`、OpenAI CUA 發出 `{type:"click", x, y, button}`;這些轉接器把兩者對應為標準動作再對應為可執行的 `[AC_*, params]`(含選用座標空間 `scale`)。純標準函式庫、可無頭測試;執行器命令對任一來源回傳 `{canonical, command}`。 + ## 本次更新 (2026-06-23) — 視窗客戶區幾何 不論標題列 / 邊框,點擊視窗*內部*。完整參考:[`docs/source/Zh/doc/new_features/v150_features_doc.rst`](../docs/source/Zh/doc/new_features/v150_features_doc.rst)。 diff --git a/WHATS_NEW.md b/WHATS_NEW.md index 60e24230..fcb19626 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -1,5 +1,11 @@ # What's New — AutoControl +## What's new (2026-06-23) — Canonical Computer-Use Action Schema + +Bridge Anthropic / OpenAI agent actions to AutoControl commands. Full reference: [`docs/source/Eng/doc/new_features/v151_features_doc.rst`](docs/source/Eng/doc/new_features/v151_features_doc.rst). + +- **`from_anthropic` / `from_openai_cua` / `to_ac_command` / `canonical_action`** (`AC_cua_command`): `tool_use_schema` exports AC_* signatures and `coordinate_space` rescales — neither *normalizes an inbound action payload*. 
Anthropic emits `{action:"left_click", coordinate:[x,y]}`, OpenAI CUA emits `{type:"click", x, y, button}`; these adapters map both to a canonical action and then to a runnable `[AC_*, params]` (with optional coordinate-space `scale`). Pure-stdlib, headless-testable; the executor command returns `{canonical, command}` for any source. + ## What's new (2026-06-23) — Window Client-Area Geometry Click *inside* a window regardless of its title bar / borders. Full reference: [`docs/source/Eng/doc/new_features/v150_features_doc.rst`](docs/source/Eng/doc/new_features/v150_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v151_features_doc.rst b/docs/source/Eng/doc/new_features/v151_features_doc.rst new file mode 100644 index 00000000..81ac4039 --- /dev/null +++ b/docs/source/Eng/doc/new_features/v151_features_doc.rst @@ -0,0 +1,45 @@ +Canonical Computer-Use Action Schema +==================================== + +``tool_use_schema`` exports the AC_* command *signatures* as tool definitions and +``coordinate_space`` rescales a model grid — but neither *normalizes an inbound action +payload*. Anthropic's computer-use tool emits ``{action:"left_click", +coordinate:[x,y]}``, OpenAI's CUA emits ``{type:"click", x, y, button}`` — there was no +adapter mapping these heterogeneous shapes onto a canonical action and then onto a +runnable AC_* command, so integrators hand-wrote the glue. + +Pure-stdlib dict mapping (an optional ``scale`` callable applies coordinate-space +rescaling), fully headless-testable. Imports no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import (from_anthropic, from_openai_cua, to_ac_command, + canonical_action) + + # Anthropic agent output -> canonical -> runnable AC action. 
+ canonical = from_anthropic({"action": "left_click", "coordinate": [120, 80]}) + command = to_ac_command(canonical) + # -> ["AC_click_mouse", {"mouse_keycode": "mouse_left", "x": 120, "y": 80}] + + # OpenAI CUA, with model->physical coordinate rescaling. + cmd = to_ac_command(from_openai_cua({"type": "scroll", "x": 5, "y": 6, + "scroll_y": 120}), + scale=lambda x, y: (x * 2, y * 2)) + +``from_anthropic`` / ``from_openai_cua`` map each provider's payload to a canonical +``{type, x, y, text, …}`` (clicks, double/right/middle click, move, type, key, scroll, +screenshot). ``to_ac_command`` maps a canonical action to a ``[command_name, params]`` +AC action (``AC_click_mouse`` / ``AC_set_mouse_position`` / ``AC_write`` / ``AC_hotkey`` +/ ``AC_mouse_scroll`` / ``AC_screenshot``), applying ``scale`` to coordinates; an +unmapped type raises ``AutoControlActionException``. ``canonical_action`` builds a +canonical dict directly. + +Executor command +---------------- + +``AC_cua_command`` normalizes a ``payload`` from ``source`` (``anthropic`` / ``openai`` +/ ``canonical``) and returns ``{canonical, command}``. It is exposed as the MCP tool +``ac_cua_command`` and as a Script Builder command under **Native UI**. diff --git a/docs/source/Eng/eng_index.rst b/docs/source/Eng/eng_index.rst index 0b46abc9..832f3a1f 100644 --- a/docs/source/Eng/eng_index.rst +++ b/docs/source/Eng/eng_index.rst @@ -173,6 +173,7 @@ Comprehensive guides for all AutoControl features. 
doc/new_features/v148_features_doc doc/new_features/v149_features_doc doc/new_features/v150_features_doc + doc/new_features/v151_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/docs/source/Zh/doc/new_features/v151_features_doc.rst b/docs/source/Zh/doc/new_features/v151_features_doc.rst new file mode 100644 index 00000000..1de89fa0 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v151_features_doc.rst @@ -0,0 +1,39 @@ +標準化 Computer-Use 動作結構 +============================ + +``tool_use_schema`` 把 AC_* 命令*簽章*匯出為工具定義,``coordinate_space`` 縮放模型網格——但兩者都不*正規化進來的 +動作酬載*。Anthropic 的 computer-use 工具發出 ``{action:"left_click", coordinate:[x,y]}``,OpenAI 的 CUA 發出 +``{type:"click", x, y, button}``——先前沒有把這些異質形狀對應到標準動作、再對應到可執行 AC_* 命令的轉接器, +整合者只能手寫膠水程式。 + +純標準函式庫的字典對應(選用 ``scale`` callable 套用座標空間縮放),完全可無頭測試。不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import (from_anthropic, from_openai_cua, to_ac_command, + canonical_action) + + # Anthropic agent 輸出 -> 標準 -> 可執行 AC 動作。 + canonical = from_anthropic({"action": "left_click", "coordinate": [120, 80]}) + command = to_ac_command(canonical) + # -> ["AC_click_mouse", {"mouse_keycode": "mouse_left", "x": 120, "y": 80}] + + # OpenAI CUA,含 模型->實體 座標縮放。 + cmd = to_ac_command(from_openai_cua({"type": "scroll", "x": 5, "y": 6, + "scroll_y": 120}), + scale=lambda x, y: (x * 2, y * 2)) + +``from_anthropic`` / ``from_openai_cua`` 把各供應商酬載對應為標準 ``{type, x, y, text, …}``(click、double/right/ +middle click、move、type、key、scroll、screenshot)。``to_ac_command`` 把標準動作對應為 ``[command_name, params]`` +AC 動作(``AC_click_mouse`` / ``AC_set_mouse_position`` / ``AC_write`` / ``AC_hotkey`` / ``AC_mouse_scroll`` / +``AC_screenshot``),並對座標套用 ``scale``;無法對應的類型會丟出 ``AutoControlActionException``。``canonical_action`` +直接建立標準字典。 + +執行器命令 +---------- + +``AC_cua_command`` 從 ``source``(``anthropic`` / ``openai`` / ``canonical``)正規化 ``payload`` 
並回傳 +``{canonical, command}``。它以 MCP 工具 ``ac_cua_command`` 以及 Script Builder 中 **Native UI** 分類下的命令提供。 diff --git a/docs/source/Zh/zh_index.rst b/docs/source/Zh/zh_index.rst index 48ff0352..d43485b8 100644 --- a/docs/source/Zh/zh_index.rst +++ b/docs/source/Zh/zh_index.rst @@ -173,6 +173,7 @@ AutoControl 所有功能的完整使用指南。 doc/new_features/v148_features_doc doc/new_features/v149_features_doc doc/new_features/v150_features_doc + doc/new_features/v151_features_doc doc/ocr_backends/ocr_backends_doc doc/observability/observability_doc doc/operations_layer/operations_layer_doc diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 4ce366ce..5e44e1d3 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -361,6 +361,10 @@ from je_auto_control.utils.window_geometry import ( client_point, client_to_screen, frame_insets, get_client_rect, ) +# Canonical computer-use action schema (normalize Anthropic / OpenAI -> AC_*) +from je_auto_control.utils.cua_action import ( + canonical_action, from_anthropic, from_openai_cua, to_ac_command, +) # CI workflow annotations (GitHub Actions) from je_auto_control.utils.ci_annotations import ( emit_annotations, format_annotation, @@ -1235,6 +1239,10 @@ def start_autocontrol_gui(*args, **kwargs): "client_to_screen", "get_client_rect", "client_point", + "canonical_action", + "from_anthropic", + "from_openai_cua", + "to_ac_command", "emit_annotations", "format_annotation", "ClipboardHistory", "default_clipboard_history", "analyze_heal_log", "heal_stats", "scan_secrets", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index c713d168..6b17f704 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -2910,6 +2910,16 @@ def _add_screen_state_specs(specs: List[CommandSpec]) -> None: def _add_set_of_marks_specs(specs: List[CommandSpec]) -> None: + 
"""Canonical computer-use action schema — normalize Anthropic / OpenAI payloads to AC_*.

``tool_use_schema`` exports the AC_* command *signatures* as tool definitions and
``coordinate_space`` rescales a model grid — but neither *normalizes an inbound action
payload*. Anthropic emits ``{action:"left_click", coordinate:[x,y]}``, OpenAI's CUA
emits ``{type:"click", x, y, button}``. The adapters in this module map both shapes
onto one canonical ``{type, x, y, text, ...}`` dict and then onto a runnable
``[command_name, params]`` AC action.

All pure-stdlib dict mapping (an optional ``scale`` callable applies coordinate-space
rescaling), so it is fully headless-testable. Imports no ``PySide6``.
"""
from typing import Any, Callable, Dict, List, Mapping, Optional

# Anthropic computer-use "action" -> canonical type.
_ANTHROPIC = {"left_click": "click", "right_click": "right_click",
              "middle_click": "middle_click", "double_click": "double_click",
              "mouse_move": "move", "left_click_drag": "drag", "type": "type",
              "key": "key", "scroll": "scroll", "screenshot": "screenshot",
              "cursor_position": "cursor_position"}

# Canonical click type -> AC mouse button keycode.
# NOTE(review): "double_click" produces the same single AC_click_mouse command as
# "click" — confirm whether a dedicated double-click command should be emitted.
_CLICK_BUTTONS = {"click": "mouse_left", "double_click": "mouse_left",
                  "right_click": "mouse_right", "middle_click": "mouse_middle"}

# OpenAI CUA "button" -> canonical click type.
_OPENAI_BUTTONS = {"left": "click", "right": "right_click",
                   "wheel": "middle_click", "middle": "middle_click"}


def canonical_action(action_type: str, **fields: Any) -> Dict[str, Any]:
    """Build a canonical action dict ``{type, ...}``, dropping ``None`` fields.

    :param action_type: canonical action name stored under ``"type"``.
    :param fields: extra keys; any whose value is ``None`` is omitted.
    :return: a new dict with ``"type"`` plus every non-``None`` field.
    """
    result: Dict[str, Any] = {"type": action_type}
    result.update({key: value for key, value in fields.items() if value is not None})
    return result


def _xy(coordinate: Any) -> Dict[str, int]:
    """Turn an ``[x, y]`` pair into ``{"x": int, "y": int}``; falsy input gives ``{}``."""
    if not coordinate:
        return {}
    return {"x": int(coordinate[0]), "y": int(coordinate[1])}


def from_anthropic(tool_input: Mapping[str, Any]) -> Dict[str, Any]:
    """Normalize an Anthropic computer-use tool input to a canonical action.

    :param tool_input: mapping with ``action`` and optionally ``coordinate``,
        ``text``, ``scroll_direction`` and ``scroll_amount``.
    :return: canonical action dict; an action name that is not in the lookup
        table is passed through unchanged as the canonical type.
    """
    action = tool_input.get("action", "")
    fields: Dict[str, Any] = _xy(tool_input.get("coordinate"))
    if tool_input.get("text") is not None:
        fields["text"] = tool_input["text"]
    if action == "scroll":
        fields["direction"] = tool_input.get("scroll_direction")
        fields["amount"] = tool_input.get("scroll_amount")
    return canonical_action(_ANTHROPIC.get(action, action), **fields)


def _openai_click_type(item: Mapping[str, Any]) -> str:
    """Return the canonical click type for an OpenAI ``click`` item.

    A missing or ``None`` button means a left click. A button that is not in the
    lookup table (for example ``"back"``) yields ``"<button>_click"`` instead of
    silently degrading to a left click; that type has no AC mapping, so
    ``to_ac_command`` rejects it rather than clicking the wrong button.
    """
    button = item.get("button", "left")
    if button is None:
        return "click"
    return _OPENAI_BUTTONS.get(button, f"{button}_click")


def from_openai_cua(item: Mapping[str, Any]) -> Dict[str, Any]:
    """Normalize an OpenAI CUA ``computer_call`` item to a canonical action.

    :param item: mapping with ``type`` and the per-type fields (``x``/``y``,
        ``button``, ``keys``, ``text``, ``scroll_x``/``scroll_y``).
    :return: canonical action dict; ``keypress`` becomes ``key`` with the keys
        joined by ``"+"``, other unknown types are passed through unchanged.
    """
    kind = item.get("type", "")
    fields: Dict[str, Any] = {}
    if item.get("x") is not None and item.get("y") is not None:
        fields["x"], fields["y"] = int(item["x"]), int(item["y"])
    if kind == "click":
        kind = _openai_click_type(item)
    elif kind == "keypress":
        kind = "key"
        # Tolerate ``"keys": None`` and non-string entries instead of crashing in join.
        fields["text"] = "+".join(str(key) for key in item.get("keys") or [])
    elif kind == "type":
        fields["text"] = item.get("text")
    elif kind == "scroll":
        fields["scroll_x"] = item.get("scroll_x")
        fields["scroll_y"] = item.get("scroll_y")
    return canonical_action(kind, **fields)


def _scroll_value(action: Mapping[str, Any]) -> int:
    """Collapse a canonical scroll action into one signed scroll amount.

    ``amount``/``direction`` (Anthropic shape) wins over ``scroll_y`` (OpenAI
    shape); with neither present the result is ``0``.
    """
    if action.get("amount") is not None:
        # "up"/"left" are positive, every other direction (including None) negative.
        # NOTE(review): horizontal directions share the single value — confirm that
        # AC_mouse_scroll has no separate horizontal axis.
        sign = 1 if action.get("direction") in ("up", "left") else -1
        return sign * int(action["amount"])
    if action.get("scroll_y") is not None:
        return -int(action["scroll_y"])  # OpenAI: +y is downward
    # ``scroll_x`` is never read here, so a purely horizontal OpenAI scroll is 0.
    return 0


def _point(action: Mapping[str, Any],
           scale: Optional[Callable[[int, int], Any]]) -> Dict[str, int]:
    """Extract ``{"x", "y"}`` from *action*, optionally remapped through *scale*.

    Returns ``{}`` when either coordinate is missing. The values returned by
    *scale* are converted with ``int()``.
    """
    if action.get("x") is None or action.get("y") is None:
        return {}
    x, y = int(action["x"]), int(action["y"])
    if scale is not None:
        x, y = (int(coord) for coord in scale(x, y))
    return {"x": x, "y": y}


def _text(action: Mapping[str, Any]) -> str:
    """Return the action's ``text`` as a string, treating missing/``None`` as ``""``."""
    value = action.get("text")
    return "" if value is None else str(value)


def to_ac_command(action: Mapping[str, Any], *,
                  scale: Optional[Callable[[int, int], Any]] = None) -> List[Any]:
    """Map a canonical action to a runnable ``[command_name, params]`` AC action.

    :param action: canonical action dict (see ``canonical_action``).
    :param scale: optional ``(x, y) -> (x, y)`` remap applied to coordinates
        (e.g. ``coordinate_space`` model→physical).
    :return: ``[command_name, params]``.
    :raises AutoControlActionException: for an action type with no AC mapping.
    """
    kind = action.get("type")
    point = _point(action, scale)
    if kind in _CLICK_BUTTONS:
        return ["AC_click_mouse", {"mouse_keycode": _CLICK_BUTTONS[kind], **point}]
    if kind == "move":
        return ["AC_set_mouse_position", point]
    if kind == "type":
        # ``_text`` keeps an explicit ``None`` from being typed as the word "None".
        return ["AC_write", {"write_string": _text(action)}]
    if kind == "key":
        keys = [part.strip() for part in _text(action).split("+") if part.strip()]
        return ["AC_hotkey", {"key_code_list": keys}]
    if kind == "scroll":
        return ["AC_mouse_scroll",
                {"scroll_value": _scroll_value(action), **point}]
    if kind == "screenshot":
        return ["AC_screenshot", {}]
    # Imported at the raise site so the pure mapping path has no package dependency.
    from je_auto_control.utils.exception.exceptions import AutoControlActionException
    raise AutoControlActionException(f"no AC mapping for action type: {kind!r}")
def cua_action_tools() -> List[MCPTool]:
    """Return the MCP tool definitions for the computer-use action adapter.

    The single tool, ``ac_cua_command``, forwards to ``h.cua_command`` and is
    annotated read-only. ``source`` is constrained to the three accepted values
    so a client can see the valid choices in the schema instead of learning
    them from an "unknown cua source" error.
    """
    return [
        MCPTool(
            name="ac_cua_command",
            description=("Normalize a computer-use action 'payload' from 'source' "
                         "(anthropic / openai / canonical) and map it to a runnable "
                         "AC_* command. Returns {canonical, command:[name, params]}. "
                         "Bridges Anthropic/OpenAI agent outputs to AutoControl."),
            input_schema=schema({
                "payload": {"type": "object"},
                "source": {"type": "string",
                           "enum": ["canonical", "anthropic", "openai"],
                           "default": "canonical"}},
                required=["payload"]),
            handler=h.cua_command,
            annotations=READ_ONLY,
        ),
    ]
"""Headless tests for canonical computer-use action mapping. No Qt."""
import pytest

import je_auto_control as ac
from je_auto_control.utils.cua_action import (
    canonical_action, from_anthropic, from_openai_cua, to_ac_command,
)
from je_auto_control.utils.exception.exceptions import AutoControlActionException


def test_canonical_action_drops_none():
    """``None``-valued fields never reach the canonical dict."""
    built = canonical_action("click", x=1, y=2, text=None)
    assert built == {"type": "click", "x": 1, "y": 2}


def test_from_anthropic_click_key_scroll():
    """Anthropic click / key / scroll inputs normalize to canonical actions."""
    click = from_anthropic({"action": "left_click", "coordinate": [100, 200]})
    assert click == {"type": "click", "x": 100, "y": 200}

    hotkey = from_anthropic({"action": "key", "text": "ctrl+s"})
    assert hotkey == {"type": "key", "text": "ctrl+s"}

    scroll = from_anthropic({"action": "scroll", "coordinate": [10, 20],
                             "scroll_direction": "down", "scroll_amount": 3})
    assert scroll["type"] == "scroll"
    assert scroll["amount"] == 3


def test_from_openai_click_button_and_keypress():
    """OpenAI button selection and keypress joining are normalized."""
    right = from_openai_cua({"type": "click", "x": 5, "y": 6, "button": "right"})
    assert right["type"] == "right_click"

    pressed = from_openai_cua({"type": "keypress", "keys": ["ctrl", "c"]})
    assert pressed == {"type": "key", "text": "ctrl+c"}


def test_to_ac_command_click_key_scroll():
    """Each canonical type maps to its AC command and parameters."""
    expectations = (
        ({"type": "click", "x": 100, "y": 200},
         ["AC_click_mouse", {"mouse_keycode": "mouse_left", "x": 100, "y": 200}]),
        ({"type": "key", "text": "ctrl+s"},
         ["AC_hotkey", {"key_code_list": ["ctrl", "s"]}]),
        ({"type": "type", "text": "hi"},
         ["AC_write", {"write_string": "hi"}]),
        ({"type": "scroll", "x": 1, "y": 2, "scroll_y": 120},
         ["AC_mouse_scroll", {"scroll_value": -120, "x": 1, "y": 2}]),
    )
    for canonical, expected in expectations:
        assert to_ac_command(canonical) == expected


def test_to_ac_command_applies_scale():
    """The ``scale`` callable remaps coordinates before they are emitted."""
    def double(x, y):
        return x * 2, y * 2

    command = to_ac_command({"type": "move", "x": 50, "y": 60}, scale=double)
    assert command == ["AC_set_mouse_position", {"x": 100, "y": 120}]


def test_to_ac_command_unsupported_raises():
    """A canonical type without an AC mapping is rejected."""
    with pytest.raises(AutoControlActionException):
        to_ac_command({"type": "wait"})


def test_round_trip_anthropic_to_ac():
    """Anthropic input flows through the canonical form to an AC command."""
    canonical = from_anthropic({"action": "right_click", "coordinate": [7, 8]})
    expected = ["AC_click_mouse", {"mouse_keycode": "mouse_right", "x": 7, "y": 8}]
    assert to_ac_command(canonical) == expected


# --- wiring ---------------------------------------------------------------

def test_wiring():
    """The command is registered with the executor, MCP registry and Script Builder."""
    assert "AC_cua_command" in set(ac.executor.known_commands())

    from je_auto_control.utils.mcp_server.tools import build_default_tool_registry
    tool_names = {tool.name for tool in build_default_tool_registry()}
    assert "ac_cua_command" in tool_names

    from je_auto_control.gui.script_builder.command_schema import _build_specs
    spec_commands = {spec.command for spec in _build_specs()}
    assert "AC_cua_command" in spec_commands


def test_executor_normalizes_and_maps():
    """The executor adapter normalizes an Anthropic payload and maps it."""
    from je_auto_control.utils.executor.action_executor import _cua_command
    payload = {"action": "left_click", "coordinate": [3, 4]}
    result = _cua_command(payload, source="anthropic")
    expected = ["AC_click_mouse", {"mouse_keycode": "mouse_left", "x": 3, "y": 4}]
    assert result["command"] == expected


def test_facade_exports():
    """All four adapter functions are reachable from the package facade."""
    exported = ("canonical_action", "from_anthropic", "from_openai_cua",
                "to_ac_command")
    missing = [name for name in exported
               if not hasattr(ac, name) or name not in ac.__all__]
    assert not missing