diff --git a/WHATS_NEW.md b/WHATS_NEW.md index 22be7c21..1e16dbb5 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -1,5 +1,11 @@ # What's New — AutoControl +## What's new (2026-06-24) — Display-Scale / Visual-DPI Detection + +Infer which display scale (DPI) a template renders at — and how confidently. Full reference: [`docs/source/Eng/doc/new_features/v189_features_doc.rst`](docs/source/Eng/doc/new_features/v189_features_doc.rst). + +- **`detect_scale` / `scale_sweep`** (`AC_detect_scale`, `AC_scale_sweep`): a template cropped at 100% scale won't match on a 150%-DPI machine, and `match_template` returns only the single best match — discarding the per-scale scores. This keeps the whole profile: `scale_sweep` scores the template at every scale, and `detect_scale` reports the winning scale as a DPI inference (`scale_percent`) with a confidence `margin` (how far it beats the runner-up). Reuses `visual_match._score_map` per scale; source is any ndarray / path / PIL image (or the live screen); scales default to the common Windows values. cv2/numpy lazily imported. No `PySide6`. + ## What's new (2026-06-24) — Image Quality Scoring (sharpness / contrast / brightness gate) Refuse to OCR a blurry or washed-out frame — score quality and gate before recognition. Full reference: [`docs/source/Eng/doc/new_features/v188_features_doc.rst`](docs/source/Eng/doc/new_features/v188_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v189_features_doc.rst b/docs/source/Eng/doc/new_features/v189_features_doc.rst new file mode 100644 index 00000000..95f050ba --- /dev/null +++ b/docs/source/Eng/doc/new_features/v189_features_doc.rst @@ -0,0 +1,47 @@ +Display-Scale / Visual-DPI Detection +==================================== + +A template cropped at 100% display scale will not match pixel-for-pixel on a +machine running at 150% DPI — everything is 1.5x bigger.
+``visual_match.match_template`` *can* sweep scales, but it returns only the single best match's +location and throws the per-scale scores away. ``scale_detect`` keeps the whole +profile: it scores the template against the haystack at a range of scales and +reports **which scale wins, by how much**, so an automation can infer the +effective UI scale / DPI and how confident that inference is. + +* :func:`scale_sweep` — the per-scale score profile (every scale's best match), +* :func:`detect_scale` — the winning scale as a DPI inference with a confidence + margin. + +It reuses ``visual_match._score_map`` (the full ``matchTemplate`` surface, +oriented higher = better) for each scale, so the source is any ndarray / path / +PIL image (or the live screen). cv2 / numpy are lazily imported. Imports no +``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import detect_scale, scale_sweep + + detect_scale("button.png", "screen.png") + # {"scale": 1.5, "scale_percent": 150, "score": 0.98, "center": [...], + # "margin": 0.62, "candidates": [...]} + + scale_sweep("button.png", scales=[1.0, 1.25, 1.5, 1.75, 2.0]) + # [{"scale": 1.0, "score": .., "center": [..]}, {"scale": 1.25, ...}, ...] + +``scales`` defaults to the common Windows display scales +``(1.0, 1.25, 1.5, 1.75, 2.0)``. ``margin`` is how far the winning scale beats the +runner-up — a low margin means the inference is ambiguous. Scales at which the +template is larger than the haystack are skipped; ``detect_scale`` returns +``None`` when none fit. Omit ``haystack`` to match against the live screen +(``region`` applies to that grab). + +Executor commands +----------------- + +``AC_detect_scale`` and ``AC_scale_sweep`` (``template`` / ``haystack`` / +``region`` / ``scales`` / ``method``). They are exposed as read-only ``ac_*`` MCP +tools and as Script Builder commands under **Image**. 
diff --git a/docs/source/Zh/doc/new_features/v189_features_doc.rst b/docs/source/Zh/doc/new_features/v189_features_doc.rst new file mode 100644 index 00000000..e38de475 --- /dev/null +++ b/docs/source/Zh/doc/new_features/v189_features_doc.rst @@ -0,0 +1,40 @@ +顯示縮放 / 視覺 DPI 偵測 +======================= + +在 100% 顯示縮放下裁切的模板,在 150% DPI 的機器上不會逐像素吻合——一切都放大了 1.5 倍。 +``visual_match.match_template`` *可以* 掃過多個縮放,但它只回傳單一最佳吻合的位置,並把各縮放的 +分數丟棄。``scale_detect`` 保留整個剖面:它在一系列縮放下對 haystack 評分模板,並回報**哪個縮放 +勝出、勝出多少**,讓自動化能推測有效的 UI 縮放 / DPI,以及該推測的信心。 + +* :func:`scale_sweep` ——逐縮放的分數剖面(每個縮放的最佳吻合), +* :func:`detect_scale` ——勝出的縮放作為 DPI 推測,並附信心 margin。 + +它對每個縮放重用 ``visual_match._score_map``(完整的 ``matchTemplate`` 表面,方向為越高越好), +因此來源可為任何 ndarray / 路徑 / PIL 影像(或存活螢幕)。cv2 / numpy 為延遲匯入。不匯入 +``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import detect_scale, scale_sweep + + detect_scale("button.png", "screen.png") + # {"scale": 1.5, "scale_percent": 150, "score": 0.98, "center": [...], + # "margin": 0.62, "candidates": [...]} + + scale_sweep("button.png", scales=[1.0, 1.25, 1.5, 1.75, 2.0]) + # [{"scale": 1.0, "score": .., "center": [..]}, {"scale": 1.25, ...}, ...] 
+ +``scales`` 預設為常見的 Windows 顯示縮放 ``(1.0, 1.25, 1.5, 1.75, 2.0)``。``margin`` 是勝出縮放 +領先次佳者的幅度——margin 低代表推測模稜兩可。模板大於 haystack 的縮放會被略過;當沒有任何縮放 +吻合時 ``detect_scale`` 回傳 ``None``。省略 ``haystack`` 即對存活螢幕比對(``region`` 套用於該 +擷取)。 + +執行器指令 +---------- + +``AC_detect_scale`` 與 ``AC_scale_sweep``(``template`` / ``haystack`` / ``region`` / +``scales`` / ``method``)。皆以唯讀 ``ac_*`` MCP 工具及 Script Builder 指令(位於 **Image** +分類下)形式提供。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index a9e18c73..405a84b5 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -82,6 +82,8 @@ from je_auto_control.utils.image_quality import ( image_quality, is_blurry, quality_gate, ) +# Display-scale / visual-DPI detection (per-scale match profile) +from je_auto_control.utils.scale_detect import detect_scale, scale_sweep # VLM element locator (headless) from je_auto_control.utils.vision import ( VLMNotAvailableError, click_by_description, locate_by_description, @@ -1657,6 +1659,7 @@ def start_autocontrol_gui(*args, **kwargs): "list_clipboard_formats", "clipboard_formats", "plan_file_drop", "drop_files", "image_quality", "is_blurry", "quality_gate", + "detect_scale", "scale_sweep", # VLM locator "VLMNotAvailableError", "locate_by_description", "click_by_description", "verify_description", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 76b9b74a..43d2eeb7 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -763,6 +763,30 @@ def _add_image_specs(specs: List[CommandSpec]) -> None: ), description="Pass/fail an image for OCR readability with named issues.", )) + specs.append(CommandSpec( + "AC_detect_scale", "Image", "Detect Display Scale (DPI)", + fields=( + FieldSpec("template", FieldType.FILE_PATH), + FieldSpec("haystack", FieldType.FILE_PATH, optional=True), + FieldSpec("region", FieldType.STRING, 
optional=True, + placeholder=_REGION_PLACEHOLDER), + FieldSpec("scales", FieldType.STRING, optional=True, + placeholder="[1.0, 1.25, 1.5, 1.75, 2.0]"), + ), + description="Infer the display scale a template renders at (visual DPI).", + )) + specs.append(CommandSpec( + "AC_scale_sweep", "Image", "Scale Sweep (per-scale scores)", + fields=( + FieldSpec("template", FieldType.FILE_PATH), + FieldSpec("haystack", FieldType.FILE_PATH, optional=True), + FieldSpec("region", FieldType.STRING, optional=True, + placeholder=_REGION_PLACEHOLDER), + FieldSpec("scales", FieldType.STRING, optional=True, + placeholder="[1.0, 1.25, 1.5, 1.75, 2.0]"), + ), + description="Per-scale match-score profile of a template.", + )) specs.append(CommandSpec( "AC_changed_regions", "Image", "Changed Regions (motion)", fields=( diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index bf87165f..0f3eaf37 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -4298,6 +4298,35 @@ def _quality_gate(source: Any = None, region: Any = None, min_contrast=float(min_contrast)) +def _coerce_scales(scales: Any): + """Normalise a scales argument (JSON '[1.0,1.5]' string / list / None).""" + import json + if isinstance(scales, str): + return json.loads(scales) if scales.strip() else None + return scales + + +def _detect_scale(template: Any, haystack: Any = None, region: Any = None, + scales: Any = None, + method: str = "ccoeff_normed") -> Dict[str, Any]: + """Adapter: infer the display scale a template renders at (visual DPI).""" + from je_auto_control.utils.scale_detect import detect_scale + result = detect_scale(template, haystack, region=_coerce_region(region), + scales=_coerce_scales(scales), method=str(method)) + return {"found": result is not None, "result": result} + + +def _scale_sweep(template: Any, haystack: Any = None, region: Any = None, + scales: Any = None, + method: str 
= "ccoeff_normed") -> Dict[str, Any]: + """Adapter: per-scale match-score profile of a template.""" + from je_auto_control.utils.scale_detect import scale_sweep + return {"sweep": scale_sweep(template, haystack, + region=_coerce_region(region), + scales=_coerce_scales(scales), + method=str(method))} + + def _image_histogram(source: Any = None, bins: Any = 32, space: str = "hsv", region: Any = None) -> Dict[str, Any]: """Adapter: per-channel colour histogram of an image / the screen.""" @@ -6522,6 +6551,8 @@ def __init__(self): "AC_drop_files": _drop_files, "AC_image_quality": _image_quality, "AC_quality_gate": _quality_gate, + "AC_detect_scale": _detect_scale, + "AC_scale_sweep": _scale_sweep, "AC_image_histogram": _image_histogram, "AC_histogram_changed": _histogram_changed, "AC_changed_regions": _changed_regions, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 54e0425c..efd97227 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -3411,6 +3411,38 @@ def img_histogram_tools() -> List[MCPTool]: handler=h.quality_gate, annotations=READ_ONLY, ), + MCPTool( + name="ac_detect_scale", + description=("Infer the display scale a 'template' renders at (visual " + "DPI) by scoring it against 'haystack' (default screen) " + "across 'scales'. 
def detect_scale(template, haystack=None, region=None, scales=None,
                 method="ccoeff_normed"):
    """MCP handler behind ``ac_detect_scale``: delegate to the executor adapter.

    The executor module is imported on call, not when this module is loaded.
    Returns whatever ``_detect_scale`` returns.
    """
    from je_auto_control.utils.executor.action_executor import (
        _detect_scale as adapter,
    )
    return adapter(template, haystack=haystack, region=region,
                   scales=scales, method=method)


def scale_sweep(template, haystack=None, region=None, scales=None,
                method="ccoeff_normed"):
    """MCP handler behind ``ac_scale_sweep``: delegate to the executor adapter.

    The executor module is imported on call, not when this module is loaded.
    Returns whatever ``_scale_sweep`` returns.
    """
    from je_auto_control.utils.executor.action_executor import (
        _scale_sweep as adapter,
    )
    return adapter(template, haystack=haystack, region=region,
                   scales=scales, method=method)
"""Detect the display scale a template renders at (visual DPI).

A template cropped at 100% display scale will not match pixel-for-pixel on a
machine running at 150% DPI — everything is 1.5x bigger.
``visual_match.match_template`` *can* sweep scales, but it returns only the
single best match's location and throws the per-scale scores away.
``scale_detect`` keeps the whole profile: it scores the template against the
haystack at a range of scales and reports *which scale wins, by how much*, so
an automation can infer the effective UI scale / DPI and how confident that
inference is.

It reuses ``visual_match._score_map`` (the full ``matchTemplate`` surface,
oriented higher = better) for each scale, so the source is any ndarray / path /
PIL image (or the live screen). cv2 / numpy are lazily imported. Imports no
``PySide6``.
"""
from typing import Any, Dict, List, Optional, Sequence

ImageSource = Any
# Common Windows display scales (100% / 125% / 150% / 175% / 200%).
_DEFAULT_SCALES = (1.0, 1.25, 1.5, 1.75, 2.0)


def _score_at(template: ImageSource, haystack: Optional[ImageSource],
              region: Optional[Sequence[int]], method: str,
              scale: float) -> Optional[Dict[str, Any]]:
    """Return the best match of ``template`` at one ``scale``.

    :return: ``{scale, score, x, y, width, height, center}`` for the maximum
        of the score map, or ``None`` when ``_score_map`` yields no map
        (template larger than the haystack at this scale).
    """
    # Lazy imports keep cv2 (and the matcher) off the package import path.
    import cv2
    from je_auto_control.utils.visual_match.visual_match import _score_map
    score_map, tmpl = _score_map(template, haystack, region=region,
                                 method=method, scale=scale)
    if score_map is None:
        return None  # template larger than haystack at this scale
    # The map is oriented higher = better, so only the maximum is of interest.
    _min_v, max_v, _min_loc, max_loc = cv2.minMaxLoc(score_map)
    height, width = int(tmpl.shape[0]), int(tmpl.shape[1])
    # NOTE(review): x / y are score-map coordinates; whether a ``region``
    # offset is added back is decided inside ``_score_map`` — confirm.
    x, y = int(max_loc[0]), int(max_loc[1])
    return {"scale": float(scale), "score": float(max_v), "x": x, "y": y,
            "width": width, "height": height,
            "center": [x + width // 2, y + height // 2]}


def scale_sweep(template: ImageSource, haystack: Optional[ImageSource] = None, *,
                region: Optional[Sequence[int]] = None,
                scales: Optional[Sequence[float]] = None,
                method: str = "ccoeff_normed") -> List[Dict[str, Any]]:
    """Score ``template`` against the haystack at each scale.

    :param scales: scale factors to try, in order; ``None`` or an empty
        sequence selects the default Windows display scales.  Any iterable
        works, including a numpy array.
    :return: ``[{scale, score, x, y, width, height, center}]`` (best match per
        scale, in the order given), skipping scales at which the template is
        larger than the haystack.
    """
    # Materialise first and test emptiness on the tuple: ``if scales`` raises
    # for a multi-element ndarray, whose truth value is ambiguous.
    chosen = _DEFAULT_SCALES if scales is None else tuple(scales)
    if not chosen:
        chosen = _DEFAULT_SCALES
    results = []
    for scale in chosen:
        entry = _score_at(template, haystack, region, method, float(scale))
        if entry is not None:
            results.append(entry)
    return results


def detect_scale(template: ImageSource, haystack: Optional[ImageSource] = None, *,
                 region: Optional[Sequence[int]] = None,
                 scales: Optional[Sequence[float]] = None,
                 method: str = "ccoeff_normed") -> Optional[Dict[str, Any]]:
    """Infer the display scale ``template`` renders at (its visual DPI).

    :return: ``{scale, scale_percent, score, center, margin, candidates}`` —
        the winning scale, its percentage, the match score, ``margin`` (how
        far it beats the best *different* scale: a confidence in the
        inference) and the unsorted sweep as ``candidates``.  When no other
        scale was scored, ``margin`` equals the winning score.  ``None`` if no
        scale matched (template never fit the haystack).
    """
    sweep = scale_sweep(template, haystack, region=region, scales=scales,
                        method=method)
    if not sweep:
        return None
    # sorted() is stable, so equal scores keep the caller's scale order.
    ranked = sorted(sweep, key=lambda entry: entry["score"], reverse=True)
    best = ranked[0]
    # A scale listed twice must not be its own runner-up: that would report a
    # margin of 0 and make a clear-cut inference look ambiguous.
    runner_up = next(
        (entry for entry in ranked[1:] if entry["scale"] != best["scale"]),
        None)
    if runner_up is None:
        margin = best["score"]
    else:
        margin = best["score"] - runner_up["score"]
    return {"scale": best["scale"], "scale_percent": round(best["scale"] * 100),
            "score": best["score"], "center": best["center"],
            "margin": float(margin), "candidates": sweep}
test_detect_scale_at_unity(): + template = _template() + result = detect_scale(template, _haystack_at(template, 1.0)) + assert result["scale"] == pytest.approx(1.0) + assert result["scale_percent"] == 100 + + +def test_scale_sweep_returns_full_profile(): + template = _template() + sweep = scale_sweep(template, _haystack_at(template, 1.25), + scales=(1.0, 1.25, 1.5)) + assert [round(c["scale"], 2) for c in sweep] == [1.0, 1.25, 1.5] + best = max(sweep, key=lambda c: c["score"]) + assert best["scale"] == pytest.approx(1.25) + assert {"scale", "score", "x", "y", "width", "height", "center"} <= set(sweep[0]) + + +def test_detect_scale_none_when_template_too_big(): + template = _template() + assert detect_scale(template, np.zeros((10, 10, 3), "uint8")) is None + assert scale_sweep(template, np.zeros((10, 10, 3), "uint8")) == [] + + +# --- wiring --------------------------------------------------------------- + +def test_executor_pure_path(): + template = _template() + from je_auto_control.utils.executor.action_executor import _detect_scale + # pass ndarrays straight through (executor coerces only str args) + out = _detect_scale(template, _haystack_at(template, 2.0)) + assert out["found"] is True and out["result"]["scale_percent"] == 200 + + +def test_wiring(): + known = set(ac.executor.known_commands()) + assert {"AC_detect_scale", "AC_scale_sweep"} <= known + from je_auto_control.utils.mcp_server.tools import build_default_tool_registry + names = {t.name for t in build_default_tool_registry()} + assert {"ac_detect_scale", "ac_scale_sweep"} <= names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert {"AC_detect_scale", "AC_scale_sweep"} <= specs + + +def test_facade_exports(): + for name in ("detect_scale", "scale_sweep"): + assert hasattr(ac, name) and name in ac.__all__