From 1d8e86e8ec0d3a3e348b850e9edfb5c86419c57d Mon Sep 17 00:00:00 2001 From: JeffreyChen Date: Wed, 24 Jun 2026 14:25:15 +0800 Subject: [PATCH] Add image_quality: sharpness/contrast/brightness gate before OCR OCR and template matching quietly fail on a blurry, washed-out or too-dark capture, and the caller can't tell a missing element from an unreadable one. Measure sharpness (variance of the Laplacian), contrast (grayscale stddev) and brightness (mean), and gate on them with named issues (blurry / low_contrast / too_dark / too_bright) so a script can pre-process or re-capture before OCR. Reuses visual_match's grayscale loader; cv2/numpy lazily imported. --- WHATS_NEW.md | 6 ++ .../doc/new_features/v188_features_doc.rst | 47 +++++++++++ .../Zh/doc/new_features/v188_features_doc.rst | 42 ++++++++++ je_auto_control/__init__.py | 5 ++ .../gui/script_builder/command_schema.py | 22 ++++++ .../utils/executor/action_executor.py | 26 ++++++ .../utils/image_quality/__init__.py | 6 ++ .../utils/image_quality/image_quality.py | 71 +++++++++++++++++ .../utils/mcp_server/tools/_factories.py | 26 ++++++ .../utils/mcp_server/tools/_handlers.py | 11 +++ .../headless/test_image_quality_batch.py | 79 +++++++++++++++++++ 11 files changed, 341 insertions(+) create mode 100644 docs/source/Eng/doc/new_features/v188_features_doc.rst create mode 100644 docs/source/Zh/doc/new_features/v188_features_doc.rst create mode 100644 je_auto_control/utils/image_quality/__init__.py create mode 100644 je_auto_control/utils/image_quality/image_quality.py create mode 100644 test/unit_test/headless/test_image_quality_batch.py diff --git a/WHATS_NEW.md b/WHATS_NEW.md index a0be99cc..22be7c21 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -1,5 +1,11 @@ # What's New — AutoControl +## What's new (2026-06-24) — Image Quality Scoring (sharpness / contrast / brightness gate) + +Refuse to OCR a blurry or washed-out frame — score quality and gate before recognition. 
Full reference: [`docs/source/Eng/doc/new_features/v188_features_doc.rst`](docs/source/Eng/doc/new_features/v188_features_doc.rst). + +- **`image_quality` / `is_blurry` / `quality_gate`** (`AC_image_quality`, `AC_quality_gate`): OCR and template matching quietly fail on a blurry, washed-out or too-dark capture, and the caller can't tell a *missing* element from an *unreadable* one. This measures sharpness (variance of the Laplacian), contrast (grayscale stddev) and brightness (mean 0–255); `quality_gate` turns them into `{passed, issues}` flagging `blurry` / `low_contrast` / `too_dark` / `too_bright` so a script can pre-process or re-capture before OCR. Reuses `visual_match`'s grayscale loader (any ndarray / path / PIL image, or the live screen); cv2/numpy lazily imported. No `PySide6`. + ## What's new (2026-06-24) — Drop Files onto a Window (WM_DROPFILES) Complete a drag-and-drop programmatically — drop files onto a target window. Full reference: [`docs/source/Eng/doc/new_features/v187_features_doc.rst`](docs/source/Eng/doc/new_features/v187_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v188_features_doc.rst b/docs/source/Eng/doc/new_features/v188_features_doc.rst new file mode 100644 index 00000000..f50e7471 --- /dev/null +++ b/docs/source/Eng/doc/new_features/v188_features_doc.rst @@ -0,0 +1,47 @@ +Image Quality Scoring (sharpness / contrast / brightness gate) +============================================================== + +OCR and template matching quietly fail on a blurry, washed-out or too-dark +capture — the locate returns nothing and the caller can't tell a *missing* +element from an *unreadable* one. ``image_quality`` measures the three things +that wreck recognition and gates on them: + +* **sharpness** — variance of the Laplacian (low = blurry / out of focus), +* **contrast** — standard deviation of the grayscale (low = washed out), +* **brightness** — mean grayscale 0–255 (too low = dark, too high = blown out). 
+ +:func:`image_quality` returns the raw metrics, :func:`is_blurry` is the common +one-liner, and :func:`quality_gate` turns the metrics into a pass / fail verdict +with named issues, so a script can refuse to OCR a bad frame (or pre-process it +first). It reuses ``visual_match``'s grayscale loader, so the source is any +ndarray / path / PIL image (or the live screen when omitted); cv2 / numpy are +lazily imported. Imports no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import image_quality, is_blurry, quality_gate + + image_quality("frame.png") + # {"sharpness": 842.1, "contrast": 58.3, "brightness": 131.0} + + if is_blurry("frame.png", threshold=100): + ... # capture again / sharpen before OCR + + gate = quality_gate("frame.png", min_sharpness=100, min_contrast=12) + # {"sharpness": .., "contrast": .., "brightness": .., "passed": False, + # "issues": ["blurry", "too_dark"]} + +``quality_gate`` flags ``blurry`` / ``low_contrast`` / ``too_dark`` / +``too_bright``; ``passed`` is True only when no issue fires. ``region`` applies to +a live-screen grab (omit ``source`` to grade the screen). Thresholds are tunable; +the defaults suit typical UI screenshots. + +Executor commands +----------------- + +``AC_image_quality`` (``source`` / ``region``) and ``AC_quality_gate`` (plus +``min_sharpness`` / ``min_contrast``). They are exposed as read-only ``ac_*`` MCP +tools and as Script Builder commands under **Image**. 
+ diff --git a/docs/source/Zh/doc/new_features/v188_features_doc.rst b/docs/source/Zh/doc/new_features/v188_features_doc.rst new file mode 100644 index 00000000..6c7b062d --- /dev/null +++ b/docs/source/Zh/doc/new_features/v188_features_doc.rst @@ -0,0 +1,42 @@ +影像品質評分(銳利度 / 對比 / 亮度門檻) +======================================= + +OCR 與模板比對在模糊、褪色或太暗的擷取畫面上會悄悄失敗——定位回傳空值,呼叫端無法分辨是元素 +*不存在*還是畫面*無法辨識*。``image_quality`` 量測三項會破壞辨識的指標並據以把關: + +* **sharpness(銳利度)**——Laplacian 的變異數(低 = 模糊 / 失焦), +* **contrast(對比)**——灰階的標準差(低 = 褪色), +* **brightness(亮度)**——灰階平均 0–255(太低 = 太暗,太高 = 過曝)。 + +:func:`image_quality` 回傳原始指標,:func:`is_blurry` 是常用的一行式,:func:`quality_gate` 把 +指標轉成通過 / 失敗的判定並附上具名問題,讓腳本可以拒絕對壞畫面做 OCR(或先做前處理)。它重用 +``visual_match`` 的灰階載入器,因此來源可為任何 ndarray / 路徑 / PIL 影像(省略時則為即時螢幕); +cv2 / numpy 為延遲匯入。不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import image_quality, is_blurry, quality_gate + + image_quality("frame.png") + # {"sharpness": 842.1, "contrast": 58.3, "brightness": 131.0} + + if is_blurry("frame.png", threshold=100): + ... 
+ # 在 OCR 前重新擷取 / 銳化 + + gate = quality_gate("frame.png", min_sharpness=100, min_contrast=12) + # {"sharpness": .., "contrast": .., "brightness": .., "passed": False, + # "issues": ["blurry", "too_dark"]} + +``quality_gate`` 會標記 ``blurry`` / ``low_contrast`` / ``too_dark`` / +``too_bright``;只有在沒有任何問題時 ``passed`` 才為 True。``region`` 套用於即時螢幕擷取(省略 +``source`` 即評分螢幕)。門檻可調整;預設值適合一般 UI 截圖。 + +執行器指令 +---------- + +``AC_image_quality``(``source`` / ``region``)與 ``AC_quality_gate``(另加 +``min_sharpness`` / ``min_contrast``)。皆以唯讀 ``ac_*`` MCP 工具及 Script Builder 指令 +(位於 **Image** 分類下)形式提供。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 895a182d..a9e18c73 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -78,6 +78,10 @@ ) # Drop files onto a window (WM_DROPFILES sender) from je_auto_control.utils.file_drop import drop_files, plan_file_drop +# Image quality scoring (sharpness / contrast / brightness gate before OCR) +from je_auto_control.utils.image_quality import ( + image_quality, is_blurry, quality_gate, +) # VLM element locator (headless) from je_auto_control.utils.vision import ( VLMNotAvailableError, click_by_description, locate_by_description, @@ -1652,6 +1656,7 @@ def start_autocontrol_gui(*args, **kwargs): "classify_format", "classify_formats", "diff_formats", "list_clipboard_formats", "clipboard_formats", "plan_file_drop", "drop_files", + "image_quality", "is_blurry", "quality_gate", # VLM locator "VLMNotAvailableError", "locate_by_description", "click_by_description", "verify_description", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index 60844736..76b9b74a 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -741,6 +741,28 @@ def _add_image_specs(specs: List[CommandSpec]) -> None: ), description="Detect a palette/view change vs a reference (illumination-robust).", 
)) + specs.append(CommandSpec( + "AC_image_quality", "Image", "Image Quality", + fields=( + FieldSpec("source", FieldType.FILE_PATH, optional=True), + FieldSpec("region", FieldType.STRING, optional=True, + placeholder=_REGION_PLACEHOLDER), + ), + description="Sharpness / contrast / brightness of an image or the screen.", + )) + specs.append(CommandSpec( + "AC_quality_gate", "Image", "Quality Gate (OCR-ready?)", + fields=( + FieldSpec("source", FieldType.FILE_PATH, optional=True), + FieldSpec("region", FieldType.STRING, optional=True, + placeholder=_REGION_PLACEHOLDER), + FieldSpec("min_sharpness", FieldType.FLOAT, optional=True, + default=100.0), + FieldSpec("min_contrast", FieldType.FLOAT, optional=True, + default=12.0), + ), + description="Pass/fail an image for OCR readability with named issues.", + )) specs.append(CommandSpec( "AC_changed_regions", "Image", "Changed Regions (motion)", fields=( diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index dff38212..bf87165f 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -4274,6 +4274,30 @@ def _drop_files(hwnd: Any, paths: Any, point: Any = None) -> Dict[str, Any]: return {"dropped": bool(dropped), "count": len(coerced)} +def _coerce_region(region: Any): + """Normalise a region argument (JSON '[x,y,w,h]' string / list / None).""" + import json + if isinstance(region, str): + return json.loads(region) if region.strip() else None + return region + + +def _image_quality(source: Any = None, region: Any = None) -> Dict[str, Any]: + """Adapter: sharpness / contrast / brightness of an image or the screen.""" + from je_auto_control.utils.image_quality import image_quality + return image_quality(source, region=_coerce_region(region)) + + +def _quality_gate(source: Any = None, region: Any = None, + min_sharpness: Any = 100.0, + min_contrast: Any = 12.0) -> Dict[str, Any]: + """Adapter: pass 
/ fail an image for OCR readability with named issues.""" + from je_auto_control.utils.image_quality import quality_gate + return quality_gate(source, region=_coerce_region(region), + min_sharpness=float(min_sharpness), + min_contrast=float(min_contrast)) + + def _image_histogram(source: Any = None, bins: Any = 32, space: str = "hsv", region: Any = None) -> Dict[str, Any]: """Adapter: per-channel colour histogram of an image / the screen.""" @@ -6496,6 +6520,8 @@ def __init__(self): "AC_diff_formats": _diff_formats, "AC_plan_file_drop": _plan_file_drop, "AC_drop_files": _drop_files, + "AC_image_quality": _image_quality, + "AC_quality_gate": _quality_gate, "AC_image_histogram": _image_histogram, "AC_histogram_changed": _histogram_changed, "AC_changed_regions": _changed_regions, diff --git a/je_auto_control/utils/image_quality/__init__.py b/je_auto_control/utils/image_quality/__init__.py new file mode 100644 index 00000000..279676f0 --- /dev/null +++ b/je_auto_control/utils/image_quality/__init__.py @@ -0,0 +1,6 @@ +"""Score image quality (sharpness / contrast / brightness) before OCR / matching.""" +from je_auto_control.utils.image_quality.image_quality import ( + image_quality, is_blurry, quality_gate, +) + +__all__ = ["image_quality", "is_blurry", "quality_gate"] diff --git a/je_auto_control/utils/image_quality/image_quality.py b/je_auto_control/utils/image_quality/image_quality.py new file mode 100644 index 00000000..086d4eb8 --- /dev/null +++ b/je_auto_control/utils/image_quality/image_quality.py @@ -0,0 +1,71 @@ +"""Score image quality before OCR / template matching. + +OCR and template matching quietly fail on a blurry, washed-out or too-dark +capture — the locate returns nothing and the caller can't tell a *missing* +element from an *unreadable* one. 
``image_quality`` measures the three things +that wreck recognition and gates on them: + +* **sharpness** — variance of the Laplacian (low = blurry / out of focus), +* **contrast** — standard deviation of the grayscale (low = washed out), +* **brightness** — mean grayscale 0–255 (too low = dark, too high = blown out). + +:func:`image_quality` returns the raw metrics, :func:`is_blurry` is the common +one-liner, and :func:`quality_gate` turns the metrics into a pass / fail verdict +with named issues so a script can refuse to OCR a bad frame (or pre-process it +first). It reuses ``visual_match``'s grayscale loader, so the source is any +ndarray / path / PIL image (or the live screen when omitted); ``region`` applies +to a live-screen grab. cv2 / numpy are lazily imported. Imports no ``PySide6``. +""" +from typing import Any, Dict, Optional, Sequence, Tuple + +ImageSource = Any + + +def _gray(source: Optional[ImageSource], region: Optional[Sequence[int]]): + from je_auto_control.utils.visual_match.visual_match import _haystack_gray + return _haystack_gray(source, region) + + +def image_quality(source: Optional[ImageSource] = None, *, + region: Optional[Sequence[int]] = None) -> Dict[str, float]: + """Return ``{sharpness, contrast, brightness}`` for an image (or live screen). + + ``sharpness`` is the variance of the Laplacian, ``contrast`` the grayscale + standard deviation, ``brightness`` the mean grayscale (0–255). 
+ """ + import cv2 + gray = _gray(source, region) + return {"sharpness": float(cv2.Laplacian(gray, cv2.CV_64F).var()), + "contrast": float(gray.std()), + "brightness": float(gray.mean())} + + +def is_blurry(source: Optional[ImageSource] = None, *, + region: Optional[Sequence[int]] = None, + threshold: float = 100.0) -> bool: + """Return True if the image's Laplacian variance is below ``threshold``.""" + return image_quality(source, region=region)["sharpness"] < float(threshold) + + +def quality_gate(source: Optional[ImageSource] = None, *, + region: Optional[Sequence[int]] = None, + min_sharpness: float = 100.0, min_contrast: float = 12.0, + brightness_range: Tuple[float, float] = (40.0, 220.0), + ) -> Dict[str, Any]: + """Grade an image for OCR readability: ``{..., passed, issues}``. + + ``issues`` flags ``blurry`` / ``low_contrast`` / ``too_dark`` / ``too_bright``; + ``passed`` is True only when no issue fires. + """ + metrics = image_quality(source, region=region) + low, high = brightness_range + issues = [] + if metrics["sharpness"] < float(min_sharpness): + issues.append("blurry") + if metrics["contrast"] < float(min_contrast): + issues.append("low_contrast") + if metrics["brightness"] < float(low): + issues.append("too_dark") + elif metrics["brightness"] > float(high): + issues.append("too_bright") + return {**metrics, "passed": not issues, "issues": issues} diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index c3e2401d..54e0425c 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -3385,6 +3385,32 @@ def img_histogram_tools() -> List[MCPTool]: handler=h.histogram_changed, annotations=READ_ONLY, ), + MCPTool( + name="ac_image_quality", + description=("Measure image quality of 'source' (image path; default " + "screen grab of 'region'): {sharpness (Laplacian " + "variance — low=blurry), contrast (grayscale stddev), 
" + "brightness (mean 0-255)}."), + input_schema=schema({ + "source": {"type": "string"}, + "region": {"type": "array", "items": {"type": "integer"}}}), + handler=h.image_quality, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_quality_gate", + description=("Grade 'source' for OCR readability: {sharpness, " + "contrast, brightness, passed, issues}. 'issues' flags " + "blurry / low_contrast / too_dark / too_bright. Tune with " + "'min_sharpness' / 'min_contrast'."), + input_schema=schema({ + "source": {"type": "string"}, + "region": {"type": "array", "items": {"type": "integer"}}, + "min_sharpness": {"type": "number"}, + "min_contrast": {"type": "number"}}), + handler=h.quality_gate, + annotations=READ_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 2bb64e51..15cc8971 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -2509,6 +2509,17 @@ def drop_files(hwnd, paths, point=None): return _drop_files(hwnd, paths, point) +def image_quality(source=None, region=None): + from je_auto_control.utils.executor.action_executor import _image_quality + return _image_quality(source, region) + + +def quality_gate(source=None, region=None, min_sharpness=100.0, + min_contrast=12.0): + from je_auto_control.utils.executor.action_executor import _quality_gate + return _quality_gate(source, region, min_sharpness, min_contrast) + + def image_histogram(source=None, bins=32, space="hsv", region=None): from je_auto_control.utils.executor.action_executor import _image_histogram return _image_histogram(source, bins, space, region) diff --git a/test/unit_test/headless/test_image_quality_batch.py b/test/unit_test/headless/test_image_quality_batch.py new file mode 100644 index 00000000..1b43165e --- /dev/null +++ b/test/unit_test/headless/test_image_quality_batch.py @@ -0,0 +1,79 @@ +"""Headless tests for image-quality scoring (cv2 
synthetic frames).""" +import pytest + +import je_auto_control as ac + +np = pytest.importorskip("numpy") +cv2 = pytest.importorskip("cv2") + +from je_auto_control.utils.image_quality import ( # noqa: E402 + image_quality, is_blurry, quality_gate, +) + + +def _sharp(): + rng = np.random.default_rng(0) + return rng.integers(0, 256, (120, 120)).astype("uint8") # noisy = high Laplacian var + + +def _blurry(): + return cv2.GaussianBlur(_sharp(), (0, 0), 8) # heavy blur = low var + + +def test_metrics_present_and_typed(): + metrics = image_quality(_sharp()) + assert set(metrics) == {"sharpness", "contrast", "brightness"} + assert all(isinstance(v, float) for v in metrics.values()) + assert 0.0 <= metrics["brightness"] <= 255.0 + + +def test_sharp_is_sharper_than_blurry(): + assert image_quality(_sharp())["sharpness"] > image_quality(_blurry())["sharpness"] + assert is_blurry(_sharp(), threshold=100.0) is False + assert is_blurry(_blurry(), threshold=100.0) is True + + +def test_quality_gate_pass_and_fail(): + good = quality_gate(_sharp()) + assert good["passed"] is True and good["issues"] == [] + bad = quality_gate(_blurry()) + assert bad["passed"] is False + assert "blurry" in bad["issues"] + + +def test_quality_gate_flags_dark(): + dark = np.full((80, 80), 5, "uint8") + report = quality_gate(dark) + assert report["passed"] is False + assert "too_dark" in report["issues"] + + +def test_quality_gate_brightness_range_tunable(): + mid = np.full((40, 40), 130, "uint8") + # a flat frame is blurry+low-contrast, but brightness must not be flagged + issues = quality_gate(mid, brightness_range=(40.0, 220.0))["issues"] + assert "too_dark" not in issues and "too_bright" not in issues + + +# --- wiring --------------------------------------------------------------- + +def test_executor_pure_path(): + from je_auto_control.utils.executor.action_executor import _quality_gate + report = _quality_gate(_blurry()) + assert report["passed"] is False and "blurry" in report["issues"] + + 
+def test_wiring(): + known = set(ac.executor.known_commands()) + assert {"AC_image_quality", "AC_quality_gate"} <= known + from je_auto_control.utils.mcp_server.tools import build_default_tool_registry + names = {t.name for t in build_default_tool_registry()} + assert {"ac_image_quality", "ac_quality_gate"} <= names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert {"AC_image_quality", "AC_quality_gate"} <= specs + + +def test_facade_exports(): + for name in ("image_quality", "is_blurry", "quality_gate"): + assert hasattr(ac, name) and name in ac.__all__