diff --git a/WHATS_NEW.md b/WHATS_NEW.md index 3789042d..0587166d 100644 --- a/WHATS_NEW.md +++ b/WHATS_NEW.md @@ -1,5 +1,11 @@ # What's New — AutoControl +## What's new (2026-06-25) — Per-Run Step Timeline (waterfall + bottleneck steps) + +Read why *this* run was slow — a step waterfall and its bottlenecks. Full reference: [`docs/source/Eng/doc/new_features/v194_features_doc.rst`](docs/source/Eng/doc/new_features/v194_features_doc.rst). + +- **`build_timeline` / `critical_steps`** (`AC_build_timeline`, `AC_critical_steps`): the action profiler aggregates timings by step *name* across runs — useless for "why was *this* run slow". This turns one run's ordered steps into a waterfall (each step's offset, duration, and `pct` share of the total) with the `bottleneck` step and a `parallelism` ratio (`> 1` when steps overlap via explicit `start` times); `critical_steps` ranks the dominant steps to optimise. A step is any `{name, duration, start?}` dict. Pure stdlib. No `PySide6`. + ## What's new (2026-06-25) — Flaky-Test Co-Failure Clustering Find the tests that flake *together* — and the shared root cause behind them. Full reference: [`docs/source/Eng/doc/new_features/v193_features_doc.rst`](docs/source/Eng/doc/new_features/v193_features_doc.rst). diff --git a/docs/source/Eng/doc/new_features/v194_features_doc.rst b/docs/source/Eng/doc/new_features/v194_features_doc.rst new file mode 100644 index 00000000..f8568eb6 --- /dev/null +++ b/docs/source/Eng/doc/new_features/v194_features_doc.rst @@ -0,0 +1,51 @@ +Per-Run Step Timeline (waterfall + bottleneck steps) +==================================================== + +The action profiler aggregates timings by step *name* across many runs — great +for "which action is slow on average", useless for "why was *this* run slow". A +single run is an ordered timeline: step A ran, then B, then C, and one of them +dominated. 
``step_timeline`` turns one run's steps into a waterfall (each step's +offset from the start, its duration and its share of the total) and ranks the +bottleneck steps, so you can read a single slow run instead of an average. + +* :func:`build_timeline` — the waterfall + total / busy / bottleneck / + parallelism, +* :func:`critical_steps` — the steps that dominate the run, longest first. + +A step is any dict with a name (default ``"name"``) and a ``duration``; an +optional ``start`` places it on an absolute timeline (overlapping / parallel +steps), else steps are laid out back-to-back. Pure standard library; no device, +no ``PySide6``. + +Headless API +------------ + +.. code-block:: python + + from je_auto_control import build_timeline, critical_steps + + steps = [{"name": "login", "duration": 1.0}, + {"name": "load_dashboard", "duration": 4.0}, + {"name": "submit", "duration": 1.0}] + + build_timeline(steps) + # {"steps": [{"name": "login", "offset": 0.0, "duration": 1.0, "pct": 16.7}, + # {"name": "load_dashboard", "offset": 1.0, ..., "pct": 66.7}, ...], + # "total": 6.0, "busy": 6.0, + # "bottleneck": {"name": "load_dashboard", "duration": 4.0}, + # "parallelism": 1.0} + + critical_steps(steps, top=2) + # [{"name": "load_dashboard", "duration": 4.0, "pct": 66.7}, + # {"name": "login", "duration": 1.0, "pct": 16.7}] + +``total`` is the wall-clock span, ``busy`` the summed step time; ``parallelism`` = +busy / total is ``1.0`` for a purely sequential run and ``> 1`` when steps overlap +(supply ``start`` times). ``pct`` is each step's share of the total time. + +Executor commands +----------------- + +``AC_build_timeline`` (``steps``) and ``AC_critical_steps`` (``steps`` / ``top``). +They are exposed as read-only ``ac_*`` MCP tools and as Script Builder commands +under **Testing**. 
diff --git a/docs/source/Zh/doc/new_features/v194_features_doc.rst b/docs/source/Zh/doc/new_features/v194_features_doc.rst new file mode 100644 index 00000000..63a9e8db --- /dev/null +++ b/docs/source/Zh/doc/new_features/v194_features_doc.rst @@ -0,0 +1,44 @@ +單次執行的步驟時間軸(瀑布圖 + 瓶頸步驟) +========================================== + +動作 profiler 把計時按步驟*名稱*跨多次執行聚合——很適合「哪個動作平均較慢」,卻無助於「為什麼 +*這一次*執行很慢」。單次執行是一條有序時間軸:步驟 A 跑完、接著 B、再 C,其中某一步主導了時間。 +``step_timeline`` 把一次執行的步驟轉成瀑布圖(每步距起點的偏移、其時長、其占總時間的比例),並 +排名瓶頸步驟,讓你能讀懂單一慢執行,而非平均值。 + +* :func:`build_timeline` ——瀑布圖加上 total / busy / bottleneck / parallelism, +* :func:`critical_steps` ——主導該次執行的步驟,最長者在前。 + +步驟可為任何帶名稱(預設 ``"name"``)與 ``duration`` 的字典;選填 ``start`` 會把它放到絕對 +時間軸上(重疊 / 平行步驟),否則步驟會背靠背排列。純標準庫;不涉及裝置,不匯入 ``PySide6``。 + +無頭 API +-------- + +.. code-block:: python + + from je_auto_control import build_timeline, critical_steps + + steps = [{"name": "login", "duration": 1.0}, + {"name": "load_dashboard", "duration": 4.0}, + {"name": "submit", "duration": 1.0}] + + build_timeline(steps) + # {"steps": [{"name": "login", "offset": 0.0, "duration": 1.0, "pct": 16.7}, + # {"name": "load_dashboard", "offset": 1.0, ..., "pct": 66.7}, ...], + # "total": 6.0, "busy": 6.0, + # "bottleneck": {"name": "load_dashboard", "duration": 4.0}, + # "parallelism": 1.0} + + critical_steps(steps, top=2) + # [{"name": "load_dashboard", "duration": 4.0, "pct": 66.7}, + # {"name": "login", "duration": 1.0, "pct": 16.7}] + +``total`` 是牆鐘時間跨度,``busy`` 是各步驟時長總和;``parallelism`` = busy / total,純序列執行 +為 ``1.0``,步驟重疊時 ``> 1``(需提供 ``start`` 時間)。``pct`` 是每步占總時間的比例。 + +執行器指令 +---------- + +``AC_build_timeline``(``steps``)與 ``AC_critical_steps``(``steps`` / ``top``)。皆以唯讀 +``ac_*`` MCP 工具及 Script Builder 指令(位於 **Testing** 分類下)形式提供。 diff --git a/je_auto_control/__init__.py b/je_auto_control/__init__.py index 39c53d8a..81bc5b59 100644 --- a/je_auto_control/__init__.py +++ b/je_auto_control/__init__.py @@ -96,6 +96,8 @@ from je_auto_control.utils.run_diff import 
diff_runs, summarize_run_diff # Flaky-test co-failure clustering (Jaccard over shared failing runs) from je_auto_control.utils.flake_cluster import cofailure_pairs, failure_clusters +# Per-run step waterfall + bottleneck (critical) steps +from je_auto_control.utils.step_timeline import build_timeline, critical_steps # VLM element locator (headless) from je_auto_control.utils.vision import ( VLMNotAvailableError, click_by_description, locate_by_description, @@ -1676,6 +1678,7 @@ def start_autocontrol_gui(*args, **kwargs): "normalize_error", "failure_signature", "group_failures", "diff_runs", "summarize_run_diff", "cofailure_pairs", "failure_clusters", + "build_timeline", "critical_steps", # VLM locator "VLMNotAvailableError", "locate_by_description", "click_by_description", "verify_description", diff --git a/je_auto_control/gui/script_builder/command_schema.py b/je_auto_control/gui/script_builder/command_schema.py index b26d514f..d4d5bce4 100644 --- a/je_auto_control/gui/script_builder/command_schema.py +++ b/je_auto_control/gui/script_builder/command_schema.py @@ -2755,6 +2755,21 @@ def _add_audit_specs(specs: List[CommandSpec]) -> None: ), description="Test pairs that fail together above a Jaccard threshold.", )) + specs.append(CommandSpec( + "AC_build_timeline", "Testing", "Step Timeline (waterfall)", + fields=(FieldSpec("steps", FieldType.STRING, + placeholder='[{"name": "login", "duration": 1.2}]'),), + description="Per-run step waterfall: offsets, durations, bottleneck.", + )) + specs.append(CommandSpec( + "AC_critical_steps", "Testing", "Critical (Bottleneck) Steps", + fields=( + FieldSpec("steps", FieldType.STRING, + placeholder='[{"name": "login", "duration": 1.2}]'), + FieldSpec("top", FieldType.INT, optional=True, default=3), + ), + description="The steps that dominate a run's time, longest first.", + )) specs.append(CommandSpec( "AC_scan_secrets", "Tools", "Scan for Hardcoded Secrets", description="Scan 'data' (JSON view) for hardcoded secrets that " 
diff --git a/je_auto_control/utils/executor/action_executor.py b/je_auto_control/utils/executor/action_executor.py index 377cb4f9..edff6eda 100644 --- a/je_auto_control/utils/executor/action_executor.py +++ b/je_auto_control/utils/executor/action_executor.py @@ -4402,6 +4402,24 @@ def _cofailure_pairs(runs: Any, threshold: Any = 0.5) -> Dict[str, Any]: return {"pairs": pairs, "count": len(pairs)} +def _build_timeline(steps: Any) -> Dict[str, Any]: + """Adapter: a per-run step waterfall (offsets / durations / bottleneck).""" + import json + from je_auto_control.utils.step_timeline import build_timeline + if isinstance(steps, str): + steps = json.loads(steps) + return build_timeline(steps) + + +def _critical_steps(steps: Any, top: Any = 3) -> Dict[str, Any]: + """Adapter: the steps that dominate a run's time (bottlenecks).""" + import json + from je_auto_control.utils.step_timeline import critical_steps + if isinstance(steps, str): + steps = json.loads(steps) + return {"steps": critical_steps(steps, top=int(top))} + + def _image_histogram(source: Any = None, bins: Any = 32, space: str = "hsv", region: Any = None) -> Dict[str, Any]: """Adapter: per-channel colour histogram of an image / the screen.""" @@ -6635,6 +6653,8 @@ def __init__(self): "AC_diff_runs": _diff_runs, "AC_failure_clusters": _failure_clusters, "AC_cofailure_pairs": _cofailure_pairs, + "AC_build_timeline": _build_timeline, + "AC_critical_steps": _critical_steps, "AC_image_histogram": _image_histogram, "AC_histogram_changed": _histogram_changed, "AC_changed_regions": _changed_regions, diff --git a/je_auto_control/utils/mcp_server/tools/_factories.py b/je_auto_control/utils/mcp_server/tools/_factories.py index 95752986..d32d15ae 100644 --- a/je_auto_control/utils/mcp_server/tools/_factories.py +++ b/je_auto_control/utils/mcp_server/tools/_factories.py @@ -7720,6 +7720,30 @@ def flakiness_tools() -> List[MCPTool]: handler=h.cofailure_pairs, annotations=READ_ONLY, ), + MCPTool( + name="ac_build_timeline", 
+ description=("Per-run step waterfall from 'steps' (list of {name," + "duration,start?}): {steps:[{name,offset,duration,pct}], " + "total, busy, bottleneck, parallelism}. Reads ONE slow " + "run, not a per-name average."), + input_schema=schema({ + "steps": {"type": "array", "items": {"type": "object"}}}, + required=["steps"]), + handler=h.build_timeline, + annotations=READ_ONLY, + ), + MCPTool( + name="ac_critical_steps", + description=("The 'top' steps that dominate a run's time (bottlenecks " + "to optimise): {steps:[{name,duration,pct}]}, longest " + "first."), + input_schema=schema({ + "steps": {"type": "array", "items": {"type": "object"}}, + "top": {"type": "integer"}}, + required=["steps"]), + handler=h.critical_steps, + annotations=READ_ONLY, + ), ] diff --git a/je_auto_control/utils/mcp_server/tools/_handlers.py b/je_auto_control/utils/mcp_server/tools/_handlers.py index 34319fcd..ab6d91cc 100644 --- a/je_auto_control/utils/mcp_server/tools/_handlers.py +++ b/je_auto_control/utils/mcp_server/tools/_handlers.py @@ -2568,6 +2568,16 @@ def cofailure_pairs(runs, threshold=0.5): return _cofailure_pairs(runs, threshold) +def build_timeline(steps): + from je_auto_control.utils.executor.action_executor import _build_timeline + return _build_timeline(steps) + + +def critical_steps(steps, top=3): + from je_auto_control.utils.executor.action_executor import _critical_steps + return _critical_steps(steps, top) + + def image_histogram(source=None, bins=32, space="hsv", region=None): from je_auto_control.utils.executor.action_executor import _image_histogram return _image_histogram(source, bins, space, region) diff --git a/je_auto_control/utils/step_timeline/__init__.py b/je_auto_control/utils/step_timeline/__init__.py new file mode 100644 index 00000000..36cae102 --- /dev/null +++ b/je_auto_control/utils/step_timeline/__init__.py @@ -0,0 +1,6 @@ +"""Per-run step waterfall timeline + bottleneck (critical) step ranking.""" +from 
je_auto_control.utils.step_timeline.step_timeline import ( + build_timeline, critical_steps, +) + +__all__ = ["build_timeline", "critical_steps"] diff --git a/je_auto_control/utils/step_timeline/step_timeline.py b/je_auto_control/utils/step_timeline/step_timeline.py new file mode 100644 index 00000000..4f677bcd --- /dev/null +++ b/je_auto_control/utils/step_timeline/step_timeline.py @@ -0,0 +1,75 @@ +"""Build a per-run step waterfall and find the run's bottleneck steps. + +The action profiler aggregates timings by step *name* across many runs — great for +"which action is slow on average", useless for "why was *this* run slow". A single +run is an ordered timeline: step A ran, then B, then C, and one of them dominated. +``step_timeline`` turns one run's steps into a waterfall (each step's offset from +the start, duration and share of the total) and ranks the bottleneck steps, so you +can read a single slow run instead of an average. + +A step is any dict with a name (default ``"name"``) and a ``duration``; an optional +``start`` places it on an absolute timeline (overlapping / parallel steps), else +steps are laid out back-to-back. Pure standard library; no device, no ``PySide6``. 
import math
from typing import Any, Dict, List, Sequence

Step = Dict[str, Any]


def _clean_duration(raw: Any) -> float:
    """Coerce a raw duration value to a finite, non-negative float.

    Falsy values (``None``, ``0``, ``""``) become ``0.0``. Negative and
    non-finite (``NaN`` / ``inf``) values are also clamped to ``0.0``: a step
    cannot take negative time, and letting such a value through would yield
    negative or >100 % shares and an unreliable span downstream.
    """
    duration = float(raw or 0.0)
    if not math.isfinite(duration) or duration < 0.0:
        return 0.0
    return duration


def _normalize(steps: Sequence[Step], name_key: str, start_key: str,
               duration_key: str) -> List[Dict[str, Any]]:
    """Resolve each step to ``{name, start, end, duration}``.

    A step that carries a value under ``start_key`` is placed at that absolute
    time; a step without one starts at the running cursor, i.e. the latest
    ``end`` seen so far, which lays start-less steps out back-to-back.
    """
    resolved: List[Dict[str, Any]] = []
    cursor = 0.0
    for step in steps:
        duration = _clean_duration(step.get(duration_key, 0.0))
        raw_start = step.get(start_key)
        start = float(raw_start) if raw_start is not None else cursor
        end = start + duration
        # The cursor never moves backwards, so an explicitly placed early step
        # cannot pull later start-less steps back in time.
        cursor = max(cursor, end)
        resolved.append({"name": str(step.get(name_key, "")), "start": start,
                         "end": end, "duration": duration})
    return resolved


def build_timeline(steps: Sequence[Step], *, name_key: str = "name",
                   start_key: str = "start",
                   duration_key: str = "duration") -> Dict[str, Any]:
    """Return a waterfall timeline for one run.

    Args:
        steps: the run's steps, in order; each is a dict.
        name_key: key holding the step name (missing -> ``""``).
        start_key: key holding an optional absolute start time.
        duration_key: key holding the step duration (missing, ``None``,
            negative or non-finite -> ``0.0``).

    Returns:
        ``{steps: [{name, offset, duration, pct}], total, busy, bottleneck,
        parallelism}`` where

        * ``offset`` is the step's start relative to the earliest start,
        * ``total`` is the wall-clock span (latest end - earliest start),
        * ``busy`` is the summed step time,
        * ``pct`` is ``duration / total * 100`` -- a share of the wall-clock
          span, so the values can add up to more than 100 when steps overlap,
        * ``parallelism`` is ``busy / total`` (``1.0`` for a purely sequential
          run, and also ``1.0`` when the span is zero),
        * ``bottleneck`` is ``{name, duration}`` of the longest single step
          (the first one on a tie).

        An empty run returns empty ``steps``, zero ``total`` / ``busy`` /
        ``parallelism`` and ``bottleneck`` of ``None``.
    """
    resolved = _normalize(steps, name_key, start_key, duration_key)
    if not resolved:
        return {"steps": [], "total": 0.0, "busy": 0.0, "bottleneck": None,
                "parallelism": 0.0}
    base = min(step["start"] for step in resolved)
    span = max(step["end"] for step in resolved) - base
    busy = sum(step["duration"] for step in resolved)
    rows = [{"name": step["name"], "offset": round(step["start"] - base, 6),
             "duration": step["duration"],
             # Guard the division: a run made only of zero-length steps has
             # no span to take a share of.
             "pct": round(step["duration"] / span * 100, 1) if span > 0 else 0.0}
            for step in resolved]
    bottleneck = max(resolved, key=lambda step: step["duration"])
    return {"steps": rows, "total": round(span, 6), "busy": round(busy, 6),
            "bottleneck": {"name": bottleneck["name"],
                           "duration": bottleneck["duration"]},
            "parallelism": round(busy / span, 3) if span > 0 else 1.0}


def critical_steps(steps: Sequence[Step], *, name_key: str = "name",
                   start_key: str = "start", duration_key: str = "duration",
                   top: int = 3) -> List[Dict[str, Any]]:
    """Return the ``top`` steps that dominate the run, longest first.

    Each entry is ``{name, duration, pct}``. Here ``pct`` is the step's share
    of the *summed step time* (``busy``), not of the wall-clock span used by
    ``build_timeline``; the two agree for a purely sequential run. Steps with
    equal durations keep their input order. ``top`` values below 1 are treated
    as 1, so a non-empty run always yields at least one step; an empty run
    yields ``[]``.
    """
    resolved = _normalize(steps, name_key, start_key, duration_key)
    # "or 1.0" avoids dividing by zero when every duration is zero.
    busy = sum(step["duration"] for step in resolved) or 1.0
    ranked = sorted(resolved, key=lambda step: step["duration"], reverse=True)
    return [{"name": step["name"], "duration": step["duration"],
             "pct": round(step["duration"] / busy * 100, 1)}
            for step in ranked[:max(1, int(top))]]
critical_steps(_sequential(), top=2) + assert [s["name"] for s in crit] == ["load", "login"] # longest first + assert crit[0]["pct"] == pytest.approx(66.7, abs=0.1) + assert len(critical_steps(_sequential(), top=1)) == 1 + + +def test_empty_run(): + assert build_timeline([]) == {"steps": [], "total": 0.0, "busy": 0.0, + "bottleneck": None, "parallelism": 0.0} + assert critical_steps([]) == [] + + +# --- wiring --------------------------------------------------------------- + +def test_executor_paths(): + import json + from je_auto_control.utils.executor.action_executor import ( + _build_timeline, _critical_steps) + steps_json = json.dumps(_sequential()) + assert _build_timeline(steps_json)["bottleneck"]["name"] == "load" + assert _critical_steps(steps_json, top=1)["steps"][0]["name"] == "load" + + +def test_wiring(): + known = set(ac.executor.known_commands()) + assert {"AC_build_timeline", "AC_critical_steps"} <= known + from je_auto_control.utils.mcp_server.tools import build_default_tool_registry + names = {t.name for t in build_default_tool_registry()} + assert {"ac_build_timeline", "ac_critical_steps"} <= names + from je_auto_control.gui.script_builder.command_schema import _build_specs + specs = {s.command for s in _build_specs()} + assert {"AC_build_timeline", "AC_critical_steps"} <= specs + + +def test_facade_exports(): + for name in ("build_timeline", "critical_steps"): + assert hasattr(ac, name) and name in ac.__all__