Polishing the voting logic

MohamedBilelBesbes · MohamedBilelBesbes · commit a0455af9feaf · 2026-03-17T13:57:53.000-04:00
diff --git a/tests/perfalert/test_methods_alerts.py b/tests/perfalert/test_methods_alerts.py
@@ -1,16 +1,17 @@
 import datetime
+import sys
 import time
 
-import treeherder.perf.alerts as alerts_module
 from treeherder.model.models import Push
 from treeherder.perf.alerts import (
     build_cpd_methods,
+    create_alert,
     detect_methods_changes,
-    equal_voting_strategy,
     generate_new_test_alerts_in_series,
     get_methods_detecting_at_index,
     get_weighted_average_push,
     name_voting_strategy,
+    vote,
 )
 from treeherder.perf.models import (
     PerformanceAlertSummaryTesting,
@@ -519,12 +520,10 @@ def test_margin_deduplication_guard_suppresses_nearby_duplicate_alerts(
     monkeypatch,
 ):
     """
-    Verifies the deduplication guard in equal_voting_strategy (alerts.py in equal_voting_strategy and priority_voting_strategy functions):
-        if any(abs(i - alerted_idx) <= margin for alerted_idx in alerted_indices):
-            continue
+    Verifies the deduplication guard in equal_voting_strategy suppresses duplicate
+    create_alert calls for adjacent indices near the same detected regression.
     Compares create_alert call counts between the real implementation (guard active)
-    and a guard-free copy. The guard must suppress at least one redundant call for
-    adjacent indices near the detected regression.
+    and a guard-free copy.
     Note: DB row counts cannot be used here because update_or_create silently
     merges duplicate calls with the same push_id regardless of the guard.
     """
@@ -557,60 +556,51 @@ def test_margin_deduplication_guard_suppresses_nearby_duplicate_alerts(
         test_perf_signature, list(revision_data.values()), build_cpd_methods()
     )
 
-    def equal_without_guard(
-        signature,
-        analyzed_series,
-        cons_th=3,
-        margin=2,
-        alerted_indices=None,
-        detection_method_naming=None,
-        replicates_enabled=False,
-    ):
-        if not analyzed_series or len(analyzed_series) < 2:
-            return
-        detection_method_naming = name_voting_strategy(
-            "equal", cons_th, margin, replicates_enabled, detection_method_naming
-        )
-        alerted_indices = alerted_indices if alerted_indices is not None else set()
+    _alerts_mod = sys.modules["treeherder.perf.alerts"]
+    original = create_alert
+    call_counts = {"with_guard": 0, "without_guard": 0}
+
+    def vote_without_guard(signature, analyzed_series, cons_th=3, margin=2):
+        """Guard-free version: collects detections without deduplication, then calls create_alert."""
+        detections = []
+        detection_method_naming = name_voting_strategy("equal", cons_th, margin, False, None)
         for i in range(1, len(analyzed_series)):
             methods_detecting_data = get_methods_detecting_at_index(analyzed_series, i, margin)
             if len(methods_detecting_data) >= cons_th:
-                start_idx, end_idx = max(0, i - margin), min(len(analyzed_series) - 1, i + margin)
+                start_idx = max(0, i - margin)
+                end_idx = min(len(analyzed_series) - 1, i + margin)
                 weighted_index, prev_index = get_weighted_average_push(
                     analyzed_series, methods_detecting_data, start_idx, end_idx
                 )
                 if weighted_index is not None:
-                    alerts_module.create_alert(
-                        signature,
-                        analyzed_series,
-                        analyzed_series[prev_index],
-                        analyzed_series[weighted_index],
-                        weighted_index,
-                        methods_detecting_data,
-                        detection_method_naming,
-                    )
-                    alerted_indices.add(weighted_index)
+                    detections.append((weighted_index, prev_index, methods_detecting_data))
+        for weighted_index, prev_index, methods_data in detections:
+            _alerts_mod.create_alert(
+                signature,
+                analyzed_series,
+                analyzed_series[prev_index],
+                analyzed_series[weighted_index],
+                weighted_index,
+                methods_data,
+                detection_method_naming,
+            )
+
+    def make_counter(key):
+        return lambda *a, **kw: call_counts.__setitem__(key, call_counts[key] + 1) or original(
+            *a, **kw
+        )
+
+    monkeypatch.setattr(_alerts_mod, "create_alert", make_counter("with_guard"))
+    vote(test_perf_signature, analyzed_series, strategy="equal", cons_th=cons_th, margin=margin)
 
-    call_counts = {"with_guard": 0, "without_guard": 0}
-    original = alerts_module.create_alert
-    monkeypatch.setattr(
-        alerts_module,
-        "create_alert",
-        lambda *a, **kw: call_counts.__setitem__("with_guard", call_counts["with_guard"] + 1)
-        or original(*a, **kw),
-    )
-    equal_voting_strategy(test_perf_signature, analyzed_series, cons_th=cons_th, margin=margin)
     PerformanceAlertTesting.objects.all().delete()
     PerformanceAlertSummaryTesting.objects.all().delete()
-    monkeypatch.setattr(
-        alerts_module,
-        "create_alert",
-        lambda *a, **kw: call_counts.__setitem__("without_guard", call_counts["without_guard"] + 1)
-        or original(*a, **kw),
-    )
-    equal_without_guard(test_perf_signature, analyzed_series, cons_th=cons_th, margin=margin)
+
+    monkeypatch.setattr(_alerts_mod, "create_alert", make_counter("without_guard"))
+    vote_without_guard(test_perf_signature, analyzed_series, cons_th=cons_th, margin=margin)
+
     assert call_counts["without_guard"] > call_counts["with_guard"], (
-        f"Deduplication guard missing in equal_voting_strategy (~line 490): "
+        f"Deduplication guard missing in equal_voting_strategy: "
         f"expected fewer create_alert calls with guard ({call_counts['with_guard']}) "
         f"than without ({call_counts['without_guard']})."
     )
diff --git a/treeherder/perf/alerts.py b/treeherder/perf/alerts.py
@@ -33,10 +33,13 @@
 
 logger = logging.getLogger(__name__)
 
-
-STRATEGY = "equal"
-CONS_TH = 3
-MARGIN = 1
+# Selects which voting algorithm is used to combine detections across methods. The available strategies are "equal" and "priority".
+VOTING_STRATEGY = "equal"
+# Sets how many methods must agree before an alert is raised.
+MIN_METHOD_AGREEMENT = 3
+# Controls how far apart two detections can be while still being counted as the same change.
+DETECTION_INDEX_TOLERANCE = 1
+# Toggles whether raw repeated measurements are passed to the detectors instead of aggregated values.
 REPLICATES = False
 
 
@@ -297,6 +300,10 @@ def build_cpd_methods():
 
 
 def name_voting_strategy(strategy, cons_th, margin, replicates_enabled, existing_name=None):
+    """
+    Builds a string label encoding the active voting configuration, used to tag
+    alerts with the strategy that produced them.
+    """
     if existing_name is not None:
         return existing_name
     suffix = "replicates_enabled" if replicates_enabled else "replicates_not_enabled"
@@ -320,18 +327,19 @@ def vote(
 ):
     """
     Apply voting logic to determine which alerts to create based on multiple detection methods.
+    Each strategy returns a list of (weighted_index, prev_index, methods_data) tuples and
+    a detection method naming string. Alert creation is handled here to ensure exactly one
+    alert is created per agreed-upon change point regardless of which strategy is used.
     """
     if strategy == "equal":
-        equal_voting_strategy(
-            signature=signature,
+        detections, detection_method_naming = equal_voting_strategy(
             analyzed_series=analyzed_series,
             cons_th=cons_th,
             margin=margin,
             replicates_enabled=replicates_enabled,
         )
     elif strategy == "priority":
-        priority_voting_strategy(
-            signature=signature,
+        detections, detection_method_naming = priority_voting_strategy(
             analyzed_series=analyzed_series,
             cons_th=cons_th,
             margin=margin,
@@ -340,6 +348,19 @@ def vote(
     else:
         raise ValueError(f"Unknown voting strategy: {strategy}")
 
+    for weighted_index, prev_index, methods_data in detections:
+        cur = analyzed_series[weighted_index]
+        prev = analyzed_series[prev_index]
+        create_alert(
+            signature,
+            analyzed_series,
+            prev,
+            cur,
+            weighted_index,
+            methods_data,
+            detection_method_naming,
+        )
+
 
 def get_methods_detecting_at_index(analyzed_series, index, margin=2):
     """
@@ -407,33 +428,33 @@ def get_weighted_average_push(analyzed_series, methods, start_idx, end_idx):
     return weighted_avg_index, prev_index
 
 
-def priority_voting_strategy(
-    signature, analyzed_series, cons_th=3, margin=1, replicates_enabled=False
-):
+def priority_voting_strategy(analyzed_series, cons_th=3, margin=1, replicates_enabled=False):
     """
     Priority voting strategy where student method has voting priority.
+    Returns a list of (weighted_index, prev_index, methods_data) tuples and a naming string.
     """
     if not analyzed_series or len(analyzed_series) < 2:
-        return
+        return [], name_voting_strategy("priority", cons_th, margin, replicates_enabled)
 
     all_methods = build_cpd_methods().keys()
     detection_method_naming = name_voting_strategy("priority", cons_th, margin, replicates_enabled)
 
-    # Track which indices we've already created alerts for (to avoid duplicates
+    detections = []
+    # Track which indices we've already added detections for (to avoid duplicates
     # in both Phase 1 and the fallback equal strategy)
     alerted_indices = set()
 
     # Phase 1: Student detections (no margin tolerance)
     for i in range(1, len(analyzed_series)):
+        # Skip indices within `margin` of an already recorded detection. This avoids duplicate
+        # alerts for one underlying change that methods may pinpoint at slightly different indices.
         if any(abs(i - alerted_idx) <= margin for alerted_idx in alerted_indices):
             continue
 
         cur = analyzed_series[i]
 
         if cur.change_detected.get("student", False):
-            prev = analyzed_series[i - 1]
-
-            # Collect ALL methods detecting at this exact index (not within margin) to include in alert details, but do not require them for the alert to be created
+            prev_index = i - 1
             methods_data = {}
             for method in all_methods:
                 if cur.change_detected.get(method, False):
@@ -442,44 +463,48 @@ def priority_voting_strategy(
                         confidence_value = 1000
 
                     methods_data[method] = {"push_id": cur.push_id, "confidence": confidence_value}
-            create_alert(
-                signature, analyzed_series, prev, cur, i, methods_data, detection_method_naming
-            )
+
+            detections.append((i, prev_index, methods_data))
+            alerted_indices.add(i)
 
     # Phase 2: Fall back to equal strategy for indices not caught by Student
     # Student won't influence the vote here since change_detected["student"]
     # is False for all remaining candidates
-    equal_voting_strategy(
-        signature=signature,
+    equal_detections, _ = equal_voting_strategy(
         analyzed_series=analyzed_series,
         cons_th=cons_th,
         margin=margin,
         alerted_indices=alerted_indices,
-        detection_method_naming=detection_method_naming,
         replicates_enabled=replicates_enabled,
     )
+    detections.extend(equal_detections)
+
+    return detections, detection_method_naming
 
 
 def equal_voting_strategy(
-    signature,
     analyzed_series,
     cons_th=3,
-    margin=2,
+    margin=1,
     alerted_indices=None,
     detection_method_naming=None,
     replicates_enabled=False,
 ):
     """
     Equal voting strategy where all methods have equal weight.
+    Returns a list of (weighted_index, prev_index, methods_data) tuples and a naming string.
     """
     if not analyzed_series or len(analyzed_series) < 2:
-        return
+        return [], name_voting_strategy(
+            "equal", cons_th, margin, replicates_enabled, detection_method_naming
+        )
 
-    # Track which indices we've already created alerts for (to avoid duplicates)
     detection_method_naming = name_voting_strategy(
         "equal", cons_th, margin, replicates_enabled, detection_method_naming
     )
     alerted_indices = alerted_indices if alerted_indices is not None else set()
+    detections = []
+
     for i in range(1, len(analyzed_series)):
         # Skip if we've already created an alert near this index
         if any(abs(i - alerted_idx) <= margin for alerted_idx in alerted_indices):
@@ -498,20 +523,11 @@ def equal_voting_strategy(
             )
 
             if weighted_index is not None:
-                cur = analyzed_series[weighted_index]
-                prev = analyzed_series[prev_index]
-
-                create_alert(
-                    signature,
-                    analyzed_series,
-                    prev,
-                    cur,
-                    weighted_index,
-                    methods_detecting_data,
-                    detection_method_naming,
-                )
+                detections.append((weighted_index, prev_index, methods_detecting_data))
                 alerted_indices.add(weighted_index)
 
+    return detections, detection_method_naming
+
 
 def create_alert(
     signature,
@@ -621,7 +637,11 @@ def create_alert(
 
 
 def generate_new_test_alerts_in_series(
-    signature, strategy=STRATEGY, cons_th=CONS_TH, margin=MARGIN, replicates_enabled=REPLICATES
+    signature,
+    strategy=VOTING_STRATEGY,
+    cons_th=MIN_METHOD_AGREEMENT,
+    margin=DETECTION_INDEX_TOLERANCE,
+    replicates_enabled=REPLICATES,
 ):
     # get series data starting from either:
     # (1) the last alert, if there is one
diff --git a/treeherder/perf/migrations/0067_alter_performancealerttesting_detected_changes.py b/treeherder/perf/migrations/0067_alter_performancealerttesting_detected_changes.py
@@ -0,0 +1,23 @@
+# Generated by Django 5.1.15 on 2026-03-17 17:57
+
+from django.db import migrations, models
+
+import treeherder.perf.models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("perf", "0066_performancealerttesting_detected_changes_and_more"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="performancealerttesting",
+            name="detected_changes",
+            field=models.JSONField(
+                default=treeherder.perf.models.default_detection_methods,
+                help_text="A JSON object that indicates the confidence of the alert for each detection method used. It has methods detecting changes on the culprit revision or in one of the revisions aorund it, the push_id field indicates the revision where the change was detected for the given method.",
+            ),
+        ),
+    ]
diff --git a/treeherder/perf/models.py b/treeherder/perf/models.py
@@ -808,8 +808,8 @@ class PerformanceAlertTesting(PerformanceAlertBase):
     new_p95 = models.FloatField(help_text="New P95 value of series after change")
 
     detected_changes = models.JSONField(
-        help_text="A JSON object that indicates the confidence of the alert for each detection method used."
-        "It has methods detecting changes on the culprit revision or in one of the revisions aorund it, the push_id"
+        help_text="A JSON object that indicates the confidence of the alert for each detection method used. "
+        "It has methods detecting changes on the culprit revision or in one of the revisions aorund it, the push_id "
         "field indicates the revision where the change was detected for the given method.",
         default=default_detection_methods,
     )