diff --git a/src/BenchmarkDotNet/Code/DeclarationsProvider.cs b/src/BenchmarkDotNet/Code/DeclarationsProvider.cs
index 107c2bcb6a..155fba85ee 100644
--- a/src/BenchmarkDotNet/Code/DeclarationsProvider.cs
+++ b/src/BenchmarkDotNet/Code/DeclarationsProvider.cs
@@ -34,6 +34,8 @@ public SmartStringBuilder ReplaceTemplate(SmartStringBuilder smartStringBuilder)
             return ReplaceCore(smartStringBuilder)
                 .Replace("$DisassemblerEntryMethodImpl$", GetWorkloadMethodCall(GetPassArgumentsDirect()))
                 .Replace("$OperationsPerInvoke$", Descriptor.OperationsPerInvoke.ToString())
+                .Replace("$WorkloadMethodName$", Descriptor.WorkloadMethod.Name)
+                .Replace("$WorkloadMethodParameterTypes$", GetWorkloadMethodParameterTypes())
                 .Replace("$WorkloadTypeName$", Descriptor.Type.GetCorrectCSharpTypeName());
         }
 
@@ -108,6 +110,26 @@ protected string GetPassArguments()
                     .Select((parameter, index) => $"{CodeGenerator.GetParameterModifier(parameter)} arg{index}")
             );
 
+        // Emits C# source for a Type[] listing the benchmark method's parameter types, so __ResolveWorkloadMethods can
+        // pick the exact overload without naming the (possibly unspellable) return type. By-ref/pointer parameters are
+        // rebuilt from their element type via reflection because typeof cannot express `T&`.
+        private string GetWorkloadMethodParameterTypes()
+        {
+            string[] typeExpressions = Descriptor.WorkloadMethod.GetParameters().Select(p => GetTypeOfExpression(p.ParameterType)).ToArray();
+            return typeExpressions.Length == 0
+                ? "global::System.Array.Empty<global::System.Type>()"
+                : $"new global::System.Type[] {{ {string.Join(", ", typeExpressions)} }}";
+        }
+
+        // typeof(...) cannot express `T&`; by-ref (and pointer) types are rebuilt from their element type via a Make*Type() call.
+        private static string GetTypeOfExpression(System.Type type)
+        {
+            string? wrapCall = type.IsByRef ? "MakeByRefType" : type.IsPointer ? "MakePointerType" : null;
+            return wrapCall is null
+                ? $"typeof({type.GetCorrectCSharpTypeName()})"
+                : $"{GetTypeOfExpression(type.GetElementType()!)}.{wrapCall}()";
+        }
+
         protected string GetPassArgumentsDirect()
             => string.Join(
                 ", ",
diff --git a/src/BenchmarkDotNet/Engines/Engine.cs b/src/BenchmarkDotNet/Engines/Engine.cs
index a2c4e9eb4b..269280937e 100644
--- a/src/BenchmarkDotNet/Engines/Engine.cs
+++ b/src/BenchmarkDotNet/Engines/Engine.cs
@@ -34,6 +34,7 @@ internal Engine(EngineParameters engineParameters)
             var job = engineParameters.TargetJob ?? throw new ArgumentNullException(nameof(EngineParameters.TargetJob));
             Parameters = new()
             {
+                WorkloadMethods = engineParameters.WorkloadMethods ?? throw new ArgumentNullException(nameof(EngineParameters.WorkloadMethods)),
                 WorkloadActionNoUnroll = engineParameters.WorkloadActionNoUnroll ?? throw new ArgumentNullException(nameof(EngineParameters.WorkloadActionNoUnroll)),
                 WorkloadActionUnroll = engineParameters.WorkloadActionUnroll ?? throw new ArgumentNullException(nameof(EngineParameters.WorkloadActionUnroll)),
                 OverheadActionNoUnroll = engineParameters.OverheadActionNoUnroll ?? throw new ArgumentNullException(nameof(EngineParameters.OverheadActionNoUnroll)),
diff --git a/src/BenchmarkDotNet/Engines/EngineJitStage.cs b/src/BenchmarkDotNet/Engines/EngineJitStage.cs
index 1e61e8315e..005e569e03 100644
--- a/src/BenchmarkDotNet/Engines/EngineJitStage.cs
+++ b/src/BenchmarkDotNet/Engines/EngineJitStage.cs
@@ -1,3 +1,4 @@
+using BenchmarkDotNet.Attributes.CompilerServices;
 using BenchmarkDotNet.Jobs;
 using BenchmarkDotNet.Portability;
 using BenchmarkDotNet.Reports;
@@ -9,29 +10,49 @@ namespace BenchmarkDotNet.Engines;
 // and we purposefully don't spend too much time in this stage, so we can't guarantee it.
 // This should succeed for 99%+ of microbenchmarks. For any sufficiently short benchmarks where this fails,
 // the following stages (Pilot and Warmup) will likely take it the rest of the way. Long-running benchmarks may never fully reach tier1.
+[AggressivelyOptimizeMethods] // Reduce JIT event noise from the jit stage itself.
 internal sealed class EngineJitStage : EngineStage
 {
-    // Jit call counting delay is only for when the app starts up. We don't need to wait for every benchmark if multiple benchmarks are ran in-process.
-    private static TimeSpan s_tieredDelay = JitInfo.TieredDelay;
+    // After a tier's single burst fails to tier-up, we nudge one invocation at a time, giving the background worker a
+    // short window (~10ms) to pick each nudge up before trying the next — so we stop the instant the tier-up lands
+    // instead of overshooting by re-bursting the whole budget. Passed to WaitForTierUp as its busy-wait timeout.
+    private static readonly TimeSpan EventDeliveryLag = TimeSpan.FromMilliseconds(10);
 
     internal bool didStopEarly = false;
     internal Measurement lastMeasurement;
 
     private readonly IEnumerator<IterationData> enumerator;
     private readonly bool evaluateOverhead;
+    private readonly bool skipDelays;
+    // Watches the benchmark method(s)' background tier-up via JIT events so we can proceed once the JIT goes quiet after
+    // each tier (the whole call tree warmed), instead of waiting a fixed delay. Null when there's nothing to watch or
+    // EventSource is disabled, in which case we fall back to the fixed delay.
+    private readonly JitListener? listener;
+    // True when this stage created the listener and must dispose it; false when a caller (a test) injected one it owns.
+    private readonly bool disposeListener;
+
+    // Creates and owns (disposes) its JitListener. With skipDelays no listener is created: the listener's waits are not gated by skipDelays.
+    internal EngineJitStage(bool evaluateOverhead, EngineParameters parameters, bool skipDelays)
+        : this(evaluateOverhead, parameters, skipDelays ? null : JitListener.Create(parameters.WorkloadMethods), disposeListener: true, skipDelays: skipDelays)
+    { }
 
-    internal EngineJitStage(bool evaluateOverhead, EngineParameters parameters) : base(IterationStage.Jitting, IterationMode.Workload, parameters)
+    internal EngineJitStage(bool evaluateOverhead, EngineParameters parameters, JitListener? listener, bool disposeListener = false, bool skipDelays = false)
+        : base(IterationStage.Jitting, IterationMode.Workload, parameters)
     {
+        this.listener = listener;
+        this.disposeListener = disposeListener;
         enumerator = EnumerateIterations();
         this.evaluateOverhead = evaluateOverhead;
+        this.skipDelays = skipDelays;
     }
 
     internal override List<Measurement> GetMeasurementList() => new(GetMaxMeasurementCount());
 
     private int GetMaxMeasurementCount()
     {
+        int nudgeMultiplier = JitInfo.TieredDelay > TimeSpan.Zero ? 2 : 1;
         int count = JitInfo.IsTiered
-            ? JitInfo.MaxTierPromotions * JitInfo.TieredCallCountThreshold + 2
+            ? JitInfo.MaxTierPromotions * JitInfo.TieredCallCountThreshold * nudgeMultiplier + 2
             : 1;
         if (evaluateOverhead)
         {
@@ -44,7 +65,7 @@ internal override bool GetShouldRunIteration(List<Measurement> measurements, out
     {
         if (measurements.Count > 0)
         {
-            var measurement = measurements[measurements.Count - 1];
+            var measurement = measurements[^1];
             if (measurement.IterationMode == IterationMode.Workload)
             {
                 lastMeasurement = measurement;
@@ -55,6 +76,10 @@ internal override bool GetShouldRunIteration(List<Measurement> measurements, out
             iterationData = enumerator.Current;
             return true;
         }
+        if (disposeListener)
+        {
+            listener?.Dispose();
+        }
         enumerator.Dispose();
         iterationData = default;
         return false;
@@ -81,16 +106,23 @@ private IEnumerator<IterationData> EnumerateIterations()
             yield break;
         }
 
-        // Wait enough time for jit call counting to begin.
-        Engine.SleepIfPositive(s_tieredDelay);
-        // Don't make the next jit stage wait if it's ran in the same process.
-        s_tieredDelay = TimeSpan.Zero;
+        bool observeMethod = listener != null;
+        if (observeMethod)
+        {
+            // Before the tier loop, wait until the call-counting delay is inactive so the first burst is counted —
+            // or, if tiering is quiet because the method was pre-warmed past tier0, fake it and proceed. The first
+            // invoke above already fired the watched method's Pause if it was tier0. See WaitForInitialTieringActive.
+            listener!.WaitForInitialTieringActive(parameters.Host.CancellationToken);
+        }
+        else if (!skipDelays && JitInfo.TieredDelay > TimeSpan.Zero)
+        {
+            // Fall back to a fixed wait for the call-counting delay to elapse.
+            Thread.Sleep(JitInfo.TieredDelay + TimeSpan.FromMilliseconds(10));
+        }
 
-        // If the first iteration suggests a long-running benchmark (a single invocation already
-        // takes ~2/3 of IterationTime or more), run one confirmation iteration and bail out if
-        // it agrees. Same cutoff value that pilot stage uses.
-        // We do not bail out immediately if the first iteration is long-running because it could
-        // be due to cctors or other lazy initialization that won't be hit in steady-state. #2004
+        // Long-running early-exit: if a single invocation already takes ~2/3 of IterationTime, this is a long-running
+        // benchmark — bail and let the Pilot/Warmup stages finish tiering. The first invoke can be inflated by JIT or
+        // cctors, so confirm with one more iteration before bailing (it could be a one-time cost). #2004
         // JitTieringMode.Force opts out of this heuristic and always promotes through every tier.
         TimeInterval iterationTime = parameters.TargetJob.ResolveValue(RunMode.IterationTimeCharacteristic, parameters.Resolver);
         long remainingCalls = JitInfo.TieredCallCountThreshold;
@@ -104,12 +136,18 @@ private IEnumerator<IterationData> EnumerateIterations()
                 didStopEarly = true;
                 yield break;
             }
-            remainingCalls -= userInvokeCount;
         }
 
         // Promote methods to tier1.
-        for (int remainingTiers = JitInfo.MaxTierPromotions; remainingTiers > 0; --remainingTiers)
+        for (int tierCount = 0; tierCount < JitInfo.MaxTierPromotions; ++tierCount, remainingCalls = JitInfo.TieredCallCountThreshold)
         {
+            // Run ONE full burst of this tier's call budget, gated so it's counted rather than wasted into a
+            // deferred window. After it, wait for the background JIT to go QUIET (WaitForQuiescentTierUp): once the
+            // worker is idle, this tier's compiles — the watched method(s) AND their untracked callees — have all
+            // landed, so the next burst / the following stage won't race them. The per-tier counter persists, so if
+            // the burst didn't tier the watched method(s) up we nudge the rest one at a time below rather than
+            // re-bursting the whole budget.
+            listener?.WaitForTieringActive(parameters.Host.CancellationToken);
             while (remainingCalls > 0)
             {
                 // Run the whole tier's call budget in a single iteration unless the user pinned InvocationCount.
@@ -120,8 +158,71 @@ private IEnumerator<IterationData> EnumerateIterations()
                 yield return GetWorkloadIterationData(invokeCount);
             }
 
-            Engine.SleepIfPositive(JitInfo.BackgroundCompilationDelay);
-            remainingCalls = JitInfo.TieredCallCountThreshold;
+            if (listener != null)
+            {
+                // Wait for the background JIT to go quiet (the watched method(s) and their callees settle), then read
+                // whether the watched method(s) actually advanced this burst. Once we've stopped observing them the
+                // advanced result is ignored, but this still drains untracked-callee tier-ups before the next burst.
+                bool advanced = listener.WaitForQuiescentTierUp(tierCount, parameters.Host.CancellationToken);
+                if (observeMethod)
+                {
+                    if (!advanced)
+                    {
+                        // The burst didn't tier the watched method(s) up. With NO call-counting delay, the burst's whole
+                        // budget was counted, so a miss means they were pre-warmed past this tier (or are otherwise
+                        // unobservable) — nudging can't help, so stop consulting the listener for them. We don't bail out
+                        // entirely because the benchmark may call other (un-pre-warmed) methods via different control
+                        // flow (e.g. an InProcess toolchain with arguments/params); the remaining bursts warm those + callees.
+                        if (JitInfo.TieredDelay <= TimeSpan.Zero)
+                        {
+                            observeMethod = false;
+                            continue;
+                        }
+
+                        // Otherwise the call-counting delay was probably active for the first ~10ms of the burst due to event
+                        // delivery lag, so some invocations didn't count and we just need a few more. Re-bursting the whole
+                        // budget would overshoot wastefully (up to threshold * call-time), so nudge one invocation at a time,
+                        // detecting the tier-up cheaply (WaitForTierUp, no full quiescence settle per nudge), then
+                        // settle once at the end so this tier's callees are warm.
+                        listener.WaitForTieringActive(parameters.Host.CancellationToken);
+                        long nudgeCalls = hasUserInvocationCount ? userInvokeCount : 1;
+                        for (long nudged = 0; nudged < JitInfo.TieredCallCountThreshold && !advanced; nudged += nudgeCalls)
+                        {
+                            ++iterationIndex;
+                            yield return GetWorkloadIterationData(nudgeCalls);
+                            advanced = listener.WaitForTierUp(tierCount, EventDeliveryLag, parameters.Host.CancellationToken);
+                        }
+                        // Settle the callees pushed by the nudges (and re-read the tier state race-free); this also
+                        // catches a tier-up whose publication arrived just after the last cheap WaitForTierUp window.
+                        advanced = listener.WaitForQuiescentTierUp(tierCount, parameters.Host.CancellationToken);
+                        if (!advanced)
+                        {
+                            // Even nudging didn't tier them up — most likely pre-warmed to their final tier before the
+                            // stage started (e.g. via InProcess toolchains). Stop consulting the listener (same as the
+                            // no-delay case above). We already spent ~2 tiers' worth here, so skip a tier (the extra
+                            // ++tierCount on top of the loop's) so we don't overspend the budget.
+                            observeMethod = false;
+                            ++tierCount;
+                            continue;
+                        }
+                    }
+
+                    if (listener.ReachedFinalTier)
+                    {
+                        // ReachedFinalTier is the aggregate: every watched method is fully warmed, so we will not
+                        // receive any more tier-up JIT events for the method(s) we track. (OSR adds a quirk: a runtime
+                        // bug double-instruments an OSR'd callee, which JitInfo.MaxTierPromotions already budgets for.)
+                        // Keep bursting to push any untracked callees through their tiers — the quiescence wait above
+                        // handles them — but stop consulting the listener for the watched method(s).
+                        observeMethod = false;
+                    }
+                }
+            }
+            else if (!skipDelays)
+            {
+                // No listener (nothing to watch, or EventSource unavailable), fall back to the fixed delay.
+                Engine.SleepIfPositive(JitInfo.BackgroundCompilationDelay);
+            }
         }
 
         // Empirical evidence shows that the first call after the method is tiered up may take longer,
diff --git a/src/BenchmarkDotNet/Engines/EngineParameters.cs b/src/BenchmarkDotNet/Engines/EngineParameters.cs
index 7d7dab4b21..a496cfa116 100644
--- a/src/BenchmarkDotNet/Engines/EngineParameters.cs
+++ b/src/BenchmarkDotNet/Engines/EngineParameters.cs
@@ -1,3 +1,4 @@
+using System.Reflection;
 using BenchmarkDotNet.Characteristics;
 using BenchmarkDotNet.Jobs;
 using BenchmarkDotNet.Running;
@@ -16,6 +17,13 @@ public class EngineParameters
         public required Func<long, IClock, ValueTask<ClockSpan>> OverheadActionNoUnroll { get; set; }
         public required Func<long, IClock, ValueTask<ClockSpan>> OverheadActionUnroll { get; set; }
         public Job TargetJob { get; set; } = Job.Default;
+
+        /// <summary>
+        /// The benchmark method(s), used by the jit stage to watch for their tier-up via JIT events.
+        /// When empty (nothing to watch, or resolution failed), the jit stage falls back to a fixed delay.
+        /// </summary>
+        public required IEnumerable<MethodInfo> WorkloadMethods { get; set; }
+
         public long OperationsPerInvoke { get; set; } = 1;
         public required Func<ValueTask> GlobalSetupAction { get; set; }
         public required Func<ValueTask> GlobalCleanupAction { get; set; }
diff --git a/src/BenchmarkDotNet/Engines/EngineStage.cs b/src/BenchmarkDotNet/Engines/EngineStage.cs
index 7d1333f390..8b23bfacc4 100644
--- a/src/BenchmarkDotNet/Engines/EngineStage.cs
+++ b/src/BenchmarkDotNet/Engines/EngineStage.cs
@@ -15,7 +15,9 @@ internal abstract class EngineStage(IterationStage stage, IterationMode mode, En
         internal abstract bool GetShouldRunIteration(List<Measurement> measurements, out IterationData iterationData);
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        internal static IEnumerable<EngineStage> EnumerateStages(EngineParameters parameters)
+        // skipJitDelays is used by EnumerateStagesTests to skip waiting when it's only testing the stage logic, not real JIT compilation.
+        // Real JIT compilation is tested in JitListenerTests.
+        internal static IEnumerable<EngineStage> EnumerateStages(EngineParameters parameters, bool skipJitDelays = false)
         {
             var strategy = parameters.TargetJob.ResolveValue(RunMode.RunStrategyCharacteristic, parameters.Resolver);
             var invokeCount = parameters.TargetJob.ResolveValue(RunMode.InvocationCountCharacteristic, parameters.Resolver, 1);
@@ -31,7 +33,7 @@ internal static IEnumerable<EngineStage> EnumerateStages(EngineParameters parame
                     int minInvokeCount = parameters.TargetJob.ResolveValue(AccuracyMode.MinInvokeCountCharacteristic, parameters.Resolver);
 
                     // AOT technically doesn't have a JIT, but we run jit stage regardless because of static constructors. #2004
-                    var jitStage = new EngineJitStage(evaluateOverhead, parameters);
+                    var jitStage = new EngineJitStage(evaluateOverhead, parameters, skipJitDelays);
                     yield return jitStage;
 
                     bool hasUnrollFactor = parameters.TargetJob.HasValue(RunMode.UnrollFactorCharacteristic);
diff --git a/src/BenchmarkDotNet/Engines/JitListener.cs b/src/BenchmarkDotNet/Engines/JitListener.cs
new file mode 100644
index 0000000000..884cf2d138
--- /dev/null
+++ b/src/BenchmarkDotNet/Engines/JitListener.cs
@@ -0,0 +1,500 @@
+using System.Diagnostics.Tracing;
+using System.Reflection;
+using BenchmarkDotNet.Attributes.CompilerServices;
+using BenchmarkDotNet.Portability;
+
+namespace BenchmarkDotNet.Engines;
+
+// Observes background JIT tier-up of one or more (benchmark) methods by listening to the runtime's JIT events
+// in-process, so the jit stage can proceed as soon as the call tree is actually warmed instead of waiting a fixed
+// delay. The runtime only announces transitions (there is no API to poll a method's current tier), so we must be
+// listening while they happen. A single listener can watch several methods at once — ReachedFinalTier is the aggregate
+// (true only once EVERY watched method has reached its final tier). (Today the stage watches just the benchmark method;
+// watching multiple is in place so scenarios that drive several methods need no contract change. #147)
+//
+// The core signal is JIT QUIESCENCE, not the individual tier-up. A burst tiers up the watched method AND its (untracked)
+// callees on the same background worker; proceeding the instant the watched method publishes tier1 would leave its
+// callees still compiling and race them into the next burst / the following stage. So each tier the stage waits for the
+// background worker to go idle (WaitForQuiescentTierUp): once it's quiet the whole tree reached this tier, and the
+// watched methods' tier counts can be read race-free.
+//
+// The events it watches, and their roles:
+//   * TieredCompilationBackgroundJitStart/Stop (Compilation keyword) bracket the background tiering worker draining its
+//     queue — Start when it begins, Stop when it finishes (Stop's PendingMethodCount payload is how many remain; 0 =
+//     drained). These fire ONLY for actual tiered background work, so they are how we detect quiescence: a burst's
+//     methods and their callees tier up in a train of back-to-back batches, and WaitForQuiescentTierUp waits for the
+//     worker to be idle and STAY idle for a short settle window. A batch that began-and-finished before we looked is not
+//     lost — it already bumped the tier counts, which we only read after observing the worker idle (see MethodLoadVerbose).
+//   * MethodLoadVerbose (per-method, JIT keyword) reports each tier publication and carries the tier. A non-tier0 load
+//     for a watched method bumps its tier-up count (so callers can detect it advanced beyond a given tier) and, when the
+//     tier is a final one, marks it done. We deliberately do NOT use MethodJittingStarted (compile-began): it carries no
+//     tier, so the tier0 compile's start is indistinguishable from a tier-up's and would race the tier0 publish that
+//     filters it.
+//   * TieredCompilationPause/Resume (the tiering delay bracket, Compilation keyword) bound the call-counting delay. Two
+//     roles: (1) a burst issued while the delay is active isn't counted (the counting stub is deferred), so the stage
+//     waits until the delay is observed inactive — a Resume — before bursting (WaitForTieringActive); up front
+//     (WaitForInitialTieringActive) it waits for any method's tier0 JIT or a Pause/Resume to confirm a Resume is coming,
+//     and if none arrives the method was pre-warmed so it fakes the inactive state and proceeds. (2) While the delay is
+//     active the background worker is paused — it won't compile even already-enqueued tier-ups until the Resume — so
+//     "worker idle" during a pause is NOT quiescence. WaitForQuiescentTierUp therefore calls WaitForTieringActive each
+//     loop turn to wait the pause out before timing the idle settle window.
+//
+// The busy/idle state is a ManualResetEventSlim pair; the per-method tier/completion counts are mutated under syncRoot
+// (which OnEventWritten already holds) but declared volatile so the waiters read them lock-free — their ordering comes
+// from draining the in-flight batch first, not from the lock. So the waiters are lock-free (the events handle their own
+// timeouts and cancellation), and WaitForTieringActive composes naturally into the quiescence loop.
+//
+// This is intentionally a per-stage listener: enabling the Jit keyword emits an event for every method jitted
+// process-wide, which we must NOT pay during the measurement stages. It is created at the start of the jit stage
+// and disposed at the end.
+//
+// Create returns null (and the caller falls back to the fixed delay) when the runtime has no tiered JIT, or when
+// EventSource is unavailable — it can be disabled via the System.Diagnostics.Tracing.EventSource.IsSupported feature
+// switch. It otherwise watches each method regardless of whether it looks tier-eligible: a method that can't tier just
+// publishes its single final tier (see the tier constants below), which the stage observes and treats as "done".
+[AggressivelyOptimizeMethods] // Reduce JIT event noise from the listener itself.
+internal sealed class JitListener : EventListener
+{
+    private const string RuntimeEventSourceName = "Microsoft-Windows-DotNETRuntime";
+    private const EventKeywords JitKeyword = (EventKeywords)0x10;
+    // The "Compilation" keyword carries the TieredCompilation/Pause|Resume and BackgroundJit Start/Stop events. Low
+    // volume (a handful per delay/batch cycle), so enabling it adds no meaningful cost.
+    private const EventKeywords CompilationKeyword = (EventKeywords)0x1000000000;
+    private const string TieredCompilationResumeEvent = "TieredCompilationResume";
+    private const string TieredCompilationPauseEvent = "TieredCompilationPause";
+    // The background tiering worker brackets each batch with these: Start when it begins draining its queue, Stop when
+    // it finishes (Stop's PendingMethodCount payload is how many remain — 0 = drained). They are the quiescence signal.
+    private const string TieredCompilationBackgroundJitStartEvent = "TieredCompilationBackgroundJitStart";
+    private const string TieredCompilationBackgroundJitStopEvent = "TieredCompilationBackgroundJitStop";
+    // Event-name prefix (the runtime appends a version suffix, e.g. MethodLoadVerbose_V2).
+    private const string MethodLoadVerbosePrefix = "MethodLoadVerbose";
+
+    // Optimization tier is packed into MethodFlags bits [7..9]: (MethodFlags >> 7) & 0x7.
+    // The initial tier0 quick compile is QuickJitted = 3; the intermediate instrumented (PGO) publication reports
+    // another value and just counts as "a recompilation happened". A method is fully warmed once it reaches one of
+    // the runtime's FINAL tiers — those from which no further tier-up is coming:
+    //   * OptimizedTier1 = 4 — the usual steady state for a tier-eligible method.
+    //   * Optimized = 2 (NativeCodeVersion::OptimizationTierOptimized) — a method compiled straight to optimized code
+    //     without a tier1 promotion: AggressiveOptimization, or a method with a loop when TC_QuickJitForLoops is off.
+    //   * MinOptJitted = 1 — a method that never tiers at all: NoOptimization, or any method in an
+    //     optimization-disabled assembly. This is its first and only compile.
+    // Since Create now watches every method (not just ones that look tier-eligible), a non-tiering method publishes
+    // exactly one of MinOptJitted/Optimized and we recognize it as final immediately, rather than predicting it from
+    // attributes. OptimizedTier1OSR = 5 is special: an on-stack-replacement of a still-running body with a hot loop.
+    // It fires off the loop's back-edge counter, NOT off the call-count threshold, so unlike every other tier it is
+    // never the method's active entry-point code version and is never call-counted — it's orthogonal to the
+    // call-count tier ladder the stage drives, and a watched method that OSRs in both its tier0 and instrumented
+    // bodies emits two of them on the way to its final tier. We therefore ignore OSR publications for our method (see
+    // HandleMethodLoad) so they don't inflate its tier count and stall the stage short of the final tier.
+    private const int OptimizationTierShift = 7;
+    private const int OptimizationTierMask = 0x7;
+    private const int MinOptJitted = 1;
+    private const int Optimized = 2;
+    private const int QuickJittedTier0 = 3;
+    private const int OptimizedTier1 = 4;
+    private const int OptimizedTier1OSR = 5;
+
+    // Margin added on top of the call-counting delay when waiting for a TieredCompilationResume, before assuming it
+    // was dropped (EventPipe sheds events under buffer pressure) and proceeding as if the delay had elapsed. We add it
+    // to TieredDelay rather than use a flat cap so a deliberately huge delay can't make the cap shorter than the delay
+    // itself. Generous vs the ~100ms default delay, so it only ever fires on an actual drop, not on the normal path.
+    private static readonly TimeSpan TieringActiveTimeoutMargin = TimeSpan.FromSeconds(1);
+
+    // How long the background worker must stay idle (no new batch) for us to declare quiescence. A burst's methods and
+    // their callees tier up in a TRAIN of back-to-back background batches (a few tens of ms apart), so the window has to
+    // bridge those gaps and only conclude "quiet" once a full window passes with no new batch. 30ms comfortably spans
+    // the inter-batch gap while keeping the per-tier settle cost small.
+    private static readonly TimeSpan QuiescenceSettleWindow = TimeSpan.FromMilliseconds(30);
+    // How long to wait for an observed-busy background JIT batch to drain before assuming its BackgroundJitStop was
+    // dropped (EventPipe sheds events under pressure) and proceeding. Generous — it only bites on a dropped Stop, and a
+    // large compile queue can legitimately take a while; leaving "busy" stuck would poison every later quiescence check.
+    private static readonly TimeSpan BackgroundJitDrainTimeout = TimeSpan.FromSeconds(10);
+
+    // One entry per watched method. Small (a handful at most), so HandleMethodLoad scans it linearly per publication.
+    private readonly WatchedMethod[] watchedMethods;
+    private readonly object syncRoot = new();
+    private readonly ManualResetEventSlim tieringActiveSignal = new(false);
+    private readonly ManualResetEventSlim tieringActivePrimedSignal = new(false);
+    // The background tiering worker's busy/idle state (Start..Stop-with-0-pending). Kept as a paired flip-flop so a
+    // reader never sees both set at once. Set/Reset under syncRoot.
+    private readonly ManualResetEventSlim backgroundJitBusySignal = new(false);
+    private readonly ManualResetEventSlim backgroundJitIdleSignal = new(true);
+
+    // Number of watched methods that have reached a final tier (guarded by syncRoot). reachedFinalTier mirrors
+    // "finalTierCount == watchedMethods.Length" for a lock-free read; both flip together once every method is done.
+    private int finalTierCount;
+    private volatile bool reachedFinalTier;
+    private volatile bool canObserve;
+    private bool disposed;
+
+    // Cached payload indices (field order is stable within a process for a given event version).
+    private int loadTokenIndex = -1;
+    private int loadFlagsIndex = -1;
+    private int loadNameIndex = -1;
+    private int backgroundJitStopPendingIndex = -1;
+
+    private JitListener(WatchedMethod[] methods)
+    {
+        // NOTE: the base EventListener ctor calls OnEventSourceCreated before this field is set, but that callback only
+        // enables events / probes canObserve and never reads watchedMethods.
+        watchedMethods = methods;
+    }
+
+    // Factory: builds a listener over all given methods. Yields null (caller then uses the fixed-delay fallback) if
+    // the JIT is not tiered, the collection is empty, or the listener could not start observing (canObserve false).
+    internal static JitListener? Create(IEnumerable<MethodInfo> methods)
+    {
+        // Skip enumerating the methods entirely when the JIT is not tiered.
+        WatchedMethod[] targets = JitInfo.IsTiered
+            ? methods.Select(method => new WatchedMethod(method)).ToArray()
+            : Array.Empty<WatchedMethod>();
+        if (targets.Length == 0)
+        {
+            return null;
+        }
+        var candidate = new JitListener(targets);
+        if (candidate.canObserve)
+        {
+            return candidate;
+        }
+        // Cannot observe JIT events: dispose the unusable listener before reporting "no listener".
+        candidate.Dispose();
+        return null;
+    }
+
+    // Lock-free snapshot of the volatile flag: true only after EVERY watched method has reached a final tier.
+    internal bool ReachedFinalTier { get => reachedFinalTier; }
+
+    // Up-front gate: blocks until the call-counting delay is observed inactive (a TieredCompilationResume), so the
+    // stage's first burst will be counted. It first waits up to a timeout for any sign the tiering machinery is active
+    // — a tier0 (QuickJitted) publication for ANY method, or a TieredCompilation Pause/Resume — which guarantees a Resume
+    // is coming to gate on. (The stage calls this AFTER its first invoke, so a freshly-tier0 watched method has already
+    // fired its Pause.) If nothing arrives within the timeout, tiering is quiet: the watched method was pre-warmed past
+    // tier0, its stub is already installed, and no delay is coming on its own — so we fake the active state and proceed.
+    // The lock + IsSet re-check makes that fake atomic against a real event landing right at the timeout boundary (the
+    // event handlers take the same lock), so we never overwrite one; we wait OUTSIDE the lock so handlers never block.
+    internal void WaitForInitialTieringActive(CancellationToken cancellationToken)
+    {
+        // No call-counting delay (e.g. AggressiveTiering) — counting is armed immediately, so there is nothing to gate on.
+        if (JitInfo.TieredDelay <= TimeSpan.Zero)
+        {
+            return;
+        }
+        if (!tieringActivePrimedSignal.Wait(JitInfo.TieredDelay + TimeSpan.FromMilliseconds(50), cancellationToken))
+        {
+            lock (syncRoot)
+            {
+                if (!tieringActivePrimedSignal.IsSet)
+                {
+                    tieringActivePrimedSignal.Set();
+                    tieringActiveSignal.Set();
+                }
+            }
+        }
+        WaitForTieringActive(cancellationToken);
+    }
+
+    // Per-burst gate: blocks until the call-counting delay is inactive (a TieredCompilationResume was observed). Used to
+    // re-gate each burst in the tier loop after WaitForInitialTieringActive established the delay was inactive up front.
+    // Bounded: a Resume can be dropped by EventPipe under buffer pressure, so rather than block forever we wait at most
+    // TieredDelay + TieringActiveTimeoutMargin, then assume the delay elapsed (stubs installed) and force the signal —
+    // the same kind of fallback WaitForInitialTieringActive uses. A real Resume normally arrives within the delay (~100ms).
+    internal void WaitForTieringActive(CancellationToken cancellationToken)
+    {
+        // No call-counting delay (e.g. AggressiveTiering) — counting is armed immediately, so there is nothing to gate on.
+        if (JitInfo.TieredDelay <= TimeSpan.Zero)
+        {
+            return;
+        }
+        if (!tieringActiveSignal.Wait(JitInfo.TieredDelay + TieringActiveTimeoutMargin, cancellationToken))
+        {
+            // Assumes the primed signal is already set (i.e. WaitForInitialTieringActive ran first), so only the active
+            // signal is flipped. Locked so this can't interleave with a concurrent Pause/Resume handler.
+            lock (syncRoot)
+            {
+                tieringActiveSignal.Set();
+            }
+        }
+    }
+
+    // Blocks until the background tiering worker has gone quiet, then reports whether every still-tiering watched
+    // method has moved past `previousTierCounter` tier-ups (or is already at its final tier). Quiet means the worker
+    // was idle and STAYED idle for a whole settle window: this tier's compiles — the watched method(s) AND their
+    // untracked callees, which tier up in a train of back-to-back batches — have then all landed, so neither the next
+    // burst nor the following stage races them. A batch that starts inside the window (a callee tiering up, or the
+    // watched-method batch arriving after a slow enqueue) is drained and the window restarts. The (volatile) tier
+    // counts are read only AFTER the worker was seen idle, so a batch that started and finished before we looked has
+    // already bumped them. (A caller that no longer observes the watched methods can ignore the result and use this
+    // purely to drain untracked callees.)
+    internal bool WaitForQuiescentTierUp(int previousTierCounter, CancellationToken cancellationToken)
+    {
+        bool batchStarted;
+        do
+        {
+            // A pause keeps the worker from compiling even already-enqueued methods, so "idle" while paused is not
+            // quiescent: wait until counting is active again first (returns immediately when not paused).
+            WaitForTieringActive(cancellationToken);
+            // Give the worker one settle window to (re)start a batch; if it does, drain that batch and go around
+            // again, otherwise the tree has settled and the loop ends.
+            batchStarted = WaitForBackgroundJitBusy(QuiescenceSettleWindow, cancellationToken);
+            if (batchStarted)
+            {
+                WaitForBackgroundJitIdle(BackgroundJitDrainTimeout, cancellationToken);
+            }
+        } while (batchStarted);
+        return AllAdvanced(previousTierCounter);
+    }
+
+    // Cheap tier-up probe for nudging one call at a time: unlike WaitForQuiescentTierUp it never waits out a full settle
+    // window. Returns true at once when every still-tiering watched method is already past `previousTierCounter`;
+    // otherwise it waits out any pause, gives the worker `jitBusyTimeout` to pick the nudge up (draining the batch if it
+    // does) and re-checks. The caller still runs a final WaitForQuiescentTierUp afterwards to settle callees.
+    internal bool WaitForTierUp(int previousTierCounter, TimeSpan jitBusyTimeout, CancellationToken cancellationToken)
+    {
+        if (!AllAdvanced(previousTierCounter))
+        {
+            WaitForTieringActive(cancellationToken);
+            bool nudgePickedUp = WaitForBackgroundJitBusy(jitBusyTimeout, cancellationToken);
+            if (nudgePickedUp)
+            {
+                WaitForBackgroundJitIdle(BackgroundJitDrainTimeout, cancellationToken);
+            }
+            return AllAdvanced(previousTierCounter);
+        }
+        return true;
+    }
+
+    // Waits up to `timeout` on the busy signal (worker running a batch). True if it is/becomes set; false on timeout.
+    private bool WaitForBackgroundJitBusy(TimeSpan timeout, CancellationToken cancellationToken)
+        => backgroundJitBusySignal.Wait(timeout, cancellationToken);
+
+    // Waits up to `timeout` on the idle signal, i.e. for the background tiering worker's queue to drain (a
+    // BackgroundJitStop with PendingMethodCount == 0). Callers only wait after observing the worker busy, and a running
+    // batch always finishes — but the BackgroundJitStop can be dropped by EventPipe under buffer pressure (most likely
+    // right here, since a busy drain floods the same buffers with MethodLoadVerbose). So on timeout the idle state is
+    // forced and we proceed: a "busy" left stuck would poison every later quiescence check. Only a dropped Stop hits it.
+    private void WaitForBackgroundJitIdle(TimeSpan timeout, CancellationToken cancellationToken)
+    {
+        if (!backgroundJitIdleSignal.Wait(timeout, cancellationToken))
+        {
+            lock (syncRoot)
+            {
+                // Reset busy before setting idle so a reader never sees both set at once (same order as HandleBackgroundJitStop).
+                backgroundJitBusySignal.Reset();
+                backgroundJitIdleSignal.Set();
+            }
+        }
+    }
+
+    // True when no watched method is still behind: each one is at its final tier or has more tier-ups than the given count.
+    private bool AllAdvanced(int previousTierCount)
+    {
+        foreach (WatchedMethod watched in watchedMethods)
+        {
+            bool stillBehind = !watched.reachedFinalTier && watched.tierUpCount <= previousTierCount;
+            if (stillBehind)
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    protected override void OnEventSourceCreated(EventSource source)
+    {
+        // Ignore every source except the one named RuntimeEventSourceName.
+        if (source.Name != RuntimeEventSourceName)
+            return;
+        EventKeywords wanted = JitKeyword | CompilationKeyword;
+        EnableEvents(source, EventLevel.Verbose, wanted);
+        // IsEnabled is true only when EventSource is supported AND the enable took effect at the level/keywords we need.
+        canObserve = source.IsEnabled(EventLevel.Verbose, wanted);
+    }
+
+    protected override void OnEventWritten(EventWrittenEventArgs e)
+    {
+        if (!canObserve) // the enable was not confirmed by OnEventSourceCreated — ignore everything
+            return;
+        string? name = e.EventName;
+        if (name is null)
+            return;
+
+        // The runtime brackets the call-counting delay with Pause/Resume: Pause when a new tier0 method's first call
+        // (re)starts the delay, Resume when it elapses and the whole pending list of counting stubs is installed.
+        // tieringActiveSignal is the flip-flop the burst gates wait on (Set on Resume = delay inactive = stubs live,
+        // Reset on Pause); tieringActivePrimedSignal only records that some delay activity occurred (set by either).
+        if (name == TieredCompilationResumeEvent)
+        {
+            lock (syncRoot)
+            {
+                if (disposed) // Dispose sets this under the same lock before disposing the signals
+                    return;
+                tieringActiveSignal.Set();
+                tieringActivePrimedSignal.Set();
+            }
+            return;
+        }
+        if (name == TieredCompilationPauseEvent)
+        {
+            lock (syncRoot)
+            {
+                if (disposed)
+                    return;
+                tieringActiveSignal.Reset();
+                tieringActivePrimedSignal.Set();
+            }
+            return;
+        }
+        if (name == TieredCompilationBackgroundJitStartEvent)
+        {
+            lock (syncRoot)
+            {
+                if (disposed)
+                    return;
+                // The worker began a batch (the watched method(s) and/or untracked callees tiering up). Idle is Reset
+                // before busy is Set so a reader never sees both set at once.
+                backgroundJitIdleSignal.Reset();
+                backgroundJitBusySignal.Set();
+            }
+            return;
+        }
+        if (name == TieredCompilationBackgroundJitStopEvent)
+        {
+            HandleBackgroundJitStop(e);
+            return;
+        }
+
+        if (name.StartsWith(MethodLoadVerbosePrefix, StringComparison.Ordinal)) // prefix match, not equality
+        {
+            HandleMethodLoad(e);
+        }
+    }
+
+    private void HandleBackgroundJitStop(EventWrittenEventArgs e)
+    {
+        var fieldNames = e.PayloadNames;
+        var fieldValues = e.Payload;
+        if (fieldNames is null || fieldValues is null)
+            return;
+        // Resolve the PendingMethodCount slot once and cache it; a negative cached index means it is still unresolved.
+        if (backgroundJitStopPendingIndex < 0)
+        {
+            backgroundJitStopPendingIndex = fieldNames.IndexOf("PendingMethodCount");
+            if (backgroundJitStopPendingIndex < 0)
+                return;
+        }
+
+        // The worker stopped; only when nothing is left queued is the batch fully drained and the JIT idle.
+        long stillQueued = Convert.ToInt64(fieldValues[backgroundJitStopPendingIndex]);
+        if (stillQueued != 0)
+            return;
+        lock (syncRoot)
+        {
+            if (disposed)
+                return;
+            // Busy is cleared before idle is raised so a reader never sees both set at once.
+            backgroundJitBusySignal.Reset();
+            backgroundJitIdleSignal.Set();
+        }
+    }
+
+    private void HandleMethodLoad(EventWrittenEventArgs e)
+    {
+        // Handles one MethodLoadVerbose publication: primes the tiering gate on tier0, counts watched-method tier-ups.
+        var payloadNames = e.PayloadNames;
+        var payload = e.Payload;
+        if (payloadNames is null || payload is null)
+            return;
+        // Re-resolve until ALL three are known; a partial lookup must not count as cached (it would index payload with -1).
+        if (loadTokenIndex < 0 || loadFlagsIndex < 0 || loadNameIndex < 0)
+        {
+            loadTokenIndex = payloadNames.IndexOf("MethodToken");
+            loadFlagsIndex = payloadNames.IndexOf("MethodFlags");
+            loadNameIndex = payloadNames.IndexOf("MethodName");
+            if (loadTokenIndex < 0 || loadFlagsIndex < 0 || loadNameIndex < 0)
+                return;
+        }
+        long tier = (Convert.ToInt64(payload[loadFlagsIndex]) >> OptimizationTierShift) & OptimizationTierMask;
+
+        // A QuickJitted (tier0) publication — for ANY method, not just the one we watch — means an eligible method was
+        // just tier0-compiled and is about to run, so its first call will start or join the call-counting delay and a
+        // TieredCompilationResume is coming. That is exactly (and all) the up-front gate (WaitForInitialTieringActive)
+        // needs: it only asks "is the tiering machinery active, so a Resume will arrive to gate on?", which is a
+        // process-wide question. (Pause/Resume prime it too; this also covers the brief window before the first call
+        // fires Pause.) The tier0 compile itself is the baseline, not a tier-up, so we never count it.
+        if (tier == QuickJittedTier0)
+        {
+            lock (syncRoot)
+            {
+                if (disposed)
+                    return;
+                tieringActivePrimedSignal.Set();
+            }
+            return;
+        }
+
+        // An OSR publication is not a step on the call-count tier ladder (it fires off a hot loop's back-edge counter,
+        // and the method goes on to be call-count-promoted past it), so don't let it count as a tier-up — otherwise a
+        // method that OSRs in multiple bodies overruns its tier count and the stage stops short of the final tier.
+        if (tier == OptimizedTier1OSR)
+            return;
+
+        // Everything below concerns one of OUR watched methods reaching its next tier, so find the matching one (if any).
+        int token = Convert.ToInt32(payload[loadTokenIndex]);
+        string? name = payload[loadNameIndex] as string;
+        WatchedMethod? matched = null;
+        foreach (var candidate in watchedMethods)
+        {
+            if (candidate.MetadataToken == token && candidate.Name == name)
+            {
+                matched = candidate;
+                break;
+            }
+        }
+        if (matched is null)
+            return;
+
+        // Any of the runtime's final tiers means the method is fully warmed and will emit no further tier-ups —
+        // whether it tiered all the way up (OptimizedTier1), was compiled straight to optimized code (Optimized), or
+        // never tiers at all (MinOptJitted).
+        bool isFinalTier = tier == OptimizedTier1 || tier == Optimized || tier == MinOptJitted;
+
+        lock (syncRoot)
+        {
+            if (disposed)
+                return;
+            // Count this tier-up so callers can detect the method advanced beyond a given tier.
+            matched.tierUpCount++;
+            // Track per-method completion so reachedFinalTier flips only once the LAST watched method is done.
+            if (isFinalTier && !matched.reachedFinalTier)
+            {
+                matched.reachedFinalTier = true;
+                if (++finalTierCount == watchedMethods.Length)
+                    reachedFinalTier = true;
+            }
+        }
+    }
+
+    private sealed class WatchedMethod(MethodInfo method) // per-method tiering state plus the identity used to match events
+    {
+        internal volatile int tierUpCount; // incremented by HandleMethodLoad under syncRoot; read lock-free by AllAdvanced
+        internal volatile bool reachedFinalTier; // set by HandleMethodLoad under syncRoot; read lock-free by AllAdvanced
+        // HandleMethodLoad matches an event to this entry by comparing both values with the event's payload.
+        internal int MetadataToken => method.MetadataToken;
+        internal string Name => method.Name;
+    }
+
+    public override void Dispose()
+    {
+        lock (syncRoot)
+        {
+            disposed = true; // every event handler checks this under the same lock before touching a signal
+        }
+        // base.Dispose disables the events we enabled (when no other listener wants them).
+        base.Dispose();
+        tieringActivePrimedSignal.Dispose(); // signals are released last, after the flag is set and events are disabled
+        tieringActiveSignal.Dispose();
+        backgroundJitBusySignal.Dispose();
+        backgroundJitIdleSignal.Dispose();
+    }
+}
diff --git a/src/BenchmarkDotNet/Portability/JitInfo.cs b/src/BenchmarkDotNet/Portability/JitInfo.cs
index 103dc6881a..3a85d28bcc 100644
--- a/src/BenchmarkDotNet/Portability/JitInfo.cs
+++ b/src/BenchmarkDotNet/Portability/JitInfo.cs
@@ -180,7 +180,7 @@ private static TimeSpan GetTieredDelay()
     /// </summary>
     public static readonly TimeSpan BackgroundCompilationDelay =
         IsTiered
-            // It's impossible for us to know exactly how long to wait without hooking into JIT notifications (which we can't do in-process).
+            // It's impossible for us to know exactly how long to wait without hooking into JIT notifications.
             // 100ms should be enough most of the time, but we bump it up to 250ms for higher confidence.
             // When https://github.com/dotnet/runtime/issues/101868 is resolved, if AggressiveTiering is enabled, we can skip the wait time and return TimeSpan.Zero.
             ? TimeSpan.FromMilliseconds(250)
diff --git a/src/BenchmarkDotNet/Templates/BenchmarkType.txt b/src/BenchmarkDotNet/Templates/BenchmarkType.txt
index 53d97e831b..ac9d89c84c 100644
--- a/src/BenchmarkDotNet/Templates/BenchmarkType.txt
+++ b/src/BenchmarkDotNet/Templates/BenchmarkType.txt
@@ -39,6 +39,7 @@
             global::BenchmarkDotNet.Engines.EngineParameters engineParameters = new global::BenchmarkDotNet.Engines.EngineParameters()
             {
                 Host = host,
+                WorkloadMethods = instance.__ResolveWorkloadMethods(host),
                 WorkloadActionUnroll = instance.WorkloadActionUnroll,
                 WorkloadActionNoUnroll = instance.WorkloadActionNoUnroll,
                 OverheadActionNoUnroll = instance.OverheadActionNoUnroll,
@@ -71,6 +72,51 @@
 
         $DeclareFieldsContainer$
 
+        private global::System.Reflection.MethodInfo[] __ResolveWorkloadMethods(global::BenchmarkDotNet.Engines.IHost host)
+        {
+            // Best-effort lookup of the benchmark method by name + exact parameter types. The jit stage uses the result
+            // to watch JIT tier-up events and falls back to a fixed delay when it is empty, so neither a missed match nor a
+            // reflection failure (e.g. a same-named overload's parameter type fails to load) may break the benchmark.
+            try
+            {
+                global::System.Type[] parameterTypes = $WorkloadMethodParameterTypes$;
+                foreach (global::System.Reflection.MethodInfo candidate in typeof($WorkloadTypeName$).GetMethods(
+                    global::System.Reflection.BindingFlags.Instance | global::System.Reflection.BindingFlags.Static |
+                    global::System.Reflection.BindingFlags.Public | global::System.Reflection.BindingFlags.NonPublic))
+                {
+                    if (candidate.Name != "$WorkloadMethodName$")
+                    {
+                        continue;
+                    }
+                    global::System.Reflection.ParameterInfo[] parameters = candidate.GetParameters();
+                    if (parameters.Length != parameterTypes.Length) // different arity: another overload
+                    {
+                        continue;
+                    }
+                    global::System.Boolean isMatch = true;
+                    for (global::System.Int32 i = 0; i < parameters.Length; i++)
+                    {
+                        if (parameters[i].ParameterType != parameterTypes[i]) // types must be identical, position by position
+                        {
+                            isMatch = false;
+                            break;
+                        }
+                    }
+                    if (isMatch)
+                    {
+                        return new global::System.Reflection.MethodInfo[] { candidate }; // the first exact match wins
+                    }
+                }
+            }
+            catch (global::System.Exception e)
+            {
+                host.SendError($"Exception during __ResolveWorkloadMethods!{(global::System.Environment.NewLine)}{e}");
+                return global::System.Array.Empty<global::System.Reflection.MethodInfo>();
+            }
+            host.WriteLine("// Could not resolve the benchmark method '$WorkloadMethodName$' to watch JIT tier-up events; the jit stage will fall back to a fixed delay.");
+            return global::System.Array.Empty<global::System.Reflection.MethodInfo>(); // reached only when no candidate matched
+        }
+
         private $GlobalSetupModifiers$ global::System.Threading.Tasks.ValueTask __GlobalSetup()
         {
             $GlobalSetupImpl$
diff --git a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/AsyncStateMachineEmitter.cs b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/AsyncStateMachineEmitter.cs
index 81519664d6..7646bddf84 100644
--- a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/AsyncStateMachineEmitter.cs
+++ b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/AsyncStateMachineEmitter.cs
@@ -14,16 +14,16 @@ partial class RunnableEmitter
     // This doesn't really matter for the runtime, but it helps with the NaiveRunnableEmitDiff tests.
     protected virtual IReadOnlyDictionary<string, int> AsyncMethodToOrdinalMap { get; } = new Dictionary<string, int>
     {
-        { GlobalSetupMethodName, 4 },
-        { GlobalCleanupMethodName, 5 },
-        { IterationSetupMethodName, 6 },
-        { IterationCleanupMethodName, 7 },
-        { OverheadActionUnrollMethodName, 11 },
-        { OverheadActionNoUnrollMethodName, 12 },
-        { WorkloadActionUnrollMethodName, 13 },
-        { WorkloadActionNoUnrollMethodName, 14 },
-        { StartWorkloadMethodName, 15 },
-        { WorkloadCoreMethodName, 16 },
+        { GlobalSetupMethodName, 5 },
+        { GlobalCleanupMethodName, 6 },
+        { IterationSetupMethodName, 7 },
+        { IterationCleanupMethodName, 8 },
+        { OverheadActionUnrollMethodName, 12 },
+        { OverheadActionNoUnrollMethodName, 13 },
+        { WorkloadActionUnrollMethodName, 14 },
+        { WorkloadActionNoUnrollMethodName, 15 },
+        { StartWorkloadMethodName, 16 },
+        { WorkloadCoreMethodName, 17 },
     };
 
     private record struct AsyncStateMachineFields(FieldInfo StateField, FieldInfo BuilderField, FieldInfo? ThisField);
diff --git a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/SyncTaskCoreEmitter.cs b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/SyncTaskCoreEmitter.cs
index b9a338b07f..1e4350f3d6 100644
--- a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/SyncTaskCoreEmitter.cs
+++ b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/Implementation/Emitters/SyncTaskCoreEmitter.cs
@@ -25,10 +25,10 @@ protected override IReadOnlyDictionary<string, int> AsyncMethodToOrdinalMap
                 ? base.AsyncMethodToOrdinalMap
                 : new Dictionary<string, int>
                 {
-                    { GlobalSetupMethodName, 2 },
-                    { GlobalCleanupMethodName, 3 },
-                    { IterationSetupMethodName, 4 },
-                    { IterationCleanupMethodName, 5 },
+                    { GlobalSetupMethodName, 3 },
+                    { GlobalCleanupMethodName, 4 },
+                    { IterationSetupMethodName, 5 },
+                    { IterationCleanupMethodName, 6 },
                 };
 
         protected override void EmitExtraGlobalCleanup(ILGenerator ilBuilder, LocalBuilder? thisLocal) { }
diff --git a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/InProcessEmitRunner.cs b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/InProcessEmitRunner.cs
index 4813b111e4..37a1e04973 100644
--- a/src/BenchmarkDotNet/Toolchains/InProcess/Emit/InProcessEmitRunner.cs
+++ b/src/BenchmarkDotNet/Toolchains/InProcess/Emit/InProcessEmitRunner.cs
@@ -92,6 +92,7 @@ private static async ValueTask RunCore(Type runnableType, IHost host, ExecutePar
         var engineParameters = new EngineParameters()
         {
             Host = host,
+            WorkloadMethods = [benchmarkCase.Descriptor.WorkloadMethod],
             WorkloadActionUnroll = LoopCallbackFromMethod(instance, WorkloadActionUnrollMethodName),
             WorkloadActionNoUnroll = LoopCallbackFromMethod(instance, WorkloadActionNoUnrollMethodName),
             OverheadActionNoUnroll = LoopCallbackFromMethod(instance, OverheadActionNoUnrollMethodName),
diff --git a/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs b/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs
index a0f3b1f78d..d124b765e0 100644
--- a/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs
+++ b/src/BenchmarkDotNet/Toolchains/InProcess/NoEmit/InProcessNoEmitRunner.cs
@@ -179,6 +179,7 @@ public static async ValueTask RunCore(IHost host, ExecuteParameters parameters,
                 var engineParameters = new EngineParameters
                 {
                     Host = host,
+                    WorkloadMethods = [target.WorkloadMethod],
                     WorkloadActionNoUnroll = workloadAction.InvokeNoUnroll,
                     WorkloadActionUnroll = workloadAction.InvokeUnroll,
                     OverheadActionNoUnroll = overheadAction.InvokeNoUnroll,
diff --git a/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj b/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj
index 89939ea2fb..aabd80ffeb 100644
--- a/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj
+++ b/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj
@@ -23,8 +23,6 @@
       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     </Content>
     <None Include="wwwroot\**" CopyToOutputDirectory="PreserveNewest" />
-    <!-- Disable EventSource to stabilize MemoryDiagnoserTests. https://github.com/dotnet/BenchmarkDotNet/pull/2562#issuecomment-2081317379 -->
-    <RuntimeHostConfigurationOption Include="System.Diagnostics.Tracing.EventSource.IsSupported" Value="false" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\BenchmarkDotNet.IntegrationTests.ConfigPerAssembly\BenchmarkDotNet.IntegrationTests.ConfigPerAssembly.csproj" />
diff --git a/tests/BenchmarkDotNet.IntegrationTests/InProcess.EmitTests/NaiveRunnableEmitDiff.cs b/tests/BenchmarkDotNet.IntegrationTests/InProcess.EmitTests/NaiveRunnableEmitDiff.cs
index 2ee80f55bc..8cea409606 100644
--- a/tests/BenchmarkDotNet.IntegrationTests/InProcess.EmitTests/NaiveRunnableEmitDiff.cs
+++ b/tests/BenchmarkDotNet.IntegrationTests/InProcess.EmitTests/NaiveRunnableEmitDiff.cs
@@ -27,7 +27,8 @@ public class NaiveRunnableEmitDiff
         private static readonly HashSet<string> IgnoredRunnableMethodNames =
         [
             "Run",
-            ".ctor"
+            ".ctor",
+            "__ResolveWorkloadMethods"
         ];
 
         private static readonly IReadOnlyDictionary<OpCode, OpCode> AltOpCodes = new Dictionary<OpCode, OpCode>()
diff --git a/tests/BenchmarkDotNet.IntegrationTests/JitListenerTests.cs b/tests/BenchmarkDotNet.IntegrationTests/JitListenerTests.cs
new file mode 100644
index 0000000000..d7e3b6ac59
--- /dev/null
+++ b/tests/BenchmarkDotNet.IntegrationTests/JitListenerTests.cs
@@ -0,0 +1,320 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using BenchmarkDotNet.Engines;
+using BenchmarkDotNet.Jobs;
+using BenchmarkDotNet.Portability;
+using BenchmarkDotNet.Reports;
+using BenchmarkDotNet.Tests.XUnit;
+using Perfolizer.Horology;
+
+namespace BenchmarkDotNet.IntegrationTests;
+
+public class JitListenerTests
+{
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_Cold()
+    {
+        // Baseline: no test invokes Cold ahead of the stage, so the stage alone must drive it to its final tier.
+        Func<long, long> target = Cold;
+        using var listener = JitListener.Create([target.Method]);
+
+        RunJitStageToCompletion(target, listener);
+
+        AssertReachedFinalTier(listener);
+    }
+
+    // Covers InProcess runs that benchmark the same method more than once.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_AlreadyTier1()
+    {
+        Func<long, long> target = AlreadyTier1;
+
+        using var firstListener = JitListener.Create([target.Method]);
+
+        // The first pass takes the method to tier1 (in an optimized build) and firstListener records it. A second pass
+        // over the same (now tier1) method must complete as well; it gets a fresh listener, because the stage drove
+        // the first to completion and reusing one across runs would leave its tiering signals ambiguous.
+        RunJitStageToCompletion(target, firstListener);
+        using var secondListener = JitListener.Create([target.Method]);
+        RunJitStageToCompletion(target, secondListener);
+
+        AssertReachedFinalTier(firstListener);
+    }
+
+    // InProcess scenario: the user invoked the method before benchmarking started, and call counting is already active.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_AlreadyTier0()
+    {
+        Func<long, long> target = AlreadyTier0;
+        // The listener is created ahead of the pre-invoke and then handed to the stage, so the stage never creates a
+        // second one (see RunJitStageToCompletion): in a minopt build the pre-invoke is the method's only compile.
+        using var listener = JitListener.Create([target.Method]);
+
+        DeadCodeEliminationHelper.KeepAliveWithoutBoxing(AlreadyTier0(42));
+        // Wait two tiering delays so that tiered call counting has begun before the stage starts.
+        Engine.SleepIfPositive(JitInfo.TieredDelay + JitInfo.TieredDelay);
+
+        RunJitStageToCompletion(target, listener);
+
+        AssertReachedFinalTier(listener);
+    }
+
+    // InProcess scenario: the user invoked the method before benchmarking started, and call counting is still delayed.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_AlreadyTier0DelayedCallCounting()
+    {
+        Func<long, long> target = AlreadyTier0DelayedCallCounting;
+        // The listener is created ahead of the pre-invoke and handed to the stage (see RunJitStageToCompletion). There
+        // is deliberately NO sleep here: the point of this test is that the call-counting delay is still pending when
+        // the stage starts. Because the stage reuses this one listener, the pre-invoke's event is never lost to a
+        // second listener's session churn, so no wait is needed to observe the final tier.
+        using var listener = JitListener.Create([target.Method]);
+
+        DeadCodeEliminationHelper.KeepAliveWithoutBoxing(AlreadyTier0DelayedCallCounting(42));
+
+        RunJitStageToCompletion(target, listener);
+
+        AssertReachedFinalTier(listener);
+    }
+
+    // Covers a benchmark method whose own hot loop is On-Stack-Replaced (OSR) mid-execution. Where OSR is enabled
+    // (by default in .NET 7+) the method goes through an OSR publication on top of its normal tier-ups, and the stage
+    // must still reach OptimizedTier1 — JitInfo.MaxTierPromotions reserves an extra promotion for the OSR-induced
+    // double tier0-instrumentation. Where OSR is off this is simply a hot-loop method that tiers up normally; either
+    // way it ends at tier1.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_Osr()
+    {
+        Func<long, long> target = Osr;
+
+        using var listener = JitListener.Create([target.Method]);
+
+        RunJitStageToCompletion(target, listener);
+
+        AssertReachedFinalTier(listener);
+    }
+
+    // Tests a benchmark method that calls (without inlining) a separate method whose hot loop is OSR'd. The stage's
+    // listener watches only the benchmark method, never the callee, so the stage can't observe the callee's tiering —
+    // this exercises the runtime bug where an OSR'd callee gets tier0-instrumented twice (JitInfo.MaxTierPromotions
+    // reserves the extra promotion the stage spends on it). The benchmark method must still reach OptimizedTier1.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_CallsOsr()
+    {
+        Func<long, long> caller = CallsOsr;
+        Func<long, long> callee = OsrCallee;
+
+        using var callerListener = JitListener.Create([caller.Method]);
+        // The stage drives only the benchmark method, but every call to it also calls the OSR'd callee, so the callee
+        // should be driven all the way to tier1 too. A second, independent listener verifies that.
+        using var calleeListener = JitListener.Create([callee.Method]);
+
+        RunJitStageToCompletion(caller, callerListener);
+
+        AssertReachedFinalTier(callerListener);
+        AssertReachedFinalTier(calleeListener);
+    }
+
+    // A method pinned to a single optimization level never tiers, yet the listener still watches it and treats its
+    // one-and-only compile as a final tier (MinOptJitted or Optimized) — so the stage observes "done" rather than
+    // depending on an attribute heuristic to decline up front.
+
+    // [MethodImpl(NoOptimization)] pins the method to minopts, so its final tier is MinOptJitted.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_NoOptimization()
+    {
+        Func<long, long> target = NoOptimization;
+
+        using var listener = JitListener.Create([target.Method]);
+
+        RunJitStageToCompletion(target, listener);
+
+        AssertReachedFinalTier(listener);
+    }
+
+    // [MethodImpl(AggressiveOptimization)] sends the method straight to optimized code, skipping the tier0 -> tier1
+    // path, so its final tier is Optimized (or OptimizedTier1, depending on runtime).
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_AggressiveOptimization()
+    {
+        Func<long, long> target = AggressiveOptimization;
+
+        using var listener = JitListener.Create([target.Method]);
+
+        RunJitStageToCompletion(target, listener);
+
+        AssertReachedFinalTier(listener);
+    }
+
+    // One listener may watch several methods at once. Here a single listener watches two distinct methods, and the
+    // stage drives both (the workload action calls each per invocation). ReachedFinalTier is the aggregate, so it only
+    // becomes true once BOTH have reached their final tier — exercising the multi-method path that scenarios like
+    // dotnet/BenchmarkDotNet#147 (driving several methods) will rely on.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_MultipleMethods()
+    {
+        Func<long, long> methodA = MultiFirst;
+        Func<long, long> methodB = MultiSecond;
+
+        using var sharedListener = JitListener.Create([methodA.Method, methodB.Method]);
+
+        RunJitStageToCompletion(sharedListener, [methodA.Method, methodB.Method], arg => { methodA(arg); methodB(arg); });
+
+        AssertReachedFinalTier(sharedListener);
+    }
+
+    // Watched methods need not be invoked at the same rate (e.g. dotnet/BenchmarkDotNet#147 may call one more often
+    // than another). Here the "fast" method is invoked three times per iteration and the "slow" one once, so the fast
+    // method crosses its call-count thresholds — and thus tiers up — sooner. Each method banks its own publication
+    // permits, so the fast method's extra/earlier promotions aren't dropped while the slow one catches up; both must
+    // still reach their final tier.
+    [FactEnvSpecific("Only CoreCLR supports tiered JIT", EnvRequirement.DotNetCoreOnly)]
+    public void JitStage_MultipleMethodsUnevenInvocationRates()
+    {
+        Func<long, long> fast = MultiUnevenFast;
+        Func<long, long> slow = MultiUnevenSlow;
+
+        using var observer = JitListener.Create([fast.Method, slow.Method]);
+
+        int invokeCount = 0; // Workload invocations since the last pause (each one calls fast three times).
+        int tieredCount = 0; // Pauses taken so far; capped at JitInfo.MaxTierPromotions.
+        RunJitStageToCompletion(observer, [fast.Method, slow.Method], i =>
+        {
+            fast(i);
+            fast(i);
+            fast(i);
+            if (tieredCount < JitInfo.MaxTierPromotions && ++invokeCount * 3 > JitInfo.TieredCallCountThreshold)
+            {
+                Thread.Sleep(200); // Sleep to let the JIT compile the fast method before the slow method.
+                (invokeCount, tieredCount) = (0, tieredCount + 1); // Start counting toward the next promotion.
+            }
+            slow(i);
+        });
+
+        AssertReachedFinalTier(observer);
+    }
+
+    private static void AssertReachedFinalTier(JitListener? observer)
+    {
+        // Checked immediately, with no wait: the tier-up event is delivered while the stage is still running (it spans
+        // hundreds of ms of tiering delays), so the observer has already recorded the final tier when the stage returns.
+        Assert.NotNull(observer);
+        Assert.True(observer.ReachedFinalTier, "the jit stage should have driven the benchmark method to its final tier");
+    }
+
+    // Single-method convenience overload. The test owns the listener and passes it in; the stage uses that exact
+    // instance (it never creates its own), so no second EventListener's setup/teardown can flush an in-flight event.
+    private static void RunJitStageToCompletion(Func<long, long> workloadMethod, JitListener? listener)
+        => RunJitStageToCompletion(listener: listener, workloadMethods: [workloadMethod.Method], invokeOnce: arg => workloadMethod(arg));
+
+    // Core harness: wraps the given workloadMethods in EngineParameters and pumps an EngineJitStage until it requests no
+    // further iterations. The workload action calls invokeOnce once per requested invocation, so the methods really tier.
+    private static void RunJitStageToCompletion(JitListener? listener, MethodInfo[] workloadMethods, Action<long> invokeOnce)
+    {
+        // The per-tier publication wait is unbounded and only cancellable through the host's token. When the method
+        // tiers, the stage re-bursts until the runtime reports the next-tier compile began, so the wait isn't actually
+        // hit — but a generous timeout turns a stalled tier-up into a cancellation instead of wedging the test run.
+        using var watchdog = new CancellationTokenSource(TimeSpan.FromSeconds(60));
+        var host = new CancellableHost(watchdog.Token);
+        Func<long, IClock, ValueTask<ClockSpan>> noOp = (_, _) => new(default(ClockSpan));
+        Func<long, IClock, ValueTask<ClockSpan>> invokeAll = (invokeCount, _) =>
+        {
+            // Genuinely call the benchmark method(s) so they pass through call counting and tier up for real.
+            for (long call = 0; call < invokeCount; call++)
+                invokeOnce(call);
+            return new(default(ClockSpan));
+        };
+
+        var engineParameters = new EngineParameters
+        {
+            TargetJob = Job.Default,
+            BenchmarkName = "",
+            Host = host,
+            WorkloadMethods = workloadMethods,
+            WorkloadActionUnroll = invokeAll,
+            WorkloadActionNoUnroll = invokeAll,
+            OverheadActionUnroll = noOp,
+            OverheadActionNoUnroll = noOp,
+            GlobalSetupAction = () => new(),
+            GlobalCleanupAction = () => new(),
+            IterationSetupAction = () => new(),
+            IterationCleanupAction = () => new(),
+            InProcessDiagnoserHandler = new([], host, BenchmarkDotNet.Diagnosers.RunMode.None, null!),
+        };
+
+        var jitStage = new EngineJitStage(evaluateOverhead: false, engineParameters, listener);
+        var recorded = jitStage.GetMeasurementList();
+        while (jitStage.GetShouldRunIteration(recorded, out var iteration))
+        {
+            iteration.setupAction().GetAwaiter().GetResult();
+            iteration.workloadAction(iteration.invokeCount / iteration.unrollFactor, null!).GetAwaiter().GetResult();
+            iteration.cleanupAction().GetAwaiter().GetResult();
+            // Recording zero elapsed time keeps the stage out of its "long-running benchmark" early-exit
+            // (iterationTime / 0 == Infinity, and Infinity < 1.5 is false).
+            recorded.Add(new Measurement(1, iteration.mode, iteration.stage, iteration.index, iteration.invokeCount, 0d));
+        }
+    }
+
+    // Benchmark-method stand-ins. A distinct method per tier-up scenario so that one test tiering a method up doesn't
+    // affect another's starting state. In a DisableOptimizations build these are JITted at minopts and never tier; in
+    // an optimized build they reach OptimizedTier1 (except the two pinned by NoOptimization/AggressiveOptimization).
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long Cold(long x) => x * x + 1;
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long AlreadyTier1(long x) => x * x + 1;
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long AlreadyTier0(long x) => x * x + 1;
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long AlreadyTier0DelayedCallCounting(long x) => x * x + 1;
+    [MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)]
+    private static long NoOptimization(long x) => x * x + 1; // Used by JitStage_NoOptimization.
+    [MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)]
+    private static long AggressiveOptimization(long x) => x * x + 1; // Used by JitStage_AggressiveOptimization.
+    // Two distinct methods watched together by one listener (JitStage_MultipleMethods).
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long MultiFirst(long x) => x * x + 1;
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long MultiSecond(long x) => x * x + 1;
+    // Watched together by one listener but invoked at different rates (JitStage_MultipleMethodsUnevenInvocationRates).
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long MultiUnevenFast(long x) => x * x + 1;
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long MultiUnevenSlow(long x) => x * x + 1;
+
+    // Shared by Osr and OsrCallee: a loop long enough to cross the OSR back-edge threshold so these methods are
+    // On-Stack-Replaced where OSR is enabled. Timing is irrelevant (RunJitStageToCompletion records 0ns measurements,
+    // so the stage never takes its long-running early-exit); the only requirement is enough iterations to trigger OSR.
+    private const int OsrLoopCount = 1_000_000;
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long Osr(long x) // Benchmark method of JitStage_Osr: its own loop is the OSR candidate.
+    {
+        long sum = x;
+        for (int i = 0; i < OsrLoopCount; i++)
+            sum += i;
+        return sum;
+    }
+    // The benchmark method of JitStage_CallsOsr: it only calls OsrCallee, which NoInlining keeps as a separate jit unit.
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long CallsOsr(long x) => OsrCallee(x);
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static long OsrCallee(long x) // Same loop as Osr; JitStage_CallsOsr watches it with its own listener.
+    {
+        long sum = x;
+        for (int i = 0; i < OsrLoopCount; i++)
+            sum += i;
+        return sum;
+    }
+
+    // Bare-bones IHost: it only exposes the given token so the stage's unbounded per-tier wait stays cancellable.
+    private sealed class CancellableHost(CancellationToken cancellationToken) : IHost
+    {
+        public CancellationToken CancellationToken => cancellationToken;
+        public ValueTask SendSignalAsync(HostSignal hostSignal) => default;
+        public ValueTask Yield() => default;
+        public void ReportResults(RunResults runResults) { }
+        public void SendError(string message) { }
+        public void WriteLine(string message) { }
+        public void WriteLine() { }
+        public void Dispose() { }
+    }
+}
diff --git a/tests/BenchmarkDotNet.IntegrationTests/ValuesReturnedByBenchmarkTest.cs b/tests/BenchmarkDotNet.IntegrationTests/ValuesReturnedByBenchmarkTest.cs
index 6c35a91748..77c0fc4567 100644
--- a/tests/BenchmarkDotNet.IntegrationTests/ValuesReturnedByBenchmarkTest.cs
+++ b/tests/BenchmarkDotNet.IntegrationTests/ValuesReturnedByBenchmarkTest.cs
@@ -107,6 +107,11 @@ public class Job { }
             [Benchmark]
             public unsafe int* PointerToUnmanagedType() => (int*)System.IntPtr.Zero.ToPointer();
 
+            [Benchmark]
+            public unsafe delegate*<int, int> FunctionPointer() => &ReturnArgument;
+
+            private static int ReturnArgument(int value) => value;
+
             [Benchmark]
             public System.IntPtr IntPtr() => System.IntPtr.Zero;
 
diff --git a/tests/BenchmarkDotNet.Tests/Engine/EnumerateStagesTests.cs b/tests/BenchmarkDotNet.Tests/Engine/EnumerateStagesTests.cs
index 569889ba10..470a5a7fa6 100644
--- a/tests/BenchmarkDotNet.Tests/Engine/EnumerateStagesTests.cs
+++ b/tests/BenchmarkDotNet.Tests/Engine/EnumerateStagesTests.cs
@@ -31,7 +31,7 @@ public void JobsThatDontRequireJittingSkipJitStage(string jobName)
             var engineParameters = CreateEngineParameters(job);
 
             bool didRunStages = false;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 Assert.True(stage is not EngineJitStage);
                 didRunStages = true;
@@ -47,7 +47,7 @@ public void DefaultSettingsVeryTimeConsumingBenchmarksAreExecutedOncePerIteratio
             var engineParameters = CreateEngineParameters(Job.Default);
 
             bool didRunActualStage = false;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 Assert.NotEqual(IterationMode.Overhead, stage.Mode);
 
@@ -81,7 +81,7 @@ public void BenchmarksThatRunLongerThanIterationTimeOnlyDuringFirstInvocationAre
             var engineParameters = CreateEngineParameters(Job.Default.WithIterationTime(TimeInterval.FromMilliseconds(iterationTime)));
 
             bool didRunActualStage = false;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -119,7 +119,7 @@ private void AssertUnroll(Job job)
             var engineParameters = CreateEngineParameters(job);
 
             bool didRunUnroll = false;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -150,7 +150,7 @@ public void JobWithExplicitInvocationCount(long invocationCount)
             // A short measurement encourages the JIT stage to batch many invocations into a single iteration,
             // which is the regression introduced by #2806.
             var fastMeasurement = TimeInterval.FromMicroseconds(1);
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -177,7 +177,7 @@ public void LongRunningBenchmarksExitJitStageEarly()
             var engineParameters = CreateEngineParameters(job);
 
             int jitWorkloadCount = 0;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -210,7 +210,7 @@ public void SlowFirstIterationButFastSteadyStateDoesNotExitJitStageEarly()
             var engineParameters = CreateEngineParameters(Job.Default.WithInvocationCount(1).WithUnrollFactor(1));
 
             int jitWorkloadCount = 0;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -226,9 +226,9 @@ public void SlowFirstIterationButFastSteadyStateDoesNotExitJitStageEarly()
                 if (stage is EngineJitStage) break;
             }
 
-            // Pre-loop iter + confirmation + full tiering loop (one yield per tier since the user
-            // pinned InvocationCount=1, matching JitInfo.MaxTierPromotions * TieredCallCountThreshold)
-            // + one stabilization iteration. Just assert it ran the full tiering loop rather than bailing.
+            // Pre-loop iter + confirmation + full tiering loop (one yield per tier since the user pinned
+            // InvocationCount=1, matching JitInfo.MaxTierPromotions * TieredCallCountThreshold) + the trailing
+            // stabilization iteration. Just assert it ran the full tiering loop rather than bailing.
             Assert.True(jitWorkloadCount > 2, $"Expected the tiering loop to run after confirmation disagreed, got {jitWorkloadCount} jitting iterations.");
         }
 
@@ -247,7 +247,7 @@ public void ForceJitTieringModeRunsFullTieringLoopEvenForLongRunningBenchmarks()
 
             int jitWorkloadCount = 0;
             bool didStopEarly = false;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -282,7 +282,7 @@ public void SkipJitTieringModeSkipsTierPromotion()
 
             int jitWorkloadCount = 0;
             bool didStopEarly = false;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -316,7 +316,7 @@ public void MediumTimeConsumingBenchmarksStartPilotFrom2AndIncrementItWithEveryS
             var engineParameters = CreateEngineParameters(Job.Default);
 
             bool didRunPilotStage = false;
-            foreach (var stage in EngineStage.EnumerateStages(engineParameters))
+            foreach (var stage in EngineStage.EnumerateStages(engineParameters, skipJitDelays: true))
             {
                 var stageMeasurements = stage.GetMeasurementList();
                 while (stage.GetShouldRunIteration(stageMeasurements, out var iterationData))
@@ -347,6 +347,7 @@ private EngineParameters CreateEngineParameters(Job job)
             Func<long, IClock, ValueTask<ClockSpan>> emptyAction = (_, _) => new(default(ClockSpan));
             return new()
             {
+                WorkloadMethods = [],
                 GlobalSetupAction = () => new(),
                 GlobalCleanupAction = () => new(),
                 Host = host,
diff --git a/tests/BenchmarkDotNet.Tests/Shared/Mocks/MockEngine.cs b/tests/BenchmarkDotNet.Tests/Shared/Mocks/MockEngine.cs
index 59dcb72633..a6e6ab7803 100644
--- a/tests/BenchmarkDotNet.Tests/Shared/Mocks/MockEngine.cs
+++ b/tests/BenchmarkDotNet.Tests/Shared/Mocks/MockEngine.cs
@@ -22,6 +22,7 @@ internal MockEngine(ITestOutputHelper output, Job job, Func<IterationData, TimeI
             Parameters = new EngineParameters
             {
                 TargetJob = job,
+                WorkloadMethods = [],
                 WorkloadActionUnroll = emptyAction,
                 WorkloadActionNoUnroll = emptyAction,
                 OverheadActionUnroll = emptyAction,