Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 93 additions & 11 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ permissions: {}
push:
branches:
- main
pull_request:
branches:
- main
workflow_dispatch:

jobs:
evm-benchmark:
name: EVM Contract Benchmark
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: write
issues: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
Expand All @@ -29,30 +29,112 @@ jobs:
run: |
cd test/e2e && go test -tags evm -bench=. -benchmem -run='^$' \
-timeout=10m --evm-binary=../../build/evm | tee output.txt
- name: Store benchmark result
- name: Run Block Executor benchmarks
run: |
go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \
./block/internal/executing/... > block_executor_output.txt
- name: Upload benchmark results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: evm-benchmark-results
path: |
test/e2e/output.txt
block_executor_output.txt

spamoor-benchmark:
name: Spamoor Trace Benchmark
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version-file: ./go.mod
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build binaries
run: make build-evm build-da
- name: Run Spamoor smoke test
run: |
cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \
-run='^TestSpamoorSmoke$' -v -timeout=15m --evm-binary=../../build/evm
- name: Upload benchmark results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: spamoor-benchmark-results
path: test/e2e/spamoor_bench.json

# single job to push all results to gh-pages sequentially, avoiding race conditions
publish-benchmarks:
name: Publish Benchmark Results
needs: [evm-benchmark, spamoor-benchmark]
runs-on: ubuntu-latest
permissions:
contents: write
issues: write
pull-requests: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Download EVM benchmark results
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: evm-benchmark-results
- name: Download Spamoor benchmark results
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: spamoor-benchmark-results
path: test/e2e/

# only update the benchmark baseline on push/dispatch, not on PRs
- name: Store EVM Contract Roundtrip result
if: always()
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: EVM Contract Roundtrip
tool: 'go'
output-file-path: test/e2e/output.txt
auto-push: true
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: true
comment-on-alert: true

- name: Run Block Executor benchmarks
run: |
go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \
./block/internal/executing/... > block_executor_output.txt
- name: Store Block Executor benchmark result
# delete local gh-pages so the next benchmark action step fetches fresh from remote
- name: Reset local gh-pages branch
if: always()
run: git branch -D gh-pages || true

- name: Store Block Executor result
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because there are now multiple jobs pushing benchmark results, we gather up all the results first and then push them sequentially, one by one, to avoid race conditions.

if: always()
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Block Executor Benchmark
tool: 'go'
output-file-path: block_executor_output.txt
auto-push: true
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: true
comment-on-alert: true

# delete local gh-pages so the next benchmark action step fetches fresh from remote
- name: Reset local gh-pages branch
if: always()
run: git branch -D gh-pages || true

- name: Store Spamoor Trace result
if: always()
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Spamoor Trace Benchmarks
tool: 'customSmallerIsBetter'
output-file-path: test/e2e/spamoor_bench.json
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: false
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now this does not fail on any alert; the current focus is just on getting results pushed and visible.

comment-on-alert: true
6 changes: 6 additions & 0 deletions test/e2e/evm_spamoor_smoke_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"fmt"
"net/http"
"os"
"path/filepath"
"testing"
"time"
Expand Down Expand Up @@ -164,6 +165,11 @@ func TestSpamoorSmoke(t *testing.T) {
evRethSpans := extractSpansFromTraces(evRethTraces)
printTraceReport(t, "ev-reth", toTraceSpans(evRethSpans))

// write benchmark JSON for ev-node spans when output path is configured
if outputPath := os.Getenv("BENCH_JSON_OUTPUT"); outputPath != "" {
writeTraceBenchmarkJSON(t, "SpamoorSmoke", toTraceSpans(evNodeSpans), outputPath)
}

// assert expected ev-node span names are present.
// these spans reliably appear during block production with transactions flowing.
expectedSpans := []string{
Expand Down
85 changes: 70 additions & 15 deletions test/e2e/evm_test_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package e2e

import (
"context"
"encoding/json"
"flag"
"fmt"
"math/big"
Expand Down Expand Up @@ -855,21 +856,17 @@ type traceSpan interface {
SpanDuration() time.Duration
}

// printTraceReport aggregates spans by operation name and prints a timing breakdown.
func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Helper()
if len(spans) == 0 {
t.Logf("WARNING: no spans found for %s", label)
return
}
// spanStats holds aggregated timing statistics for a single span operation.
type spanStats struct {
	count int           // number of spans observed for this operation
	total time.Duration // sum of all observed span durations
	min   time.Duration // shortest observed span duration
	max   time.Duration // longest observed span duration
}

type stats struct {
count int
total time.Duration
min time.Duration
max time.Duration
}
m := make(map[string]*stats)
// aggregateSpanStats groups spans by operation name and computes count, total, min, max.
func aggregateSpanStats(spans []traceSpan) map[string]*spanStats {
m := make(map[string]*spanStats)
for _, span := range spans {
d := span.SpanDuration()
if d <= 0 {
Expand All @@ -878,7 +875,7 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
name := span.SpanName()
s, ok := m[name]
if !ok {
s = &stats{min: d, max: d}
s = &spanStats{min: d, max: d}
m[name] = s
}
s.count++
Expand All @@ -890,6 +887,18 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
s.max = d
}
}
return m
}

// printTraceReport aggregates spans by operation name and prints a timing breakdown.
func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Helper()
if len(spans) == 0 {
t.Logf("WARNING: no spans found for %s", label)
return
}

m := aggregateSpanStats(spans)

names := make([]string, 0, len(m))
for name := range m {
Expand Down Expand Up @@ -924,3 +933,49 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Logf("%-40s %5.1f%% %s", name, pct, bar)
}
}

// benchmarkEntry matches the customSmallerIsBetter format for github-action-benchmark.
type benchmarkEntry struct {
	Name  string  `json:"name"`  // benchmark series name displayed in the chart
	Unit  string  `json:"unit"`  // unit label for the value (e.g. "us" for microseconds)
	Value float64 `json:"value"` // measured value; smaller is interpreted as better
}

// writeTraceBenchmarkJSON aggregates spans and writes a customSmallerIsBetter JSON file.
// If outputPath is empty, the function is a no-op. The output is one entry per span
// operation, holding the average duration in microseconds.
func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
	t.Helper()
	if outputPath == "" {
		return
	}
	m := aggregateSpanStats(spans)
	if len(m) == 0 {
		t.Logf("WARNING: no span stats to write for %s", label)
		return
	}

	// sort by name for stable output
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Strings(names)

	entries := make([]benchmarkEntry, 0, len(names))
	for _, name := range names {
		s := m[name]
		// Convert via float division rather than Duration.Microseconds(), which
		// truncates sub-microsecond values and can collapse fast spans to 0.
		avg := (float64(s.total) / float64(time.Microsecond)) / float64(s.count)
		entries = append(entries,
			benchmarkEntry{Name: fmt.Sprintf("%s - %s (avg)", label, name), Unit: "us", Value: avg},
		)
	}

	data, err := json.MarshalIndent(entries, "", " ")
	if err != nil {
		t.Fatalf("failed to marshal benchmark JSON: %v", err)
	}
	if err := os.WriteFile(outputPath, data, 0644); err != nil {
		t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
	}
	t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
}
Comment on lines +937 to +981
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Avoid microsecond truncation to preserve small-span accuracy.

Duration.Microseconds() truncates sub-micro values, which can collapse fast spans to 0. Use a float conversion to keep fractional microseconds.

🔧 Suggested fix
-		avg := float64(s.total.Microseconds()) / float64(s.count)
+		avg := (float64(s.total) / float64(time.Microsecond)) / float64(s.count)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// benchmarkEntry matches the customSmallerIsBetter format for github-action-benchmark.
type benchmarkEntry struct {
Name string `json:"name"`
Unit string `json:"unit"`
Value float64 `json:"value"`
}
// writeTraceBenchmarkJSON aggregates spans and writes a customSmallerIsBetter JSON file.
// If outputPath is empty, the function is a no-op.
func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
t.Helper()
if outputPath == "" {
return
}
m := aggregateSpanStats(spans)
if len(m) == 0 {
t.Logf("WARNING: no span stats to write for %s", label)
return
}
// sort by name for stable output
names := make([]string, 0, len(m))
for name := range m {
names = append(names, name)
}
sort.Strings(names)
var entries []benchmarkEntry
for _, name := range names {
s := m[name]
avg := float64(s.total.Microseconds()) / float64(s.count)
entries = append(entries,
benchmarkEntry{Name: fmt.Sprintf("%s - %s (avg)", label, name), Unit: "us", Value: avg},
)
}
data, err := json.MarshalIndent(entries, "", " ")
if err != nil {
t.Fatalf("failed to marshal benchmark JSON: %v", err)
}
if err := os.WriteFile(outputPath, data, 0644); err != nil {
t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
}
t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
}
// benchmarkEntry matches the customSmallerIsBetter format for github-action-benchmark.
type benchmarkEntry struct {
Name string `json:"name"`
Unit string `json:"unit"`
Value float64 `json:"value"`
}
// writeTraceBenchmarkJSON aggregates spans and writes a customSmallerIsBetter JSON file.
// If outputPath is empty, the function is a no-op.
func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
t.Helper()
if outputPath == "" {
return
}
m := aggregateSpanStats(spans)
if len(m) == 0 {
t.Logf("WARNING: no span stats to write for %s", label)
return
}
// sort by name for stable output
names := make([]string, 0, len(m))
for name := range m {
names = append(names, name)
}
sort.Strings(names)
var entries []benchmarkEntry
for _, name := range names {
s := m[name]
avg := (float64(s.total) / float64(time.Microsecond)) / float64(s.count)
entries = append(entries,
benchmarkEntry{Name: fmt.Sprintf("%s - %s (avg)", label, name), Unit: "us", Value: avg},
)
}
data, err := json.MarshalIndent(entries, "", " ")
if err != nil {
t.Fatalf("failed to marshal benchmark JSON: %v", err)
}
if err := os.WriteFile(outputPath, data, 0644); err != nil {
t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
}
t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/e2e/evm_test_common.go` around lines 937 - 981, The avg computation in
writeTraceBenchmarkJSON uses s.total.Microseconds() which truncates
sub-microsecond precision; change it to a float-based conversion (e.g. use
s.total.Seconds()*1e6 or float64(s.total.Nanoseconds())/1e3) and divide by
float64(s.count) so avg keeps fractional microsecond values; update the avg
assignment that references s.total and s.count accordingly.

Loading