Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 93 additions & 11 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ permissions: {}
push:
branches:
- main
pull_request:
branches:
- main
workflow_dispatch:

jobs:
evm-benchmark:
name: EVM Contract Benchmark
runs-on: ubuntu-latest
timeout-minutes: 30
permissions:
contents: write
issues: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
Expand All @@ -29,30 +29,112 @@ jobs:
run: |
cd test/e2e && go test -tags evm -bench=. -benchmem -run='^$' \
-timeout=10m --evm-binary=../../build/evm | tee output.txt
- name: Store benchmark result
- name: Run Block Executor benchmarks
run: |
go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \
./block/internal/executing/... > block_executor_output.txt
- name: Upload benchmark results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: evm-benchmark-results
path: |
test/e2e/output.txt
block_executor_output.txt

spamoor-benchmark:
name: Spamoor Trace Benchmark
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
with:
go-version-file: ./go.mod
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
- name: Build binaries
run: make build-evm build-da
- name: Run Spamoor smoke test
run: |
cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \
-run='^TestSpamoorSmoke$' -v -timeout=15m --evm-binary=../../build/evm
- name: Upload benchmark results
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: spamoor-benchmark-results
path: test/e2e/spamoor_bench.json

# single job to push all results to gh-pages sequentially, avoiding race conditions
publish-benchmarks:
name: Publish Benchmark Results
needs: [evm-benchmark, spamoor-benchmark]
runs-on: ubuntu-latest
permissions:
contents: write
issues: write
pull-requests: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Download EVM benchmark results
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: evm-benchmark-results
- name: Download Spamoor benchmark results
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: spamoor-benchmark-results
path: test/e2e/

# only update the benchmark baseline on push/dispatch, not on PRs
- name: Store EVM Contract Roundtrip result
if: always()
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: EVM Contract Roundtrip
tool: 'go'
output-file-path: test/e2e/output.txt
auto-push: true
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: true
comment-on-alert: true

- name: Run Block Executor benchmarks
run: |
go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \
./block/internal/executing/... > block_executor_output.txt
- name: Store Block Executor benchmark result
# delete local gh-pages so the next benchmark action step fetches fresh from remote
- name: Reset local gh-pages branch
if: always()
run: git branch -D gh-pages || true

- name: Store Block Executor result
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because there are now multiple jobs pushing benchmark results, we gather up all the results first and then push them sequentially, one by one, to avoid race conditions.

if: always()
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Block Executor Benchmark
tool: 'go'
output-file-path: block_executor_output.txt
auto-push: true
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: true
comment-on-alert: true

# delete local gh-pages so the next benchmark action step fetches fresh from remote
- name: Reset local gh-pages branch
if: always()
run: git branch -D gh-pages || true

- name: Store Spamoor Trace result
if: always()
uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7
with:
name: Spamoor Trace Benchmarks
tool: 'customSmallerIsBetter'
output-file-path: test/e2e/spamoor_bench.json
auto-push: ${{ github.event_name != 'pull_request' }}
save-data-file: ${{ github.event_name != 'pull_request' }}
github-token: ${{ secrets.GITHUB_TOKEN }}
alert-threshold: '150%'
fail-on-alert: false
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now this does not fail on any alert; the current focus is just on getting results pushed and visible.

comment-on-alert: true
6 changes: 6 additions & 0 deletions test/e2e/evm_spamoor_smoke_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"fmt"
"net/http"
"os"
"path/filepath"
"testing"
"time"
Expand Down Expand Up @@ -164,6 +165,11 @@ func TestSpamoorSmoke(t *testing.T) {
evRethSpans := extractSpansFromTraces(evRethTraces)
printTraceReport(t, "ev-reth", toTraceSpans(evRethSpans))

// write benchmark JSON for ev-node spans when output path is configured
if outputPath := os.Getenv("BENCH_JSON_OUTPUT"); outputPath != "" {
writeTraceBenchmarkJSON(t, "SpamoorSmoke", toTraceSpans(evNodeSpans), outputPath)
}

// assert expected ev-node span names are present.
// these spans reliably appear during block production with transactions flowing.
expectedSpans := []string{
Expand Down
85 changes: 70 additions & 15 deletions test/e2e/evm_test_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package e2e

import (
"context"
"encoding/json"
"flag"
"fmt"
"math/big"
Expand Down Expand Up @@ -855,21 +856,17 @@ type traceSpan interface {
SpanDuration() time.Duration
}

// printTraceReport aggregates spans by operation name and prints a timing breakdown.
func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Helper()
if len(spans) == 0 {
t.Logf("WARNING: no spans found for %s", label)
return
}
// spanStats holds aggregated timing statistics for a single span operation.
type spanStats struct {
	count int           // number of spans observed for this operation
	total time.Duration // sum of all observed span durations
	min   time.Duration // shortest observed span duration
	max   time.Duration // longest observed span duration
}

type stats struct {
count int
total time.Duration
min time.Duration
max time.Duration
}
m := make(map[string]*stats)
// aggregateSpanStats groups spans by operation name and computes count, total, min, max.
func aggregateSpanStats(spans []traceSpan) map[string]*spanStats {
m := make(map[string]*spanStats)
for _, span := range spans {
d := span.SpanDuration()
if d <= 0 {
Expand All @@ -878,7 +875,7 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
name := span.SpanName()
s, ok := m[name]
if !ok {
s = &stats{min: d, max: d}
s = &spanStats{min: d, max: d}
m[name] = s
}
s.count++
Expand All @@ -890,6 +887,18 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
s.max = d
}
}
return m
}

// printTraceReport aggregates spans by operation name and prints a timing breakdown.
func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Helper()
if len(spans) == 0 {
t.Logf("WARNING: no spans found for %s", label)
return
}

m := aggregateSpanStats(spans)

names := make([]string, 0, len(m))
for name := range m {
Expand Down Expand Up @@ -924,3 +933,49 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) {
t.Logf("%-40s %5.1f%% %s", name, pct, bar)
}
}

// benchmarkEntry matches the customSmallerIsBetter format for github-action-benchmark.
type benchmarkEntry struct {
	Name  string  `json:"name"`  // benchmark series name displayed in the chart
	Unit  string  `json:"unit"`  // unit label for the value (e.g. "us" for microseconds)
	Value float64 `json:"value"` // measured value; smaller is interpreted as better
}

// writeTraceBenchmarkJSON aggregates spans and writes a customSmallerIsBetter JSON file.
// If outputPath is empty, the function is a no-op. The output is one entry per span
// operation, holding the average duration in microseconds.
func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
	t.Helper()
	if outputPath == "" {
		return
	}
	m := aggregateSpanStats(spans)
	if len(m) == 0 {
		t.Logf("WARNING: no span stats to write for %s", label)
		return
	}

	// sort by name for stable output
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Strings(names)

	entries := make([]benchmarkEntry, 0, len(names))
	for _, name := range names {
		s := m[name]
		// Convert via float division rather than Duration.Microseconds(), which
		// truncates sub-microsecond values and can collapse fast spans to 0.
		avg := (float64(s.total) / float64(time.Microsecond)) / float64(s.count)
		entries = append(entries,
			benchmarkEntry{Name: fmt.Sprintf("%s - %s (avg)", label, name), Unit: "us", Value: avg},
		)
	}

	data, err := json.MarshalIndent(entries, "", " ")
	if err != nil {
		t.Fatalf("failed to marshal benchmark JSON: %v", err)
	}
	if err := os.WriteFile(outputPath, data, 0644); err != nil {
		t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
	}
	t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
}
Comment on lines +937 to +981
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Avoid microsecond truncation to preserve small-span accuracy.

Duration.Microseconds() truncates sub-micro values, which can collapse fast spans to 0. Use a float conversion to keep fractional microseconds.

🔧 Suggested fix
-		avg := float64(s.total.Microseconds()) / float64(s.count)
+		avg := (float64(s.total) / float64(time.Microsecond)) / float64(s.count)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// benchmarkEntry matches the customSmallerIsBetter format for github-action-benchmark.
type benchmarkEntry struct {
Name string `json:"name"`
Unit string `json:"unit"`
Value float64 `json:"value"`
}
// writeTraceBenchmarkJSON aggregates spans and writes a customSmallerIsBetter JSON file.
// If outputPath is empty, the function is a no-op.
func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
t.Helper()
if outputPath == "" {
return
}
m := aggregateSpanStats(spans)
if len(m) == 0 {
t.Logf("WARNING: no span stats to write for %s", label)
return
}
// sort by name for stable output
names := make([]string, 0, len(m))
for name := range m {
names = append(names, name)
}
sort.Strings(names)
var entries []benchmarkEntry
for _, name := range names {
s := m[name]
avg := float64(s.total.Microseconds()) / float64(s.count)
entries = append(entries,
benchmarkEntry{Name: fmt.Sprintf("%s - %s (avg)", label, name), Unit: "us", Value: avg},
)
}
data, err := json.MarshalIndent(entries, "", " ")
if err != nil {
t.Fatalf("failed to marshal benchmark JSON: %v", err)
}
if err := os.WriteFile(outputPath, data, 0644); err != nil {
t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
}
t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
}
// benchmarkEntry matches the customSmallerIsBetter format for github-action-benchmark.
type benchmarkEntry struct {
Name string `json:"name"`
Unit string `json:"unit"`
Value float64 `json:"value"`
}
// writeTraceBenchmarkJSON aggregates spans and writes a customSmallerIsBetter JSON file.
// If outputPath is empty, the function is a no-op.
func writeTraceBenchmarkJSON(t testing.TB, label string, spans []traceSpan, outputPath string) {
t.Helper()
if outputPath == "" {
return
}
m := aggregateSpanStats(spans)
if len(m) == 0 {
t.Logf("WARNING: no span stats to write for %s", label)
return
}
// sort by name for stable output
names := make([]string, 0, len(m))
for name := range m {
names = append(names, name)
}
sort.Strings(names)
var entries []benchmarkEntry
for _, name := range names {
s := m[name]
avg := (float64(s.total) / float64(time.Microsecond)) / float64(s.count)
entries = append(entries,
benchmarkEntry{Name: fmt.Sprintf("%s - %s (avg)", label, name), Unit: "us", Value: avg},
)
}
data, err := json.MarshalIndent(entries, "", " ")
if err != nil {
t.Fatalf("failed to marshal benchmark JSON: %v", err)
}
if err := os.WriteFile(outputPath, data, 0644); err != nil {
t.Fatalf("failed to write benchmark JSON to %s: %v", outputPath, err)
}
t.Logf("wrote %d benchmark entries to %s", len(entries), outputPath)
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/e2e/evm_test_common.go` around lines 937 - 981, The avg computation in
writeTraceBenchmarkJSON uses s.total.Microseconds() which truncates
sub-microsecond precision; change it to a float-based conversion (e.g. use
s.total.Seconds()*1e6 or float64(s.total.Nanoseconds())/1e3) and divide by
float64(s.count) so avg keeps fractional microsecond values; update the avg
assignment that references s.total and s.count accordingly.

Loading