diff --git a/.github/actions/dispatch-and-wait/action.yml b/.github/actions/dispatch-and-wait/action.yml
new file mode 100644
index 0000000000..f779db4787
--- /dev/null
+++ b/.github/actions/dispatch-and-wait/action.yml
@@ -0,0 +1,71 @@
+name: dispatch-and-wait
+description: Dispatch a workflow_dispatch workflow in another repo and wait for it to complete.
+inputs:
+  repo:
+    description: 'owner/repo to dispatch in'
+    required: true
+  workflow:
+    description: 'Workflow file name (e.g. deploy-core.testnet.yml). Must accept a correlation_id input surfaced in its run-name.'
+    required: true
+  ref:
+    description: 'Git ref to run the workflow from'
+    required: false
+    default: 'main'
+  fields:
+    description: 'Extra inputs, one key=value per line'
+    required: false
+    default: ''
+  token:
+    description: 'Token with actions:write on the target repo'
+    required: true
+  slack-webhook:
+    description: >-
+      Optional Slack webhook. When set, posts a link to the dispatched run before
+      watching it, so a run gated on a required-reviewer environment in the target
+      repo gets approved instead of stalling unnoticed.
+    required: false
+    default: ''
+runs:
+  using: composite
+  steps:
+    - shell: bash
+      env:
+        GH_TOKEN: ${{ inputs.token }}
+        REPO: ${{ inputs.repo }}
+        WORKFLOW: ${{ inputs.workflow }}
+        REF: ${{ inputs.ref }}
+        FIELDS: ${{ inputs.fields }}
+        SLACK_WEBHOOK: ${{ inputs.slack-webhook }}
+      run: |
+        set -euo pipefail
+        # Unique marker passed as the correlation_id input; the run is located by it below.
+        CID="cid-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${RANDOM}"
+        ARGS=(-f "correlation_id=$CID")
+        # FIELDS holds one key=value per line; blank lines are skipped.
+        while IFS= read -r kv; do
+          [ -n "$kv" ] && ARGS+=(-f "$kv")
+        done <<< "$FIELDS"
+        echo "Dispatching $WORKFLOW in $REPO with correlation id $CID"
+        gh workflow run "$WORKFLOW" -R "$REPO" --ref "$REF" "${ARGS[@]}"
+        # Poll (30 x 10s) for the run whose display title contains the correlation id.
+        RUN_ID=""
+        for _ in $(seq 1 30); do
+          sleep 10
+          # A transient API error counts as a missed poll, not a failed dispatch.
+          RUN_ID=$(gh run list -R "$REPO" --workflow "$WORKFLOW" --limit 20 \
+            --json databaseId,displayTitle \
+            --jq "[.[] | select(.displayTitle | contains(\"$CID\"))][0].databaseId // empty") || RUN_ID=""
+          [ -n "$RUN_ID" ] && break
+        done
+        if [ -z "$RUN_ID" ]; then
+          echo "::error::could not find dispatched run for correlation id $CID in $REPO/$WORKFLOW"
+          exit 1
+        fi
+        RUN_URL="https://github.com/$REPO/actions/runs/$RUN_ID"
+        if [ -n "$SLACK_WEBHOOK" ]; then
+          PAYLOAD=$(jq -n --arg text "$WORKFLOW run in $REPO may need environment approval: $RUN_URL" '{text: $text}')
+          curl -sf -X POST -H 'Content-type: application/json' --data "$PAYLOAD" "$SLACK_WEBHOOK" \
+            || echo "::warning::failed to post Slack approval ping for $RUN_URL"
+        fi
+        echo "Watching $RUN_URL"
+        gh run watch "$RUN_ID" -R "$REPO" --exit-status --interval 30
diff --git a/.github/workflows/release.testnet.push.tags.yml b/.github/workflows/release.testnet.push.tags.yml
index d174633074..3c9d62eb98 100644
--- a/.github/workflows/release.testnet.push.tags.yml
+++ b/.github/workflows/release.testnet.push.tags.yml
@@ -11,6 +11,11 @@ on:
         description: 'The component to tag'
         required: true
         type: string
+      skip_existing:
+        description: 'Succeed without re-tagging if the tag already exists (for orchestrator re-runs)'
+        required: false
+        type: boolean
+        default: false
     secrets:
       DOUBLEZERO_PAT:
         description: 'PAT to push tags to the repository'
@@ -44,6 +49,10 @@ jobs:
           echo "Version format is valid."
 
           if git rev-parse -q --verify "refs/tags/$TAG_NAME"; then
+            if [ "${{ inputs.skip_existing }}" = "true" ]; then
+              echo "Tag '$TAG_NAME' already exists; skipping (skip_existing=true)."
+              exit 0
+            fi
             echo "Error: Tag '$TAG_NAME' already exists in the repository."
             exit 1
           fi
diff --git a/.github/workflows/release.testnet.yml b/.github/workflows/release.testnet.yml
new file mode 100644
index 0000000000..f74c219636
--- /dev/null
+++ b/.github/workflows/release.testnet.yml
@@ -0,0 +1,454 @@
+name: release.testnet
+run-name: "testnet release v${{ inputs.version }}${{ inputs.dry_run && ' (dry run)' || '' }}"
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version to release (format: X.Y.Z, no leading v)'
+        required: true
+        type: string
+      dry_run:
+        description: 'Validate plumbing: draft PRs, no tags, check-mode deploys'
+        required: false
+        type: boolean
+        default: false
+      skip_devnet_check:
+        description: 'Skip the "latest devnet daily succeeded" preflight check'
+        required: false
+        type: boolean
+        default: false
+
+permissions:
+  contents: read
+
+jobs:
+  preflight:
+    runs-on: ubuntu-latest
+    # The devnet check lists workflow runs; grant actions:read explicitly instead
+    # of relying on the repository being public.
+    permissions:
+      contents: read
+      actions: read
+    outputs:
+      prev_version: ${{ steps.prev.outputs.prev_version }}
+    steps:
+      - name: Validate version format
+        # Inputs reach the script through env, not expression interpolation, so a
+        # crafted value cannot inject shell before the format check runs.
+        env:
+          VERSION: ${{ inputs.version }}
+        run: |
+          [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || {
+            echo "::error::version must be X.Y.Z (got '$VERSION')"; exit 1; }
+      - uses: actions/checkout@v4
+      - name: Read current workspace version
+        id: prev
+        env:
+          VERSION: ${{ inputs.version }}
+          DRY_RUN: ${{ inputs.dry_run }}
+        run: |
+          PREV=$(grep -m1 '^version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/')
+          [ -n "$PREV" ] || { echo "::error::no 'version = ' line found in Cargo.toml"; exit 1; }
+          echo "prev_version=$PREV" >> "$GITHUB_OUTPUT"
+          if [ "$PREV" = "$VERSION" ] && [ "$DRY_RUN" != "true" ]; then
+            echo "::error::main is already at $VERSION"; exit 1
+          fi
+      - name: Check latest devnet daily release succeeded
+        if: ${{ !inputs.skip_devnet_check }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          # Only completed runs carry a conclusion; an in-progress daily must not read as a failure.
+          CONCLUSION=$(gh run list -R malbeclabs/doublezero \
+            --workflow release.devnet.all.daily.yml --status completed --limit 1 \
+            --json conclusion --jq '.[0].conclusion')
+          echo "latest devnet daily run: $CONCLUSION"
+          [ "$CONCLUSION" = "success" ] || {
+            echo "::error::latest devnet daily was '$CONCLUSION'; fix devnet or re-run with skip_devnet_check"; exit 1; }
+
+  open-prs:
+    runs-on: ubuntu-latest
+    needs: preflight
+    outputs:
+      dz_pr: ${{ steps.dzpr.outputs.pr }}
+      infra_pr: ${{ steps.infrapr.outputs.pr }}
+    steps:
+      - uses: actions/create-github-app-token@v2
+        id: app
+        with:
+          app-id: ${{ secrets.RELEASE_BOT_APP_ID }}
+          private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }}
+          owner: malbeclabs
+          repositories: doublezero,infra
+      - uses: actions/checkout@v4
+        with:
+          token: ${{ steps.app.outputs.token }}
+      - uses: dtolnay/rust-toolchain@1.90.0
+      - name: Open doublezero version-bump PR
+        id: dzpr
+        env:
+          GH_TOKEN: ${{ steps.app.outputs.token }}
+          VERSION: ${{ inputs.version }}
+          DRY_RUN: ${{ inputs.dry_run }}
+        run: |
+          set -euo pipefail
+          git config user.name "dz-release-bot"
+          git config user.email "dz-release-bot@malbeclabs.com"
+          BRANCH="release/v${VERSION}"
+          ./scripts/release/bump-version.sh "$VERSION"
+          git checkout -b "$BRANCH"
+          git add Cargo.toml Cargo.lock CHANGELOG.md
+          git commit -m "release: bump version to ${VERSION}"
+          git push -f origin "$BRANCH"
+          PR=$(gh pr list -R malbeclabs/doublezero --head "$BRANCH" --state open \
+            --json number --jq '.[0].number // empty')
+          if [ -z "$PR" ]; then
+            TITLE="release: bump version to ${VERSION}"
+            DRAFT=""
+            if [ "$DRY_RUN" = "true" ]; then TITLE="[DRY RUN] $TITLE"; DRAFT="--draft"; fi
+            URL=$(gh pr create -R malbeclabs/doublezero --head "$BRANCH" $DRAFT \
+              --title "$TITLE" \
+              --body "Automated version bump for the v${VERSION} testnet release. Review the promoted CHANGELOG section before merging.")
+            PR=$(gh pr view "$URL" --json number --jq .number)
+          fi
+          echo "pr=$PR" >> "$GITHUB_OUTPUT"
+          echo "doublezero version PR: https://github.com/malbeclabs/doublezero/pull/$PR" >> "$GITHUB_STEP_SUMMARY"
+      - name: Checkout infra
+        uses: actions/checkout@v4
+        with:
+          repository: malbeclabs/infra
+          token: ${{ steps.app.outputs.token }}
+          path: infra
+      - name: Open infra pinned-versions PR
+        id: infrapr
+        working-directory: infra
+        env:
+          GH_TOKEN: ${{ steps.app.outputs.token }}
+          VERSION: ${{ inputs.version }}
+          DRY_RUN: ${{ inputs.dry_run }}
+        run: |
+          set -euo pipefail
+          git config user.name "dz-release-bot"
+          git config user.email "dz-release-bot@malbeclabs.com"
+          BRANCH="release/testnet-v${VERSION}"
+          ./scripts/bump-testnet-versions.sh "$VERSION"
+          git checkout -b "$BRANCH"
+          git add ansible/inventory/testnet/group_vars/all.yml
+          git commit -m "release: tn ${VERSION}"
+          git push -f origin "$BRANCH"
+          PR=$(gh pr list -R malbeclabs/infra --head "$BRANCH" --state open \
+            --json number --jq '.[0].number // empty')
+          if [ -z "$PR" ]; then
+            TITLE="release: tn ${VERSION}"
+            DRAFT=""
+            if [ "$DRY_RUN" = "true" ]; then TITLE="[DRY RUN] $TITLE"; DRAFT="--draft"; fi
+            URL=$(gh pr create -R malbeclabs/infra --head "$BRANCH" $DRAFT \
+              --title "$TITLE" \
+              --body "Automated testnet version pin bump to ${VERSION}-1.")
+            PR=$(gh pr view "$URL" --json number --jq .number)
+          fi
+          echo "pr=$PR" >> "$GITHUB_OUTPUT"
+          echo "infra version PR: https://github.com/malbeclabs/infra/pull/$PR" >> "$GITHUB_STEP_SUMMARY"
+
+  # Gate 1: approve only after BOTH version PRs are merged.
+  gate-tags:
+    runs-on: ubuntu-latest
+    needs: open-prs
+    environment: testnet-release-gate
+    steps:
+      - uses: actions/create-github-app-token@v2
+        id: app
+        with:
+          app-id: ${{ secrets.RELEASE_BOT_APP_ID }}
+          private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }}
+          owner: malbeclabs
+          repositories: doublezero,infra
+      - name: Verify version PRs are merged
+        env:
+          GH_TOKEN: ${{ steps.app.outputs.token }}
+        run: |
+          set -euo pipefail
+          DZ=$(gh pr view "${{ needs.open-prs.outputs.dz_pr }}" -R malbeclabs/doublezero --json state --jq .state)
+          INFRA=$(gh pr view "${{ needs.open-prs.outputs.infra_pr }}" -R malbeclabs/infra --json state --jq .state)
+          echo "doublezero PR: $DZ, infra PR: $INFRA"
+          if [ "${{ inputs.dry_run }}" = "true" ]; then
+            { [ "$DZ" != "CLOSED" ] && [ "$INFRA" != "CLOSED" ]; } || {
+              echo "::error::dry-run PRs were closed"; exit 1; }
+          else
+            { [ "$DZ" = "MERGED" ] && [ "$INFRA" = "MERGED" ]; } || {
+              echo "::error::approve this gate only after BOTH version PRs are merged"; exit 1; }
+          fi
+
+  push-tags:
+    if: ${{ !inputs.dry_run }}
+    needs: gate-tags
+    uses: ./.github/workflows/release.testnet.push.tags.yml
+    permissions:
+      contents: write
+    strategy:
+      matrix:
+        component:
+          - controller
+          - internet-latency-collector
+          - agent
+          - device-telemetry-agent
+          - geoprobe-agent
+          - geoprobe-target
+          - funder
+          - monitor
+          - client
+    with:
+      version: v${{ inputs.version }}
+      component: ${{ matrix.component }}
+      skip_existing: true
+    secrets:
+      DOUBLEZERO_PAT: ${{ secrets.DOUBLEZERO_PAT }}
+
+  verify-cloudsmith:
+    runs-on: ubuntu-latest
+    needs: [preflight, gate-tags, push-tags]
+    # In dry-run, push-tags is skipped; still verify (against the current version) to exercise the query logic.
+    if: ${{ !cancelled() && needs.gate-tags.result == 'success' && (needs.push-tags.result == 'success' || (inputs.dry_run && needs.push-tags.result == 'skipped')) }}
+    timeout-minutes: 75  # NOTE(review): 60 polls x (60s sleep + 18 CLI calls) may exceed 75 min - confirm headroom
+    steps:
+      - name: Install Cloudsmith CLI
+        run: pip install cloudsmith-cli==1.13.0
+      - name: Wait for all packages to publish
+        env:
+          CLOUDSMITH_API_KEY: ${{ secrets.CLOUDSMITH_TOKEN }}
+          VERSION: ${{ inputs.dry_run && needs.preflight.outputs.prev_version || inputs.version }}
+        run: |
+          set -euo pipefail
+          REPO="malbeclabs/doublezero-testnet"
+          PACKAGES="doublezero-agent doublezero-controller doublezero-device-telemetry-agent doublezero-funder doublezero-internet-latency-collector doublezero-monitor doublezero-geoprobe-agent doublezero-geoprobe-target doublezero"
+          MISSING=""
+          for i in $(seq 1 60); do
+            MISSING=""
+            for pkg in $PACKAGES; do
+              # A transient API error counts as a failed poll attempt, not a job failure.
+              RPM=$(cloudsmith ls pkg "$REPO" -q "name:^${pkg}$ AND version:^${VERSION}$" -F json | jq '.data | length') || RPM=0
+              DEB=$(cloudsmith ls pkg "$REPO" -q "name:^${pkg}$ AND version:^${VERSION}-1$" -F json | jq '.data | length') || DEB=0
+              [ "$((RPM + DEB))" -gt 0 ] || MISSING="$MISSING $pkg"
+            done
+            if [ -z "$MISSING" ]; then echo "All packages present at $VERSION"; exit 0; fi
+            echo "attempt $i/60: still waiting for:$MISSING"
+            sleep 60
+          done
+          echo "::error::timed out waiting for packages:$MISSING"
+          exit 1
+
+  build-programs:
+    runs-on: ubuntu-24.04-16c-64gb
+    needs: gate-tags
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: main  # the merged version-bump commit
+      - uses: dtolnay/rust-toolchain@1.90.0
+      - uses: Swatinem/rust-cache@v2
+      - name: Install agave solana tools
+        run: |
+          sh -c "$(curl -sSfL https://release.anza.xyz/v2.3.13/install)"
+          echo "$HOME/.local/share/solana/install/active_release/bin" >> "$GITHUB_PATH"
+      - name: Build programs for testnet
+        run: |
+          set -euo pipefail
+          (cd smartcontract/programs/doublezero-serviceability && cargo build-sbf)
+          (cd smartcontract/programs/doublezero-telemetry && cargo build-sbf --features testnet)
+          (cd smartcontract/programs/doublezero-geolocation && cargo build-sbf --features testnet)
+      - name: Assemble artifact with deploy manifest
+        env:
+          VERSION: ${{ inputs.version }}
+        run: |
+          set -euo pipefail
+          mkdir -p staged
+          cp target/deploy/doublezero_serviceability.so staged/
+          cp target/deploy/doublezero_telemetry.so staged/
+          cp target/deploy/doublezero_geolocation.so staged/
+          {
+            echo "# Testnet program deploy v${VERSION}"
+            echo
+            echo "Built from commit $(git rev-parse HEAD)"
+            echo "Run: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}"
+            echo "Features: serviceability (default), telemetry (testnet), geolocation (testnet)"
+            echo
+            echo "## Checksums"
+            echo '```'
+            (cd staged && sha256sum ./*.so)
+            echo '```'
+            echo
+            echo "## Deploy"
+            echo "Follow the Notion runbook 'Build and deploy DoubleZero solana programs - testnet',"
+            echo "using these prebuilt artifacts instead of building locally."
+            echo "After deploying, set the serviceability program version:"
+            echo " doublezero --env testnet global-config set-version --min-compatible-version "  # NOTE(review): no version values follow the flags here - confirm the command is complete
+            echo "(serviceability is the only program with a settable version account; telemetry has"
+            echo "none and geolocation has no CLI setter). Verify with: doublezero --env testnet version"
+            echo
+            echo "Then approve the 'testnet-program-deploy' gate on the orchestrator run above."
+          } > staged/DEPLOY.md
+      - uses: actions/upload-artifact@v4
+        with:
+          name: solana-programs-v${{ inputs.version }}
+          path: staged/
+
+  stage-programs:
+    runs-on: ubuntu-latest
+    needs: build-programs
+    # The infra run waits on a required-reviewer approval of infra's `testnet`
+    # environment; leave room for a slow approval.
+    timeout-minutes: 120  # NOTE(review): GitHub App installation tokens expire after 1 hour - confirm a longer wait still works
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/create-github-app-token@v2
+        id: app
+        with:
+          app-id: ${{ secrets.RELEASE_BOT_APP_ID }}
+          private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }}
+          owner: malbeclabs
+          repositories: infra
+      - uses: ./.github/actions/dispatch-and-wait
+        with:
+          repo: malbeclabs/infra
+          workflow: stage-programs.testnet.yml
+          token: ${{ steps.app.outputs.token }}
+          slack-webhook: ${{ secrets.SLACK_INT_TECH_WEBHOOK }}
+          fields: |
+            version=${{ inputs.version }}
+            source_run_id=${{ github.run_id }}
+      - name: Post deploy instructions
+        run: |
+          {
+            echo "## Program deploy required"
+            echo "Artifacts staged on nyc-tn-bm2 at /opt/doublezero/program-releases/v${{ inputs.version }}/ (see DEPLOY.md there)."
+            echo "Deploy the three programs with the local keypair, set the onchain version, then approve the testnet-program-deploy gate."
+          } >> "$GITHUB_STEP_SUMMARY"
+      - name: Ping dev team
+        uses: malbeclabs/action-slack-notify@v2
+        env:
+          SLACK_COLOR: warning
+          SLACK_USERNAME: Doublezero Releaser
+          SLACK_TITLE: "Testnet release v${{ inputs.version }}: program deploy needed"
+          SLACK_MESSAGE: "Artifacts staged on nyc-tn-bm2:/opt/doublezero/program-releases/v${{ inputs.version }}/. Deploy programs, then approve the testnet-program-deploy gate."
+          MSG_MINIMAL: actions url
+          SLACK_WEBHOOK: ${{ secrets.SLACK_TESTNET_ALERTS_WEBHOOK }}
+
+  # Gate 2: approve only after the programs are deployed on testnet.
+  gate-programs:
+    runs-on: ubuntu-latest
+    needs: [stage-programs, verify-cloudsmith]
+    environment: testnet-program-deploy
+    steps:
+      - name: Gate passed
+        run: echo "Program deploy confirmed by approver; verifying onchain next."
+
+  verify-onchain:
+    runs-on: ubuntu-latest
+    needs: gate-programs
+    steps:
+      - name: Short-circuit on dry run
+        if: ${{ inputs.dry_run }}
+        run: echo "dry run — skipping onchain version check"
+      - name: Install doublezero client from Cloudsmith
+        if: ${{ !inputs.dry_run }}
+        run: |
+          curl -1sLf 'https://dl.cloudsmith.io/public/malbeclabs/doublezero-testnet/setup.deb.sh' | sudo -E bash
+          sudo apt-get install -y doublezero=${{ inputs.version }}-1
+      - name: Verify onchain program version
+        if: ${{ !inputs.dry_run }}
+        run: |
+          set -euo pipefail
+          OUT=$(doublezero --env testnet version)
+          echo "$OUT"
+          V='${{ inputs.version }}'; echo "$OUT" | grep -qiE "program version.*[^0-9]${V//./\\.}([^0-9]|\$)" || {  # dots escaped and end bounded, so 0.27.1 does not accept 0.27.10
+            echo "::error::onchain program version is not ${{ inputs.version }} — was the deploy completed?"; exit 1; }
+
+  deploy-core:
+    runs-on: ubuntu-latest
+    needs: verify-onchain
+    timeout-minutes: 120  # NOTE(review): GitHub App installation tokens expire after 1 hour - confirm a longer wait still works
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/create-github-app-token@v2
+        id: app
+        with:
+          app-id: ${{ secrets.RELEASE_BOT_APP_ID }}
+          private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }}
+          owner: malbeclabs
+          repositories: infra
+      - uses: ./.github/actions/dispatch-and-wait
+        with:
+          repo: malbeclabs/infra
+          workflow: deploy-core.testnet.yml
+          token: ${{ steps.app.outputs.token }}
+          slack-webhook: ${{ secrets.SLACK_INT_TECH_WEBHOOK }}
+          fields: |
+            mode=${{ inputs.dry_run && 'dry-run' || 'send-it' }}
+
+  deploy-clients:
+    runs-on: ubuntu-latest
+    needs: deploy-core
+    timeout-minutes: 120
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/create-github-app-token@v2
+        id: app
+        with:
+          app-id: ${{ secrets.RELEASE_BOT_APP_ID }}
+          private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }}
+          owner: malbeclabs
+          repositories: infra
+      - uses: ./.github/actions/dispatch-and-wait
+        with:
+          repo: malbeclabs/infra
+          workflow: deploy-clients.testnet.yml
+          token: ${{ steps.app.outputs.token }}
+          slack-webhook: ${{ secrets.SLACK_INT_TECH_WEBHOOK }}
+          fields: |
+            mode=${{ inputs.dry_run && 'dry-run' || 'send-it' }}
+
+  qa:
+    runs-on: ubuntu-latest
+    needs: deploy-clients
+    timeout-minutes: 90  # qa.testnet has an hourly cron + concurrency group; we may queue behind one run
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/create-github-app-token@v2
+        id: app
+        with:
+          app-id: ${{ secrets.RELEASE_BOT_APP_ID }}
+          private-key: ${{ secrets.RELEASE_BOT_PRIVATE_KEY }}
+          owner: malbeclabs
+          repositories: infra
+      - uses: ./.github/actions/dispatch-and-wait
+        with:
+          repo: malbeclabs/infra
+          workflow: qa.testnet.yml
+          token: ${{ steps.app.outputs.token }}
+
+  announce:
+    runs-on: ubuntu-latest
+    needs: qa
+    steps:
+      - uses: malbeclabs/action-slack-notify@v2
+        env:
+          SLACK_COLOR: good
+          SLACK_USERNAME: Doublezero Releaser
+          SLACK_TITLE: "Testnet release v${{ inputs.version }} deployed${{ inputs.dry_run && ' (dry run)' || '' }}"
+          SLACK_MESSAGE: "QA passed. Watch the system dashboard for ~30 min: https://doublezero.grafana.net/d/bf3dece9-51ac-4087-b6b1-579b3859ce14/ — and remember the community announcement (foundation)."
+          MSG_MINIMAL: actions url
+          SLACK_WEBHOOK: ${{ secrets.SLACK_TESTNET_ALERTS_WEBHOOK }}
+
+  notify-failure:
+    runs-on: ubuntu-latest
+    needs: [preflight, open-prs, gate-tags, push-tags, verify-cloudsmith, build-programs, stage-programs, gate-programs, verify-onchain, deploy-core, deploy-clients, qa]
+    if: failure()
+    steps:
+      - uses: malbeclabs/action-slack-notify@v2
+        env:
+          SLACK_COLOR: failure
+          SLACK_USERNAME: Doublezero Releaser
+          SLACK_TITLE: "Testnet release v${{ inputs.version }} failed"
+          MSG_MINIMAL: actions url
+          SLACK_WEBHOOK: ${{ secrets.SLACK_TESTNET_ALERTS_WEBHOOK }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0398f639ad..43802880ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,9 @@ All notable changes to this project will be documented in this file.
### Changes +- CI + - Add a testnet release orchestrator workflow (`release.testnet.yml`) that drives the release end to end: preflight checks, version-bump PRs for doublezero and infra, a human-approved gate before pushing the 9 component tags, CloudSmith package verification, Solana program build and staging with a manual deploy gate, onchain version verification, infra core/client deploys, QA, and Slack notifications. Supports `dry_run` for plumbing validation and safe re-runs (existing PRs are reused; already-pushed tags are skipped via a new `skip_existing` input on the tag workflow). Runbook at `docs/testnet-release.md`. + ## [v0.27.1](https://github.com/malbeclabs/doublezero/compare/client/v0.27.0...client/v0.27.1) - 2026-06-10 ### Breaking diff --git a/docs/testnet-release.md b/docs/testnet-release.md new file mode 100644 index 0000000000..b923dc7075 --- /dev/null +++ b/docs/testnet-release.md @@ -0,0 +1,74 @@ +# Testnet Release Runbook + +The testnet release is driven by a single orchestrator workflow, +[`release.testnet.yml`](../.github/workflows/release.testnet.yml). It automates +everything that can be automated and pauses at two human gates: one before tags +are pushed, and one around the Solana program deploy (which stays manual). + +Beyond the two gates, several jobs run in approval-protected environments, so a +full release is roughly 6–7 approval interactions across the two repos: merge +both version PRs, approve gate 1, approve the `testnet` environment on the tag +jobs, approve infra's `testnet` environment three times (`stage-programs`, +`deploy-core`, `deploy-clients` — each dispatched infra run posts a link to +`#int-tech` when it may be waiting), and approve gate 2 after the program +deploy. + +## Starting a release + +```bash +gh workflow run release.testnet.yml -R malbeclabs/doublezero -f version=X.Y.Z +``` + +`version` is plain `X.Y.Z` with no leading `v`. 
Optional inputs: + +| Input | Default | Effect | +| --- | --- | --- | +| `dry_run` | `false` | Validate plumbing end to end: version PRs are opened as drafts, no tags are pushed, downstream deploys run in check mode, the onchain check is skipped. | +| `skip_devnet_check` | `false` | Skip the preflight check that the latest devnet daily release succeeded. Use only when you know why devnet is red. | + +## Stage by stage + +| Stage | Automated | Human action required | +| --- | --- | --- | +| `preflight` | Validates the version, reads the current workspace version, checks the latest devnet daily release succeeded. | — | +| `open-prs` | Opens the doublezero version-bump PR (`release/vX.Y.Z`: Cargo.toml, Cargo.lock, CHANGELOG promotion) and the infra pinned-versions PR (`release/testnet-vX.Y.Z`). PR links appear in the run summary. | Review and **merge both PRs**. | +| `gate-tags` | Waits on the `testnet-release-gate` environment, then verifies both PRs are merged (the gate fails if you approve early). | **Approve gate 1** after both PRs are merged. | +| `push-tags` | Pushes the 9 component tags (`controller`, `internet-latency-collector`, `agent`, `device-telemetry-agent`, `geoprobe-agent`, `geoprobe-target`, `funder`, `monitor`, `client`) via the reusable tag workflow, which runs in the protected `testnet` environment. | **Approve the `testnet` environment prompt** on the tag jobs. | +| `verify-cloudsmith` | Polls CloudSmith (up to ~60 min) until all 9 packages exist at the new version. | — | +| `build-programs` | Builds the three Solana programs (`serviceability` default features; `telemetry` and `geolocation` with `--features testnet`) from main and uploads them with checksums and a `DEPLOY.md` manifest. | — | +| `stage-programs` | Dispatches the infra `stage-programs.testnet.yml` workflow, which copies the artifacts to `nyc-tn-bm2:/opt/doublezero/program-releases/vX.Y.Z/`, then pings Slack. 
| **Approve infra's `testnet` environment** on the dispatched run (link posted to `#int-tech`). Then **deploy the programs** on nyc-tn-bm2 from that directory per the Notion runbook ("Build and deploy DoubleZero solana programs - testnet"), and set the onchain version. | +| `gate-programs` | Waits on the `testnet-program-deploy` environment. | **Approve gate 2** once the programs are deployed and the onchain version is set. | +| `verify-onchain` | Installs the released client from CloudSmith and checks `doublezero --env testnet version` reports the new program version. | — | +| `deploy-core` | Dispatches infra `deploy-core.testnet.yml` and waits for it. | **Approve infra's `testnet` environment** on the dispatched run (link posted to `#int-tech`). | +| `deploy-clients` | Dispatches infra `deploy-clients.testnet.yml` and waits for it. | **Approve infra's `testnet` environment** on the dispatched run (link posted to `#int-tech`). | +| `qa` | Dispatches infra `qa.testnet.yml` and waits for it (may queue behind the hourly cron run). | — | +| `announce` | Posts success to Slack with the dashboard link. | **Watch the system dashboard for ~30 min** (https://doublezero.grafana.net/d/bf3dece9-51ac-4087-b6b1-579b3859ce14/). The foundation posts the community announcement. | + +Any failed job triggers a Slack alert via `notify-failure`. + +## Dry-run mode + +`dry_run=true` exercises the plumbing without releasing anything: + +- Both version PRs are opened as **drafts** with `[DRY RUN]` titles. Do not merge them; + gate 1 only checks that they were not closed. +- No tags are pushed. `verify-cloudsmith` still runs, querying the **current** (previous) + version to exercise the CloudSmith query logic. +- Programs are still built and staged, but `verify-onchain` is skipped, and the infra + deploy workflows are dispatched in their check mode (`mode=dry-run`). 
+
+Cleanup after a dry run: close both draft PRs and delete their branches
+(`release/vX.Y.Z` in doublezero, `release/testnet-vX.Y.Z` in infra).
+
+## Recovery / re-running
+
+Use "Re-run failed jobs" on the orchestrator run to resume from where it stopped:
+
+- The version PRs are reused if they already exist (the branch is force-pushed and the
+  open PR is found by head branch).
+- Already-pushed tags are skipped (`skip_existing=true` on the tag workflow), so a
+  partially completed tag matrix is safe to re-run.
+- Environment gates prompt for approval again on re-run.
+
+If a downstream infra workflow failed, fix the cause there first; re-running the
+orchestrator job dispatches a fresh run of that workflow.
diff --git a/scripts/release/bump-version.sh b/scripts/release/bump-version.sh
new file mode 100755
index 0000000000..d8b4570067
--- /dev/null
+++ b/scripts/release/bump-version.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Bumps the workspace version and promotes the CHANGELOG "Unreleased" section
+# for a testnet release. Run from the repo root. Requires cargo on PATH.
+#
+# Usage: bump-version.sh X.Y.Z
+# Every precondition is checked before any file is edited.
+set -euo pipefail
+
+NEW="${1:?usage: bump-version.sh X.Y.Z}"
+[[ "$NEW" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || { echo "version must be X.Y.Z" >&2; exit 1; }
+
+# `|| true`: under pipefail a missing version line would otherwise abort here with
+# no message; the empty-string check below reports it instead.
+PREV=$(grep -m1 '^version = ' Cargo.toml | sed 's/version = "\(.*\)"/\1/' || true)
+[ -n "$PREV" ] || { echo "no 'version = ' line found in Cargo.toml" >&2; exit 1; }
+[ "$PREV" != "$NEW" ] || { echo "workspace is already at $NEW" >&2; exit 1; }
+# -F/-x match the whole line literally, so the dots in $PREV are not regex wildcards.
+COUNT=$(grep -cFx "version = \"$PREV\"" Cargo.toml || true)
+[ "$COUNT" -eq 1 ] || { echo "expected exactly one workspace version line, found $COUNT" >&2; exit 1; }
+grep -q '^## Unreleased$' CHANGELOG.md || { echo "CHANGELOG.md has no '## Unreleased' section" >&2; exit 1; }
+
+# Escape the dots so sed matches $PREV literally.
+sed -i "s/^version = \"${PREV//./\\.}\"\$/version = \"$NEW\"/" Cargo.toml
+cargo update --workspace
+
+DATE=$(date -u +%Y-%m-%d)
+export NEW PREV DATE
+perl -0pi -e 's{^## Unreleased\n}{## Unreleased\n\n### Breaking\n\n### Changes\n\n## [v$ENV{NEW}](https://github.com/malbeclabs/doublezero/compare/client/v$ENV{PREV}...client/v$ENV{NEW}) - $ENV{DATE}\n}m' CHANGELOG.md
+
+echo "Bumped $PREV -> $NEW"
+git --no-pager diff --stat -- Cargo.toml Cargo.lock CHANGELOG.md