Skip to content
40 changes: 33 additions & 7 deletions packages/orchestrator/cmd/resume-build/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ func main() {
cmdPause := flag.String("cmd-pause", "", "execute command in sandbox, then pause on success")
cmdSignalPause := flag.String("cmd-signal-pause", "", "execute command in sandbox, then wait for SIGUSR1 before pausing")
optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)")
fsOnly := flag.Bool("fs-only", false, "pause without a memory snapshot (filesystem-only; resume reboots the guest)")
reboot := flag.Bool("reboot", false, "cold-boot from the build's rootfs instead of resuming from memory")
shell := flag.Bool("shell", false, "attach an interactive PTY shell via envd (no sshd required in the sandbox)")

fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-config pause timeout LD flag (0 = use LD default)")
Expand Down Expand Up @@ -157,6 +159,12 @@ func main() {
if *optimize && *iterations > 0 {
log.Fatal("-optimize is incompatible with -iterations (benchmarking doesn't upload)")
}
if *fsOnly && !isPauseMode {
log.Fatal("-fs-only requires a pause flag (-pause, -signal-pause, -cmd-pause, or -cmd-signal-pause)")
}
if *fsOnly && (*optimize || *fphBench) {
log.Fatal("-fs-only is incompatible with -optimize and -fph-bench (no memory snapshot)")
}

if *shell && (isCmdMode || isPauseMode || *iterations > 0) {
log.Fatal("-shell can only be used in interactive mode (no -cmd, no pause flags, no -iterations)")
Expand Down Expand Up @@ -193,6 +201,7 @@ func main() {
newBuildID: outputBuildID,
iterations: *iterations,
optimize: *optimize,
fsOnly: *fsOnly,
}

runOpts := runOptions{
Expand All @@ -206,7 +215,7 @@ func main() {
}
fphBenchOpts := fphBenchOptions{enabled: *fphBench, workload: *cmdPause, iterations: benchIters, delay: *fphBenchDelay}

err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, pauseOpts, runOpts, fphBenchOpts)
err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, *reboot, pauseOpts, runOpts, fphBenchOpts)
cancel()

if err != nil {
Expand All @@ -228,6 +237,7 @@ type pauseOptions struct {
newBuildID string
iterations int // for benchmarking pause (only with immediate)
optimize bool
fsOnly bool
}

func (p pauseOptions) enabled() bool {
Expand Down Expand Up @@ -322,10 +332,21 @@ type runner struct {
coldStart bool
noPrefetch bool
shell bool
reboot bool
config cfg.BuilderConfig
storage storage.StorageProvider
}

// startSandbox brings up a sandbox for the runner's build. By default it
// resumes from the build's memory snapshot via ResumeSandbox; when -reboot is
// set it cold-boots (reboots) from the build's rootfs via RebootSandbox
// instead. The start time is only forwarded on the resume path; end is
// forwarded on both.
func (r *runner) startSandbox(ctx context.Context, runtime sandbox.RuntimeMetadata, start, end time.Time) (*sandbox.Sandbox, error) {
	if !r.reboot {
		return r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, start, end, nil)
	}

	return r.factory.RebootSandbox(ctx, r.tmpl, r.sbxConfig, runtime, end, nil)
}

func (r *runner) resumeOnce(ctx context.Context, iter int) (time.Duration, error) {
runtime := sandbox.RuntimeMetadata{
TemplateID: r.buildID,
Expand All @@ -335,7 +356,7 @@ func (r *runner) resumeOnce(ctx context.Context, iter int) (time.Duration, error
}

t0 := time.Now()
sbx, err := r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, t0, t0.Add(24*time.Hour), nil)
sbx, err := r.startSandbox(ctx, runtime, t0, t0.Add(24*time.Hour))
dur := time.Since(t0)

if sbx != nil {
Expand All @@ -355,7 +376,7 @@ func (r *runner) interactive(ctx context.Context) error {

fmt.Println("🚀 Starting...")
t0 := time.Now()
sbx, err := r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, t0, t0.Add(24*time.Hour), nil)
sbx, err := r.startSandbox(ctx, runtime, t0, t0.Add(24*time.Hour))
if err != nil {
return err
}
Expand Down Expand Up @@ -405,7 +426,7 @@ func (r *runner) cmdOnce(ctx context.Context, opts runOptions, verbose bool) (cm
fmt.Println("🚀 Starting sandbox...")
}
t0 := time.Now()
sbx, err := r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, t0, t0.Add(24*time.Hour), nil)
sbx, err := r.startSandbox(ctx, runtime, t0, t0.Add(24*time.Hour))
resumeDur := time.Since(t0)
if err != nil {
return cmdTimings{resume: resumeDur, err: err}, err
Expand Down Expand Up @@ -608,7 +629,7 @@ func (r *runner) pauseOnce(ctx context.Context, opts pauseOptions, verbose bool)
fmt.Println("🚀 Starting sandbox...")
}
t0 := time.Now()
sbx, err := r.factory.ResumeSandbox(ctx, r.tmpl, r.sbxConfig, runtime, t0, t0.Add(24*time.Hour), nil)
sbx, err := r.startSandbox(ctx, runtime, t0, t0.Add(24*time.Hour))
resumeDur := time.Since(t0)
if err != nil {
return pauseTimings{resume: resumeDur, err: err}, err
Expand Down Expand Up @@ -671,8 +692,12 @@ func (r *runner) pauseOnce(ctx context.Context, opts pauseOptions, verbose bool)
}

// Pause and create snapshot
var pauseSnapshotOpts []sandbox.PauseOption
if opts.fsOnly {
pauseSnapshotOpts = append(pauseSnapshotOpts, sandbox.WithFilesystemSnapshot())
}
pauseStart := time.Now()
snapshot, err := sbx.Pause(ctx, newMeta, sandbox.SnapshotUseCasePause)
snapshot, err := sbx.Pause(ctx, newMeta, sandbox.SnapshotUseCasePause, pauseSnapshotOpts...)
pauseDur := time.Since(pauseStart)
totalDur := time.Since(t0)

Expand Down Expand Up @@ -1021,7 +1046,7 @@ func (r *runner) benchmark(ctx context.Context, n int) error {
return lastErr
}

func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose, shell bool, pauseOpts pauseOptions, runOpts runOptions, fphBenchOpts fphBenchOptions) error {
func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose, shell, reboot bool, pauseOpts pauseOptions, runOpts runOptions, fphBenchOpts fphBenchOptions) error {
// Silence other loggers unless verbose mode
var l logger.Logger
if !verbose {
Expand Down Expand Up @@ -1186,6 +1211,7 @@ func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefe
coldStart: coldStart,
noPrefetch: noPrefetch,
shell: shell,
reboot: reboot,
config: config.BuilderConfig,
storage: persistence,
sbxConfig: sbxCfg,
Expand Down
14 changes: 11 additions & 3 deletions packages/orchestrator/pkg/sandbox/build_upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ func NewUpload(
useCase string,
objectMetadata storage.ObjectMetadata,
) (*Upload, error) {
mem, memV4, err := resolveCompressConfig(ctx, cfg, ff, storage.MemfileName, snap.MemfileBlockSize, useCase)
if err != nil {
return nil, fmt.Errorf("resolve memfile compress config: %w", err)
// Filesystem-only snapshots have no memfile (NoDiff, block size 0), so
// resolving its compress config would fail validation ("block size must be
// positive"). The memfile body and header are never uploaded anyway.
var mem storage.CompressConfig
var memV4 bool
var err error
if !snap.FilesystemSnapshot {
mem, memV4, err = resolveCompressConfig(ctx, cfg, ff, storage.MemfileName, snap.MemorySnapshot.BlockSize, useCase)
if err != nil {
return nil, fmt.Errorf("resolve memfile compress config: %w", err)
}
}
root, rootV4, err := resolveCompressConfig(ctx, cfg, ff, storage.RootfsName, snap.RootfsBlockSize, useCase)
if err != nil {
Expand Down
36 changes: 36 additions & 0 deletions packages/orchestrator/pkg/sandbox/build_upload_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,39 @@ func TestAppendAncestorBuilds_NilDstSkipsSynthesis(t *testing.T) {
err := u.appendAncestorBuilds(t.Context(), nil, mappingTo(t, 4096, ancestorID, 4096), build.Memfile)
require.NoError(t, err)
}

// A filesystem-only snapshot has no memfile, so its MemorySnapshot.BlockSize is
// 0. NewUpload must skip resolving the memfile compress config for it —
// otherwise, with compression enabled, validateCompressConfig would reject the
// zero block size and fail the upload. FrameSizeKB is a multiple of the 4 KiB
// rootfs block so the rootfs config (which is always resolved) stays valid.
//
// The negative case asserts on the "resolve memfile compress config" wrap
// message rather than on any error, so it cannot pass by accident because of
// an unrelated failure (e.g. the rootfs config being rejected).
func TestNewUpload_FilesystemSnapshotSkipsMemfileCompressConfig(t *testing.T) {
	t.Parallel()

	cfg := storage.CompressConfig{Enabled: true, Type: "zstd", FrameSizeKB: 256}

	t.Run("filesystem-only snapshot with zero memfile block size succeeds", func(t *testing.T) {
		t.Parallel()
		snap := &Snapshot{
			BuildID:            uuid.New(),
			FilesystemSnapshot: true,
			RootfsBlockSize:    4096,
		}

		u, err := NewUpload(t.Context(), nil, snap, nil, cfg, nil, storage.UseCaseBuild, storage.ObjectMetadata{})
		require.NoError(t, err)
		require.NotNil(t, u)
	})

	t.Run("memory snapshot with zero memfile block size still errors", func(t *testing.T) {
		t.Parallel()
		snap := &Snapshot{
			BuildID:            uuid.New(),
			FilesystemSnapshot: false,
			RootfsBlockSize:    4096,
		}

		_, err := NewUpload(t.Context(), nil, snap, nil, cfg, nil, storage.UseCaseBuild, storage.ObjectMetadata{})
		// Must be the memfile resolution that fails, not some other step.
		require.ErrorContains(t, err, "resolve memfile compress config")
	})
}
12 changes: 9 additions & 3 deletions packages/orchestrator/pkg/sandbox/build_upload_v3.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

func (u *Upload) runV3(ctx context.Context) error {
memfilePath, err := u.snap.MemfileDiff.CachePath(ctx)
memfilePath, err := u.snap.MemorySnapshot.Diff.CachePath(ctx)
if err != nil {
return fmt.Errorf("error getting memfile diff path: %w", err)
}
Expand All @@ -28,7 +28,7 @@ func (u *Upload) runV3(ctx context.Context) error {
eg, egCtx := errgroup.WithContext(ctx)

eg.Go(func() error {
h, err := u.snap.MemfileDiffHeader.WaitWithContext(egCtx)
h, err := u.snap.MemorySnapshot.DiffHeader.WaitWithContext(egCtx)
if err != nil {
return fmt.Errorf("wait memfile diff header: %w", err)
}
Expand Down Expand Up @@ -90,6 +90,12 @@ func (u *Upload) runV3(ctx context.Context) error {
})

eg.Go(func() error {
// Filesystem-only snapshots resume by reboot, not snapfile restore, so
// the snapfile (created only for its disk-flush side effect) is not uploaded.
if u.snap.FilesystemSnapshot {
return nil
}

return uploadBlobWithMetrics(egCtx, u.store, u.paths.Snapfile(), storage.SnapfileObjectType, u.snap.Snapfile.Path(), uploadFileSnap, meta)
})

Expand All @@ -103,7 +109,7 @@ func (u *Upload) runV3(ctx context.Context) error {

// Body uploads done; headers must be ready by now (the per-file Goroutines
// above already Wait-ed). Wait() is a fast lookup here.
memfileDiffHeader, err := u.snap.MemfileDiffHeader.WaitWithContext(ctx)
memfileDiffHeader, err := u.snap.MemorySnapshot.DiffHeader.WaitWithContext(ctx)
if err != nil {
return fmt.Errorf("wait memfile diff header: %w", err)
}
Expand Down
10 changes: 8 additions & 2 deletions packages/orchestrator/pkg/sandbox/build_upload_v4.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
)

func (u *Upload) runV4(ctx context.Context) error {
memSrc, err := u.snap.MemfileDiff.CachePath(ctx)
memSrc, err := u.snap.MemorySnapshot.Diff.CachePath(ctx)
if err != nil {
return fmt.Errorf("memfile diff path: %w", err)
}
Expand All @@ -29,7 +29,7 @@ func (u *Upload) runV4(ctx context.Context) error {
eg, ctx := errgroup.WithContext(ctx)

eg.Go(func() error {
h, err := u.snap.MemfileDiffHeader.WaitWithContext(ctx)
h, err := u.snap.MemorySnapshot.DiffHeader.WaitWithContext(ctx)
if err != nil {
return fmt.Errorf("wait memfile diff header: %w", err)
}
Expand All @@ -55,6 +55,12 @@ func (u *Upload) runV4(ctx context.Context) error {
meta := storage.WithMetadata(u.objectMetadata)

eg.Go(func() error {
// Filesystem-only snapshots resume by reboot, not snapfile restore, so
// the snapfile (created only for its disk-flush side effect) is not uploaded.
if u.snap.FilesystemSnapshot {
return nil
}

return uploadBlobWithMetrics(ctx, u.store, u.paths.Snapfile(), storage.SnapfileObjectType, u.snap.Snapfile.Path(), uploadFileSnap, meta)
})

Expand Down
39 changes: 37 additions & 2 deletions packages/orchestrator/pkg/sandbox/fc/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ func (f *fcLogFilter) Write(p []byte) (n int, err error) {
return len(p), err
}

// ext4RootFlags are the ext4 mount flags passed on the kernel cmdline under
// the "rootflags" key.
//
// discard: ext4 issues TRIM on freed blocks so they are elided from the
// snapshot diff.
//
// It must never include "noload": a filesystem-only snapshot resume cold-boots
// from the snapshot rootfs and relies on ext4 replaying the journal on mount.
const ext4RootFlags = "discard"

type ProcessOptions struct {
// IoEngine is the io engine to use for the rootfs drive.
IoEngine *string
Expand All @@ -95,6 +102,14 @@ type ProcessOptions struct {
// KvmClock is a flag to enable kvm-clock as the clocksource for the kernel.
KvmClock bool

// AccessToken, when non-nil, makes Create write the guest MMDS metadata
// (sandbox/template IDs, logs address, and the access-token hash) before the
// VM boots, so a cold-booted envd can authenticate /init the same way it does
// after a memory resume. An empty string hashes to the "no token" value,
// matching Resume. Template-build cold boots leave it nil and skip the write,
// preserving their existing behavior.
AccessToken *string

// Stdout is the writer to which the process stdout will be written.
Stdout io.Writer

Expand Down Expand Up @@ -374,8 +389,7 @@ func (p *Process) Create(
"i8042.noaux": "",
"random.trust_cpu": "on",

// discard: ext4 issues TRIM on freed blocks so they are elided from the snapshot diff.
"rootflags": "discard",
"rootflags": ext4RootFlags,
}

if options.KvmClock {
Expand Down Expand Up @@ -463,6 +477,27 @@ func (p *Process) Create(
)
}

// Write MMDS metadata before boot when an access token is provided (the
// cold-boot/reboot user path) so the guest envd can authenticate /init the
// same way it does after a memory resume. The MMDS transport is already
// configured by setNetworkInterface above. Template-build cold boots leave
// AccessToken nil and skip this, preserving their existing behavior.
if options.AccessToken != nil {
md := sbxMetadata.LoggerMetadata()
meta := &MmdsMetadata{
SandboxID: md.SandboxID,
TemplateID: md.TemplateID,
LogsCollectorAddress: fmt.Sprintf("http://%s/logs", p.config.NetworkConfig.OrchestratorInSandboxIPAddress),
AccessTokenHash: keys.HashAccessToken(*options.AccessToken),
}
if err := p.client.setMmds(ctx, meta); err != nil {
fcStopErr := p.Stop(ctx)

return errors.Join(fmt.Errorf("error setting mmds: %w", err), fcStopErr)
}
telemetry.ReportEvent(ctx, "set fc mmds metadata")
}

err = p.client.startVM(ctx)
if err != nil {
fcStopErr := p.Stop(ctx)
Expand Down
Loading
Loading