Skip to content

Commit ce5403b

Browse files
authored
feat: make brev exec fast with SSH multiplexing (#319)
* feat: make brev exec fast with SSH multiplexing and fire-first approach Skip upfront API status checks — just fire SSH immediately with a 5s connect timeout. If SSH succeeds (especially via a reused multiplexed connection), return instantly. If it fails, then check instance status and give helpful error messages suggesting brev ls. Add ControlMaster/ControlPath/ControlPersist to all SSH config templates so subsequent connections reuse the master socket (~0.6s vs ~3s). Suppress noisy SSH output (Agent pid, PTY warnings, known hosts warnings). * fix: update SSH config test expectations with multiplexing settings * fix: address PR feedback - ControlPath separator and ssh-agent reuse - Use `-` instead of `:` in ControlPath to avoid issues on filesystems that don't support colons (SMB mounts, WSL edge cases) - Only spawn ssh-agent if SSH_AUTH_SOCK is not already set, preventing orphaned agent processes on rapid repeated exec calls
1 parent 6264887 commit ce5403b

4 files changed

Lines changed: 157 additions & 37 deletions

File tree

pkg/cmd/exec/exec.go

Lines changed: 88 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -176,61 +176,110 @@ func parseCommand(command string) (string, error) {
176176
const pollTimeout = 10 * time.Minute
177177

178178
func runExecCommand(t *terminal.Terminal, sstore ExecStore, workspaceNameOrID string, host bool, command string) error {
179-
s := t.NewSpinner()
180-
workspace, err := util.GetUserWorkspaceByNameOrIDErr(sstore, workspaceNameOrID)
181-
if err != nil {
182-
return breverrors.WrapAndTrace(err)
179+
// Determine SSH alias: use the workspace name directly (with -host suffix if needed)
180+
sshName := workspaceNameOrID
181+
if host {
182+
sshName = workspaceNameOrID + "-host"
183+
}
184+
185+
// Fire SSH immediately with a short timeout — skip all status checks for speed.
186+
// SSH multiplexing (ControlMaster) in the config means subsequent
187+
// calls reuse an existing connection and are near-instant.
188+
// Use a 5-second connect timeout so we fail fast if the instance is down.
189+
err := runSSHWithTimeout(sshName, command, 5)
190+
if err == nil {
191+
// Success — fire analytics in background and return
192+
go trackExecAnalytics(sstore, workspaceNameOrID)
193+
return nil
183194
}
184195

185-
if workspace.Status == "STOPPED" { // we start the env for the user
186-
err = util.StartWorkspaceIfStopped(t, s, sstore, workspaceNameOrID, workspace, pollTimeout)
196+
// SSH failed — now check what's going on with the instance
197+
fmt.Fprintf(os.Stderr, "Connection failed, checking instance status...\n")
198+
199+
workspace, lookupErr := util.GetUserWorkspaceByNameOrIDErr(sstore, workspaceNameOrID)
200+
if lookupErr != nil {
201+
return breverrors.WrapAndTrace(fmt.Errorf(
202+
"ssh connection failed and could not look up instance %q: %w\nPlease check your instances with: brev ls",
203+
workspaceNameOrID, err))
204+
}
205+
206+
if workspace.Status == "STOPPED" {
207+
s := t.NewSpinner()
208+
startErr := util.StartWorkspaceIfStopped(t, s, sstore, workspaceNameOrID, workspace, pollTimeout)
209+
if startErr != nil {
210+
return breverrors.WrapAndTrace(startErr)
211+
}
212+
err = util.PollUntil(s, workspace.ID, "RUNNING", sstore, " waiting for instance to be ready...", pollTimeout)
213+
if err != nil {
214+
return breverrors.WrapAndTrace(err)
215+
}
216+
// Refresh SSH config so the host entry is up to date
217+
refreshRes := refresh.RunRefreshAsync(sstore)
218+
if err = refreshRes.Await(); err != nil {
219+
return breverrors.WrapAndTrace(err)
220+
}
221+
222+
localIdentifier := workspace.GetLocalIdentifier()
223+
if host {
224+
localIdentifier = workspace.GetHostIdentifier()
225+
}
226+
sshName = string(localIdentifier)
227+
228+
err = util.WaitForSSHToBeAvailable(sshName, s)
187229
if err != nil {
188230
return breverrors.WrapAndTrace(err)
189231
}
232+
_ = writeconnectionevent.WriteWCEOnEnv(sstore, workspace.DNS)
233+
err = runSSH(sshName, command)
234+
if err != nil {
235+
return breverrors.WrapAndTrace(err)
236+
}
237+
go trackExecAnalytics(sstore, workspaceNameOrID)
238+
return nil
190239
}
191-
err = util.PollUntil(s, workspace.ID, "RUNNING", sstore, " waiting for instance to be ready...", pollTimeout)
192-
if err != nil {
193-
return breverrors.WrapAndTrace(err)
240+
241+
if workspace.Status != "RUNNING" {
242+
return breverrors.WrapAndTrace(fmt.Errorf(
243+
"instance %q is in state %q — please check with: brev ls",
244+
workspaceNameOrID, workspace.Status))
194245
}
195-
refreshRes := refresh.RunRefreshAsync(sstore)
196246

197-
workspace, err = util.GetUserWorkspaceByNameOrIDErr(sstore, workspaceNameOrID)
198-
if err != nil {
247+
// Instance is RUNNING but SSH failed — maybe still booting, do the wait
248+
s := t.NewSpinner()
249+
refreshRes := refresh.RunRefreshAsync(sstore)
250+
if err = refreshRes.Await(); err != nil {
199251
return breverrors.WrapAndTrace(err)
200252
}
201-
if workspace.Status != "RUNNING" {
202-
return breverrors.New("Instance is not running")
203-
}
204253

205254
localIdentifier := workspace.GetLocalIdentifier()
206255
if host {
207256
localIdentifier = workspace.GetHostIdentifier()
208257
}
258+
sshName = string(localIdentifier)
209259

210-
sshName := string(localIdentifier)
211-
212-
err = refreshRes.Await()
213-
if err != nil {
214-
return breverrors.WrapAndTrace(err)
215-
}
216260
err = util.WaitForSSHToBeAvailable(sshName, s)
217261
if err != nil {
218-
return breverrors.WrapAndTrace(err)
262+
return breverrors.WrapAndTrace(fmt.Errorf(
263+
"could not connect to instance %q: %w\nPlease check with: brev ls",
264+
workspaceNameOrID, err))
219265
}
220-
// we don't care about the error here but should log with sentry
221-
// legacy environments wont support this and cause errrors,
222-
// but we don't want to block the user from using the shell
223266
_ = writeconnectionevent.WriteWCEOnEnv(sstore, workspace.DNS)
224267
err = runSSH(sshName, command)
225268
if err != nil {
226269
return breverrors.WrapAndTrace(err)
227270
}
228-
// Call analytics for exec
229-
userID := ""
230-
user, err := sstore.GetCurrentUser()
271+
go trackExecAnalytics(sstore, workspaceNameOrID)
272+
return nil
273+
}
274+
275+
func trackExecAnalytics(sstore ExecStore, workspaceNameOrID string) {
276+
workspace, err := util.GetUserWorkspaceByNameOrIDErr(sstore, workspaceNameOrID)
231277
if err != nil {
232-
userID = workspace.CreatedByUserID
233-
} else {
278+
return
279+
}
280+
userID := workspace.CreatedByUserID
281+
user, err := sstore.GetCurrentUser()
282+
if err == nil {
234283
userID = user.ID
235284
}
236285
data := analytics.EventData{
@@ -241,19 +290,17 @@ func runExecCommand(t *terminal.Terminal, sstore ExecStore, workspaceNameOrID st
241290
},
242291
}
243292
_ = analytics.TrackEvent(data)
244-
245-
return nil
246293
}
247294

248-
func runSSH(sshAlias string, command string) error {
249-
sshAgentEval := "eval $(ssh-agent -s)"
250-
295+
func runSSHWithTimeout(sshAlias string, command string, connectTimeoutSecs int) error {
251296
// Non-interactive: run command and pipe stdout/stderr
252297
// Escape the command for passing to SSH
253298
escapedCmd := strings.ReplaceAll(command, "'", "'\\''")
254-
cmd := fmt.Sprintf("ssh %s '%s'", sshAlias, escapedCmd)
299+
// -T disables pseudo-terminal allocation (no "Pseudo-terminal will not be allocated" warning)
300+
// Only start ssh-agent if one isn't already running (avoids orphaned agent processes)
301+
agentCmd := `if [ -z "$SSH_AUTH_SOCK" ]; then eval $(ssh-agent -s) > /dev/null; fi`
302+
cmd := fmt.Sprintf("%s && ssh -T -o ConnectTimeout=%d -o LogLevel=ERROR %s '%s'", agentCmd, connectTimeoutSecs, sshAlias, escapedCmd)
255303

256-
cmd = fmt.Sprintf("%s && %s", sshAgentEval, cmd)
257304
sshCmd := exec.Command("bash", "-c", cmd) //nolint:gosec //cmd is user input
258305
sshCmd.Stderr = os.Stderr
259306
sshCmd.Stdout = os.Stdout
@@ -265,3 +312,7 @@ func runSSH(sshAlias string, command string) error {
265312
}
266313
return nil
267314
}
315+
316+
func runSSH(sshAlias string, command string) error {
317+
return runSSHWithTimeout(sshAlias, command, 10)
318+
}

pkg/ssh/ssh.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ const workspaceSSHConfigTemplate = `Host {{ .Host }}
3838
RequestTTY yes
3939
ForwardAgent yes
4040
AddKeysToAgent yes
41+
ControlMaster auto
42+
ControlPath ~/.ssh/brev-control-%r@%h-%p
43+
ControlPersist 10m
4144
RemoteCommand cd {{ .Dir }}; $SHELL
4245
4346
`

pkg/ssh/sshconfigurer.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,9 @@ const SSHConfigEntryTemplateV2 = `Host {{ .Alias }}
304304
AddKeysToAgent yes
305305
ForwardAgent yes
306306
RequestTTY yes
307+
ControlMaster auto
308+
ControlPath ~/.ssh/brev-control-%r@%h-%p
309+
ControlPersist 10m
307310
{{ if .RunRemoteCMD }}
308311
RemoteCommand cd {{ .Dir }}; $SHELL
309312
{{ end }}
@@ -321,6 +324,9 @@ const SSHConfigEntryTemplateV3 = `Host {{ .Alias }}
321324
AddKeysToAgent yes
322325
ForwardAgent yes
323326
RequestTTY yes
327+
ControlMaster auto
328+
ControlPath ~/.ssh/brev-control-%r@%h-%p
329+
ControlPersist 10m
324330
Port {{ .Port }}
325331
{{ if .RunRemoteCMD }}
326332
RemoteCommand cd {{ .Dir }}; $SHELL

0 commit comments

Comments
 (0)