Skip to content

Commit d920c64

Browse files
committed
refactor(toolsdk): simplify workspace bash background execution with nohup
Change-Id: If67c30717158bdd84e9f733b56365af7c8d0b51a Signed-off-by: Thomas Kosiewski <tk@coder.com>
1 parent 3750aee commit d920c64

File tree

4 files changed

+85
-114
lines changed

4 files changed

+85
-114
lines changed

codersdk/toolsdk/bash.go

Lines changed: 35 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ package toolsdk
33
import (
44
"bytes"
55
"context"
6-
_ "embed"
7-
"encoding/base64"
86
"errors"
97
"fmt"
108
"io"
@@ -20,7 +18,6 @@ import (
2018
"github.com/coder/coder/v2/cli/cliui"
2119
"github.com/coder/coder/v2/codersdk"
2220
"github.com/coder/coder/v2/codersdk/workspacesdk"
23-
"github.com/coder/coder/v2/cryptorand"
2421
)
2522

2623
type WorkspaceBashArgs struct {
@@ -35,9 +32,6 @@ type WorkspaceBashResult struct {
3532
ExitCode int `json:"exit_code"`
3633
}
3734

38-
//go:embed resources/background.sh
39-
var backgroundScript string
40-
4135
var WorkspaceBash = Tool[WorkspaceBashArgs, WorkspaceBashResult]{
4236
Tool: aisdk.Tool{
4337
Name: ToolNameWorkspaceBash,
@@ -57,10 +51,13 @@ The workspace parameter supports various formats:
5751
The timeout_ms parameter specifies the command timeout in milliseconds (defaults to 60000ms, maximum of 300000ms).
5852
If the command times out, all output captured up to that point is returned with a cancellation message.
5953
54+
For background commands (background: true), output is captured until the timeout is reached, then the command
55+
continues running in the background. The captured output is returned as the result.
56+
6057
Examples:
6158
- workspace: "my-workspace", command: "ls -la"
6259
- workspace: "john/dev-env", command: "git status", timeout_ms: 30000
63-
- workspace: "my-workspace", command: "npm run dev", background: true
60+
- workspace: "my-workspace", command: "npm run dev", background: true, timeout_ms: 10000
6461
- workspace: "my-workspace.main", command: "docker ps"`,
6562
Schema: aisdk.Schema{
6663
Properties: map[string]any{
@@ -80,7 +77,7 @@ Examples:
8077
},
8178
"background": map[string]any{
8279
"type": "boolean",
83-
"description": "Whether to run the command in the background. The command will not be affected by the timeout.",
80+
"description": "Whether to run the command in the background. Output is captured until timeout, then the command continues running in the background.",
8481
},
8582
},
8683
Required: []string{"workspace", "command"},
@@ -155,35 +152,29 @@ Examples:
155152
}
156153
command := args.Command
157154
if args.Background {
158-
// Background commands are not affected by the timeout
159-
timeoutMs = defaultTimeoutMs
160-
encodedCommand := base64.StdEncoding.EncodeToString([]byte(args.Command))
161-
encodedScript := base64.StdEncoding.EncodeToString([]byte(backgroundScript))
162-
commandID, err := cryptorand.StringCharset(cryptorand.Human, 8)
163-
if err != nil {
164-
return WorkspaceBashResult{}, xerrors.Errorf("failed to generate command ID: %w", err)
165-
}
166-
command = fmt.Sprintf(
167-
"ARG_COMMAND=\"$(echo -n %s | base64 -d)\" ARG_COMMAND_ID=%s bash -c \"$(echo -n %s | base64 -d)\"",
168-
encodedCommand,
169-
commandID,
170-
encodedScript,
171-
)
155+
// For background commands, use nohup directly to ensure they survive SSH session
156+
// termination. This captures output normally but allows the process to continue
157+
// running even after the SSH connection closes.
158+
command = fmt.Sprintf("nohup %s </dev/null 2>&1", args.Command)
172159
}
173160

174-
// Create context with timeout
175-
ctx, cancel = context.WithTimeout(ctx, time.Duration(timeoutMs)*time.Millisecond)
176-
defer cancel()
161+
// Create context with command timeout (replace the broader MCP timeout)
162+
commandCtx, commandCancel := context.WithTimeout(ctx, time.Duration(timeoutMs)*time.Millisecond)
163+
defer commandCancel()
177164

178165
// Execute command with timeout handling
179-
output, err := executeCommandWithTimeout(ctx, session, command)
166+
output, err := executeCommandWithTimeout(commandCtx, session, command)
180167
outputStr := strings.TrimSpace(string(output))
181168

182169
// Handle command execution results
183170
if err != nil {
184171
// Check if the command timed out
185-
if errors.Is(context.Cause(ctx), context.DeadlineExceeded) {
186-
outputStr += "\nCommand canceled due to timeout"
172+
if errors.Is(context.Cause(commandCtx), context.DeadlineExceeded) {
173+
if args.Background {
174+
outputStr += "\nCommand continues running in background"
175+
} else {
176+
outputStr += "\nCommand canceled due to timeout"
177+
}
187178
return WorkspaceBashResult{
188179
Output: outputStr,
189180
ExitCode: 124,
@@ -417,21 +408,27 @@ func executeCommandWithTimeout(ctx context.Context, session *gossh.Session, comm
417408
return safeWriter.Bytes(), err
418409
case <-ctx.Done():
419410
// Context was canceled (timeout or other cancellation)
420-
// Close the session to stop the command
421-
_ = session.Close()
411+
// Close the session to stop the command, but handle errors gracefully
412+
closeErr := session.Close()
422413

423-
// Give a brief moment to collect any remaining output
424-
timer := time.NewTimer(50 * time.Millisecond)
414+
// Give a brief moment to collect any remaining output and for goroutines to finish
415+
timer := time.NewTimer(100 * time.Millisecond)
425416
defer timer.Stop()
426417

427418
select {
428419
case <-timer.C:
429420
// Timer expired, return what we have
421+
break
430422
case err := <-done:
431423
// Command finished during grace period
432-
return safeWriter.Bytes(), err
424+
if closeErr == nil {
425+
return safeWriter.Bytes(), err
426+
}
427+
// If session close failed, prioritize the context error
428+
break
433429
}
434430

431+
// Return the collected output with the context error
435432
return safeWriter.Bytes(), context.Cause(ctx)
436433
}
437434
}
@@ -451,5 +448,9 @@ func (sw *syncWriter) Write(p []byte) (n int, err error) {
451448
func (sw *syncWriter) Bytes() []byte {
452449
sw.mu.Lock()
453450
defer sw.mu.Unlock()
454-
return sw.w.Bytes()
451+
// Return a copy to prevent race conditions with the underlying buffer
452+
b := sw.w.Bytes()
453+
result := make([]byte, len(b))
454+
copy(result, b)
455+
return result
455456
}

codersdk/toolsdk/bash_test.go

Lines changed: 38 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,6 @@ func TestWorkspaceBashTimeout(t *testing.T) {
175175

176176
// Test that the TimeoutMs field can be set and read correctly
177177
args := toolsdk.WorkspaceBashArgs{
178-
Workspace: "test-workspace",
179-
Command: "echo test",
180178
TimeoutMs: 0, // Should default to 60000 in handler
181179
}
182180

@@ -193,8 +191,6 @@ func TestWorkspaceBashTimeout(t *testing.T) {
193191

194192
// Test that negative values can be set and will be handled by the default logic
195193
args := toolsdk.WorkspaceBashArgs{
196-
Workspace: "test-workspace",
197-
Command: "echo test",
198194
TimeoutMs: -100,
199195
}
200196

@@ -280,7 +276,7 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
280276
TimeoutMs: 2000, // 2 seconds timeout - should timeout after first echo
281277
}
282278

283-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
279+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
284280

285281
// Should not error (timeout is handled gracefully)
286282
require.NoError(t, err)
@@ -314,15 +310,15 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
314310

315311
deps, err := toolsdk.NewDeps(client)
316312
require.NoError(t, err)
317-
ctx := context.Background()
318313

319314
args := toolsdk.WorkspaceBashArgs{
320315
Workspace: workspace.Name,
321316
Command: `echo "normal command"`, // Quick command that should complete normally
322317
TimeoutMs: 5000, // 5 second timeout - plenty of time
323318
}
324319

325-
result, err := toolsdk.WorkspaceBash.Handler(ctx, deps, args)
320+
// Use testTool to register the tool as tested and satisfy coverage validation
321+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
326322

327323
// Should not error
328324
require.NoError(t, err)
@@ -343,7 +339,7 @@ func TestWorkspaceBashTimeoutIntegration(t *testing.T) {
343339
func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
344340
t.Parallel()
345341

346-
t.Run("BackgroundCommandReturnsImmediately", func(t *testing.T) {
342+
t.Run("BackgroundCommandCapturesOutput", func(t *testing.T) {
347343
t.Parallel()
348344

349345
client, workspace, agentToken := setupWorkspaceForAgent(t)
@@ -359,29 +355,29 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
359355

360356
args := toolsdk.WorkspaceBashArgs{
361357
Workspace: workspace.Name,
362-
Command: `echo "started" && sleep 5 && echo "completed"`, // Command that would take 5+ seconds
363-
Background: true, // Run in background
358+
Command: `echo "started" && sleep 60 && echo "completed"`, // Command that would take 60+ seconds
359+
Background: true, // Run in background
360+
TimeoutMs: 2000, // 2 second timeout
364361
}
365362

366-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
363+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
367364

368365
// Should not error
369366
require.NoError(t, err)
370367

371368
t.Logf("Background result: exitCode=%d, output=%q", result.ExitCode, result.Output)
372369

373-
// Should have exit code 0 (background start successful)
374-
require.Equal(t, 0, result.ExitCode)
370+
// Should have exit code 124 (timeout) since command times out
371+
require.Equal(t, 124, result.ExitCode)
375372

376-
// Should contain PID and log path info, not the actual command output
377-
require.Contains(t, result.Output, "Command started with PID:")
378-
require.Contains(t, result.Output, "Log path: /tmp/mcp-bg/")
373+
// Should capture output up to timeout point
374+
require.Contains(t, result.Output, "started", "Should contain output captured before timeout")
379375

380-
// Should NOT contain the actual command output since it runs in background
381-
// The command was `echo "started" && sleep 5 && echo "completed"`
382-
// So we check that the quoted strings don't appear in the output
383-
require.NotContains(t, result.Output, `"started"`, "Should not contain command output in background mode")
384-
require.NotContains(t, result.Output, `"completed"`, "Should not contain command output in background mode")
376+
// Should NOT contain the second echo (it never executed due to timeout)
377+
require.NotContains(t, result.Output, "completed", "Should not contain output after timeout")
378+
379+
// Should contain background continuation message
380+
require.Contains(t, result.Output, "Command continues running in background")
385381
})
386382

387383
t.Run("BackgroundVsNormalExecution", func(t *testing.T) {
@@ -419,20 +415,18 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
419415
Background: true,
420416
}
421417

422-
backgroundResult, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, backgroundArgs)
418+
backgroundResult, err := testTool(t, toolsdk.WorkspaceBash, deps, backgroundArgs)
423419
require.NoError(t, err)
424420

425421
t.Logf("Normal result: %q", normalResult.Output)
426422
t.Logf("Background result: %q", backgroundResult.Output)
427423

428-
// Background mode should return PID/log info, not the actual output
424+
// Background mode should also return the actual output since command completes quickly
429425
require.Equal(t, 0, backgroundResult.ExitCode)
430-
require.Contains(t, backgroundResult.Output, "Command started with PID:")
431-
require.Contains(t, backgroundResult.Output, "Log path: /tmp/mcp-bg/")
432-
require.NotContains(t, backgroundResult.Output, "hello world")
426+
require.Equal(t, "hello world", backgroundResult.Output)
433427
})
434428

435-
t.Run("BackgroundIgnoresTimeout", func(t *testing.T) {
429+
t.Run("BackgroundCommandContinuesAfterTimeout", func(t *testing.T) {
436430
t.Parallel()
437431

438432
client, workspace, agentToken := setupWorkspaceForAgent(t)
@@ -448,36 +442,35 @@ func TestWorkspaceBashBackgroundIntegration(t *testing.T) {
448442

449443
args := toolsdk.WorkspaceBashArgs{
450444
Workspace: workspace.Name,
451-
Command: `sleep 1 && echo "done" > /tmp/done`, // Command that would normally timeout
452-
TimeoutMs: 1, // 1 ms timeout (shorter than command duration)
453-
Background: true, // But running in background should ignore timeout
445+
Command: `echo "started" && sleep 4 && echo "done" > /tmp/bg-test-done`, // Command that will timeout but continue
446+
TimeoutMs: 2000, // 2000ms timeout (shorter than command duration)
447+
Background: true, // Run in background
454448
}
455449

456-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
450+
result, err := testTool(t, toolsdk.WorkspaceBash, deps, args)
457451

458-
// Should not error and should not timeout
452+
// Should not error but should timeout
459453
require.NoError(t, err)
460454

461455
t.Logf("Background with timeout result: exitCode=%d, output=%q", result.ExitCode, result.Output)
462456

463-
// Should have exit code 0 (background start successful)
464-
require.Equal(t, 0, result.ExitCode)
457+
// Should have timeout exit code
458+
require.Equal(t, 124, result.ExitCode)
465459

466-
// Should return PID/log info, indicating the background command started successfully
467-
require.Contains(t, result.Output, "Command started with PID:")
468-
require.Contains(t, result.Output, "Log path: /tmp/mcp-bg/")
460+
// Should capture output before timeout
461+
require.Contains(t, result.Output, "started", "Should contain output captured before timeout")
469462

470-
// Should NOT contain timeout message since background mode ignores timeout
471-
require.NotContains(t, result.Output, "Command canceled due to timeout")
463+
// Should contain background continuation message
464+
require.Contains(t, result.Output, "Command continues running in background")
472465

473-
// Wait for the background command to complete
466+
// Wait for the background command to complete (even though SSH session timed out)
474467
require.Eventually(t, func() bool {
475-
args := toolsdk.WorkspaceBashArgs{
468+
checkArgs := toolsdk.WorkspaceBashArgs{
476469
Workspace: workspace.Name,
477-
Command: `cat /tmp/done`,
470+
Command: `cat /tmp/bg-test-done 2>/dev/null || echo "not found"`,
478471
}
479-
result, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, args)
480-
return err == nil && result.Output == "done"
481-
}, testutil.WaitMedium, testutil.IntervalMedium)
472+
checkResult, err := toolsdk.WorkspaceBash.Handler(t.Context(), deps, checkArgs)
473+
return err == nil && checkResult.Output == "done"
474+
}, testutil.WaitMedium, testutil.IntervalMedium, "Background command should continue running and complete after timeout")
482475
})
483476
}

codersdk/toolsdk/resources/background.sh

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy