Skip to content

Commit 5715f44

Browse files
committed
fix: separate signals for passive, active, and forced shutdown
`SIGTERM`: Passive shutdown stopping provisioner daemons from accepting new jobs but waiting for existing jobs to successfully complete. `SIGINT` (old existing behavior): Notify provisioner daemons to cancel in-flight jobs, wait 5s for jobs to be exited, then force quit. `SIGKILL`: Untouched from before, will force-quit.
1 parent b2a5e2f commit 5715f44

File tree

11 files changed

+84
-30
lines changed

11 files changed

+84
-30
lines changed

cli/organization.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func (r *RootCmd) switchOrganization() *clibase.Cmd {
6060
conf := r.createConfig()
6161
orgs, err := client.OrganizationsByUser(inv.Context(), codersdk.Me)
6262
if err != nil {
63-
return fmt.Errorf("failed to get organizations: %w", err)
63+
return xerrors.Errorf("failed to get organizations: %w", err)
6464
}
6565
// Keep the list of orgs sorted
6666
slices.SortFunc(orgs, func(a, b codersdk.Organization) int {
@@ -84,7 +84,7 @@ func (r *RootCmd) switchOrganization() *clibase.Cmd {
8484
if switchToOrg == "" {
8585
err := conf.Organization().Delete()
8686
if err != nil && !errors.Is(err, os.ErrNotExist) {
87-
return fmt.Errorf("failed to unset organization: %w", err)
87+
return xerrors.Errorf("failed to unset organization: %w", err)
8888
}
8989
_, _ = fmt.Fprintf(inv.Stdout, "Organization unset\n")
9090
} else {
@@ -107,7 +107,7 @@ func (r *RootCmd) switchOrganization() *clibase.Cmd {
107107
// Always write the uuid to the config file. Names can change.
108108
err := conf.Organization().Write(orgs[index].ID.String())
109109
if err != nil {
110-
return fmt.Errorf("failed to write organization to config file: %w", err)
110+
return xerrors.Errorf("failed to write organization to config file: %w", err)
111111
}
112112
}
113113

@@ -123,7 +123,7 @@ func (r *RootCmd) switchOrganization() *clibase.Cmd {
123123
}
124124
return sdkError
125125
}
126-
return fmt.Errorf("failed to get current organization: %w", err)
126+
return xerrors.Errorf("failed to get current organization: %w", err)
127127
}
128128

129129
_, _ = fmt.Fprintf(inv.Stdout, "Current organization context set to %s (%s)\n", current.Name, current.ID.String())
@@ -213,7 +213,7 @@ func (r *RootCmd) currentOrganization() *clibase.Cmd {
213213
typed, ok := data.([]codersdk.Organization)
214214
if !ok {
215215
// This should never happen
216-
return "", fmt.Errorf("expected []Organization, got %T", data)
216+
return "", xerrors.Errorf("expected []Organization, got %T", data)
217217
}
218218
return stringFormat(typed)
219219
}),
@@ -250,7 +250,7 @@ func (r *RootCmd) currentOrganization() *clibase.Cmd {
250250
case "current":
251251
stringFormat = func(orgs []codersdk.Organization) (string, error) {
252252
if len(orgs) != 1 {
253-
return "", fmt.Errorf("expected 1 organization, got %d", len(orgs))
253+
return "", xerrors.Errorf("expected 1 organization, got %d", len(orgs))
254254
}
255255
return fmt.Sprintf("Current CLI Organization: %s (%s)\n", orgs[0].Name, orgs[0].ID.String()), nil
256256
}
@@ -275,7 +275,7 @@ func (r *RootCmd) currentOrganization() *clibase.Cmd {
275275
default:
276276
stringFormat = func(orgs []codersdk.Organization) (string, error) {
277277
if len(orgs) != 1 {
278-
return "", fmt.Errorf("expected 1 organization, got %d", len(orgs))
278+
return "", xerrors.Errorf("expected 1 organization, got %d", len(orgs))
279279
}
280280
return fmt.Sprintf("Organization: %s (%s)\n", orgs[0].Name, orgs[0].ID.String()), nil
281281
}

cli/organizationmanage.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func (r *RootCmd) createOrganization() *clibase.Cmd {
3434
// from creating it.
3535
existing, _ := client.OrganizationByName(inv.Context(), orgName)
3636
if existing.ID != uuid.Nil {
37-
return fmt.Errorf("organization %q already exists", orgName)
37+
return xerrors.Errorf("organization %q already exists", orgName)
3838
}
3939

4040
_, err := cliui.Prompt(inv, cliui.PromptOptions{
@@ -53,7 +53,7 @@ func (r *RootCmd) createOrganization() *clibase.Cmd {
5353
Name: orgName,
5454
})
5555
if err != nil {
56-
return fmt.Errorf("failed to create organization: %w", err)
56+
return xerrors.Errorf("failed to create organization: %w", err)
5757
}
5858

5959
_, _ = fmt.Fprintf(inv.Stdout, "Organization %s (%s) created.\n", organization.Name, organization.ID)

cli/root.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ func CurrentOrganization(r *RootCmd, inv *clibase.Invocation, client *codersdk.C
715715
if selected == "" && conf.Organization().Exists() {
716716
org, err := conf.Organization().Read()
717717
if err != nil {
718-
return codersdk.Organization{}, fmt.Errorf("read selected organization from config file %q: %w", conf.Organization(), err)
718+
return codersdk.Organization{}, xerrors.Errorf("read selected organization from config file %q: %w", conf.Organization(), err)
719719
}
720720
selected = org
721721
}

cli/server.go

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -337,16 +337,18 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
337337

338338
// Register signals early on so that graceful shutdown can't
339339
// be interrupted by additional signals. Note that we avoid
340-
// shadowing cancel() (from above) here because notifyStop()
340+
// shadowing cancel() (from above) here because stopCancel()
341341
// restores default behavior for the signals. This protects
342342
// the shutdown sequence from abruptly terminating things
343343
// like: database migrations, provisioner work, workspace
344344
// cleanup in dev-mode, etc.
345345
//
346346
// To get out of a graceful shutdown, the user can send
347347
// SIGQUIT with ctrl+\ or SIGKILL with `kill -9`.
348-
notifyCtx, notifyStop := inv.SignalNotifyContext(ctx, InterruptSignals...)
349-
defer notifyStop()
348+
stopCtx, stopCancel := signalNotifyContext(ctx, inv, StopSignals...)
349+
defer stopCancel()
350+
interruptCtx, interruptCancel := signalNotifyContext(ctx, inv, InterruptSignals...)
351+
defer interruptCancel()
350352

351353
cacheDir := vals.CacheDir.String()
352354
err = os.MkdirAll(cacheDir, 0o700)
@@ -1028,13 +1030,18 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
10281030
hangDetector.Start()
10291031
defer hangDetector.Close()
10301032

1033+
graceful := false
10311034
// Currently there is no way to ask the server to shut
10321035
// itself down, so any exit signal will result in a non-zero
10331036
// exit of the server.
10341037
var exitErr error
10351038
select {
1036-
case <-notifyCtx.Done():
1037-
exitErr = notifyCtx.Err()
1039+
case <-stopCtx.Done():
1040+
exitErr = stopCtx.Err()
1041+
graceful = true
1042+
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Stop caught, waiting for provisioner jobs to complete and gracefully exiting. Use ctrl+\\ to force quit"))
1043+
case <-interruptCtx.Done():
1044+
exitErr = interruptCtx.Err()
10381045
_, _ = io.WriteString(inv.Stdout, cliui.Bold("Interrupt caught, gracefully exiting. Use ctrl+\\ to force quit"))
10391046
case <-tunnelDone:
10401047
exitErr = xerrors.New("dev tunnel closed unexpectedly")
@@ -1082,7 +1089,16 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
10821089
defer wg.Done()
10831090

10841091
r.Verbosef(inv, "Shutting down provisioner daemon %d...", id)
1085-
err := shutdownWithTimeout(provisionerDaemon.Shutdown, 5*time.Second)
1092+
timeout := 5 * time.Second
1093+
if graceful {
1094+
// It can last for a long time...
1095+
timeout = 30 * time.Minute
1096+
}
1097+
1098+
err := shutdownWithTimeout(func(ctx context.Context) error {
1099+
// We only want to cancel active jobs if we aren't exiting gracefully.
1100+
return provisionerDaemon.Shutdown(ctx, !graceful)
1101+
}, timeout)
10861102
if err != nil {
10871103
cliui.Errorf(inv.Stderr, "Failed to shut down provisioner daemon %d: %s\n", id, err)
10881104
return
@@ -2512,3 +2528,12 @@ func escapePostgresURLUserInfo(v string) (string, error) {
25122528

25132529
return v, nil
25142530
}
2531+
2532+
func signalNotifyContext(ctx context.Context, inv *clibase.Invocation, sig ...os.Signal) (context.Context, context.CancelFunc) {
2533+
// On Windows, some of our signal functions lack support.
2534+
// If we pass in no signals, we should just return the context as-is.
2535+
if len(sig) == 0 {
2536+
return context.WithCancel(ctx)
2537+
}
2538+
return inv.SignalNotifyContext(ctx, sig...)
2539+
}

cli/signal_unix.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,24 @@ import (
77
"syscall"
88
)
99

10+
// StopSignals are used to gracefully exit.
11+
// An example is exiting provisioner daemons but not canceling
12+
// jobs, allowing a successful and clean exit.
13+
var StopSignals = []os.Signal{
14+
syscall.SIGTERM,
15+
}
16+
17+
// InterruptSignals are used to less gracefully exit.
18+
// An example is canceling a job, waiting for a timeout,
19+
// and then exiting.
1020
var InterruptSignals = []os.Signal{
21+
// SIGINT
1122
os.Interrupt,
12-
syscall.SIGTERM,
1323
syscall.SIGHUP,
1424
}
25+
26+
// KillSignals will force exit.
27+
var KillSignals = []os.Signal{
28+
// SIGKILL
29+
os.Kill,
30+
}

cli/signal_windows.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,14 @@ import (
66
"os"
77
)
88

9-
var InterruptSignals = []os.Signal{os.Interrupt}
9+
// Check the UNIX file for the comments on the signals
10+
11+
var StopSignals = []os.Signal{}
12+
13+
var InterruptSignals = []os.Signal{
14+
os.Interrupt,
15+
}
16+
17+
var KillSignals = []os.Signal{
18+
os.Kill,
19+
}

coderd/coderdtest/coderdtest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ func (c *provisionerdCloser) Close() error {
498498
c.closed = true
499499
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
500500
defer cancel()
501-
shutdownErr := c.d.Shutdown(ctx)
501+
shutdownErr := c.d.Shutdown(ctx, true)
502502
closeErr := c.d.Close()
503503
if shutdownErr != nil {
504504
return shutdownErr

enterprise/cli/provisionerdaemons.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ func (r *RootCmd) provisionerDaemonStart() *clibase.Cmd {
225225
cliui.Errorf(inv.Stderr, "Unexpected error, shutting down server: %s\n", exitErr)
226226
}
227227

228-
err = srv.Shutdown(ctx)
228+
err = srv.Shutdown(ctx, false)
229229
if err != nil {
230230
return xerrors.Errorf("shutdown: %w", err)
231231
}

enterprise/coderd/provisionerdaemons_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ func TestProvisionerDaemonServe(t *testing.T) {
440440
build := coderdtest.AwaitWorkspaceBuildJobCompleted(t, client, workspace.LatestBuild.ID)
441441
require.Equal(t, codersdk.WorkspaceStatusRunning, build.Status)
442442

443-
err = pd.Shutdown(ctx)
443+
err = pd.Shutdown(ctx, false)
444444
require.NoError(t, err)
445445
err = terraformServer.Close()
446446
require.NoError(t, err)

provisionerd/provisionerd.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -474,15 +474,18 @@ func (p *Server) isClosed() bool {
474474
}
475475
}
476476

477-
// Shutdown triggers a graceful exit of each registered provisioner.
478-
func (p *Server) Shutdown(ctx context.Context) error {
477+
// Shutdown gracefully exists with the option to cancel the active job.
478+
// If false, it will wait for the job to complete.
479+
//
480+
//nolint:revive
481+
func (p *Server) Shutdown(ctx context.Context, cancelActiveJob bool) error {
479482
p.mutex.Lock()
480483
p.opts.Logger.Info(ctx, "attempting graceful shutdown")
481484
if !p.shuttingDownB {
482485
close(p.shuttingDownCh)
483486
p.shuttingDownB = true
484487
}
485-
if p.activeJob != nil {
488+
if cancelActiveJob && p.activeJob != nil {
486489
p.activeJob.Cancel()
487490
}
488491
p.mutex.Unlock()

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy