From 06399d0b94e76f3b7f7412e779f81854b8062909 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Tue, 13 Jun 2023 00:29:45 +0000 Subject: [PATCH 1/9] chore: move autobuild/executor into autobuild --- cli/server.go | 4 +- coderd/autobuild/doc.go | 3 + .../{executor => }/lifecycle_executor.go | 134 +++++++++--------- .../{executor => }/lifecycle_executor_test.go | 36 ++--- coderd/autobuild/notify/notifier_test.go | 2 +- coderd/coderdtest/coderdtest.go | 6 +- 6 files changed, 96 insertions(+), 89 deletions(-) create mode 100644 coderd/autobuild/doc.go rename coderd/autobuild/{executor => }/lifecycle_executor.go (62%) rename coderd/autobuild/{executor => }/lifecycle_executor_test.go (97%) diff --git a/cli/server.go b/cli/server.go index e30bf5fd71fde..c77b252429fcd 100644 --- a/cli/server.go +++ b/cli/server.go @@ -62,7 +62,7 @@ import ( "github.com/coder/coder/cli/cliui" "github.com/coder/coder/cli/config" "github.com/coder/coder/coderd" - "github.com/coder/coder/coderd/autobuild/executor" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbfake" "github.com/coder/coder/coderd/database/dbmetrics" @@ -899,7 +899,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. autobuildPoller := time.NewTicker(cfg.AutobuildPollInterval.Value()) defer autobuildPoller.Stop() - autobuildExecutor := executor.New(ctx, options.Database, coderAPI.TemplateScheduleStore, logger, autobuildPoller.C) + autobuildExecutor := autobuild.NewExecutor(ctx, options.Database, coderAPI.TemplateScheduleStore, logger, autobuildPoller.C) autobuildExecutor.Run() // Currently there is no way to ask the server to shut diff --git a/coderd/autobuild/doc.go b/coderd/autobuild/doc.go new file mode 100644 index 0000000000000..2e6e7c5a0c287 --- /dev/null +++ b/coderd/autobuild/doc.go @@ -0,0 +1,3 @@ +// Package autobuild contains logic for scheduling workspace +// builds in the background. +package autobuild diff --git a/coderd/autobuild/executor/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go similarity index 62% rename from coderd/autobuild/executor/lifecycle_executor.go rename to coderd/autobuild/lifecycle_executor.go index 761293fc63ae5..6138e8ec58cf2 100644 --- a/coderd/autobuild/executor/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -1,4 +1,4 @@ -package executor +package autobuild import ( "context" @@ -35,8 +35,8 @@ type Stats struct { Error error } -// New returns a new autobuild executor. -func New(ctx context.Context, db database.Store, tss *atomic.Pointer[schedule.TemplateScheduleStore], log slog.Logger, tick <-chan time.Time) *Executor { +// New returns a new wsactions executor. +func NewExecutor(ctx context.Context, db database.Store, tss *atomic.Pointer[schedule.TemplateScheduleStore], log slog.Logger, tick <-chan time.Time) *Executor { le := &Executor{ //nolint:gocritic // Autostart has a limited set of permissions. ctx: dbauthz.AsAutostart(ctx), @@ -125,77 +125,57 @@ func (e *Executor) runOnce(t time.Time) Stats { log := e.log.With(slog.F("workspace_id", wsID)) eg.Go(func() error { - err := e.db.InTx(func(db database.Store) error { + err := e.db.InTx(func(tx database.Store) error { // Re-check eligibility since the first check was outside the // transaction and the workspace settings may have changed. - ws, err := db.GetWorkspaceByID(e.ctx, wsID) + ws, err := tx.GetWorkspaceByID(e.ctx, wsID) if err != nil { log.Error(e.ctx, "get workspace autostart failed", slog.Error(err)) return nil } // Determine the workspace state based on its latest build. - priorHistory, err := db.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) + latestBuild, err := tx.GetLatestWorkspaceBuildByWorkspaceID(e.ctx, ws.ID) if err != nil { log.Warn(e.ctx, "get latest workspace build", slog.Error(err)) return nil } - templateSchedule, err := (*(e.templateScheduleStore.Load())).GetTemplateScheduleOptions(e.ctx, db, ws.TemplateID) + templateSchedule, err := (*(e.templateScheduleStore.Load())).GetTemplateScheduleOptions(e.ctx, tx, ws.TemplateID) if err != nil { log.Warn(e.ctx, "get template schedule options", slog.Error(err)) return nil } - if !isEligibleForAutoStartStop(ws, priorHistory, templateSchedule) { - return nil - } - - priorJob, err := db.GetProvisionerJobByID(e.ctx, priorHistory.JobID) + latestJob, err := tx.GetProvisionerJobByID(e.ctx, latestBuild.JobID) if err != nil { log.Warn(e.ctx, "get last provisioner job for workspace %q: %w", slog.Error(err)) return nil } - validTransition, nextTransition, err := getNextTransition(ws, priorHistory, priorJob) + nextTransition, reason, err := getNextTransition(ws, latestBuild, latestJob, templateSchedule, currentTick) if err != nil { log.Debug(e.ctx, "skipping workspace", slog.Error(err)) return nil } - if currentTick.Before(nextTransition) { - log.Debug(e.ctx, "skipping workspace: too early", - slog.F("next_transition_at", nextTransition), - slog.F("transition", validTransition), - slog.F("current_tick", currentTick), - ) - return nil - } - builder := wsbuilder.New(ws, validTransition). - SetLastWorkspaceBuildInTx(&priorHistory). - SetLastWorkspaceBuildJobInTx(&priorJob) - - switch validTransition { - case database.WorkspaceTransitionStart: - builder = builder.Reason(database.BuildReasonAutostart) - case database.WorkspaceTransitionStop: - builder = builder.Reason(database.BuildReasonAutostop) - default: - log.Error(e.ctx, "unsupported transition", slog.F("transition", validTransition)) - return nil - } - if _, _, err := builder.Build(e.ctx, db, nil); err != nil { + builder := wsbuilder.New(ws, nextTransition). + SetLastWorkspaceBuildInTx(&latestBuild). + SetLastWorkspaceBuildJobInTx(&latestJob). + Reason(reason) + + if _, _, err := builder.Build(e.ctx, tx, nil); err != nil { log.Error(e.ctx, "unable to transition workspace", - slog.F("transition", validTransition), + slog.F("transition", nextTransition), slog.Error(err), ) return nil } statsMu.Lock() - stats.Transitions[ws.ID] = validTransition + stats.Transitions[ws.ID] = nextTransition statsMu.Unlock() - log.Info(e.ctx, "scheduling workspace transition", slog.F("transition", validTransition)) + log.Info(e.ctx, "scheduling workspace transition", slog.F("transition", nextTransition)) return nil @@ -218,7 +198,9 @@ func (e *Executor) runOnce(t time.Time) Stats { return stats } -func isEligibleForAutoStartStop(ws database.Workspace, priorHistory database.WorkspaceBuild, templateSchedule schedule.TemplateScheduleOptions) bool { +// isEligibleForTransition returns true if the workspace meets basic criteria +// for transitioning to a new state. +func isEligibleForTransition(ws database.Workspace, latestBuild database.WorkspaceBuild, templateSchedule schedule.TemplateScheduleOptions) bool { if ws.Deleted { return false } @@ -227,7 +209,7 @@ func isEligibleForAutoStartStop(ws database.Workspace, priorHistory database.Wor } // Don't check the template schedule to see whether it allows autostop, this // is done during the build when determining the deadline. - if priorHistory.Transition == database.WorkspaceTransitionStart && !priorHistory.Deadline.IsZero() { + if latestBuild.Transition == database.WorkspaceTransitionStart && !latestBuild.Deadline.IsZero() { return true } @@ -236,35 +218,57 @@ func isEligibleForAutoStartStop(ws database.Workspace, priorHistory database.Wor func getNextTransition( ws database.Workspace, - priorHistory database.WorkspaceBuild, - priorJob database.ProvisionerJob, + latestBuild database.WorkspaceBuild, + latestJob database.ProvisionerJob, + templateSchedule schedule.TemplateScheduleOptions, + currentTick time.Time, ) ( - validTransition database.WorkspaceTransition, - nextTransition time.Time, - err error, + database.WorkspaceTransition, + database.BuildReason, + error, ) { - if !priorJob.CompletedAt.Valid || priorJob.Error.String != "" { - return "", time.Time{}, xerrors.Errorf("last workspace build did not complete successfully") + if !isEligibleForTransition(ws, latestBuild, templateSchedule) { + return "", "", xerrors.Errorf("workspace ineligible for transition") } - switch priorHistory.Transition { - case database.WorkspaceTransitionStart: - if priorHistory.Deadline.IsZero() { - return "", time.Time{}, xerrors.Errorf("latest workspace build has zero deadline") - } - // For stopping, do not truncate. This is inconsistent with autostart, but - // it ensures we will not stop too early. - return database.WorkspaceTransitionStop, priorHistory.Deadline, nil - case database.WorkspaceTransitionStop: - sched, err := schedule.Weekly(ws.AutostartSchedule.String) - if err != nil { - return "", time.Time{}, xerrors.Errorf("workspace has invalid autostart schedule: %w", err) - } - // Round down to the nearest minute, as this is the finest granularity cron supports. - // Truncate is probably not necessary here, but doing it anyway to be sure. - nextTransition = sched.Next(priorHistory.CreatedAt).Truncate(time.Minute) - return database.WorkspaceTransitionStart, nextTransition, nil + if !latestJob.CompletedAt.Valid || latestJob.Error.String != "" { + return "", "", xerrors.Errorf("last workspace build did not complete successfully") + } + + switch { + case isEligibleForAutostop(latestBuild, currentTick): + return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil + case isEligibleForAutostart(ws, latestBuild, currentTick): + return database.WorkspaceTransitionStart, database.BuildReasonAutostart, nil default: - return "", time.Time{}, xerrors.Errorf("last transition not valid for autostart or autostop") + return "", "", xerrors.Errorf("last transition not valid for autostart or autostop") + } +} + +// isEligibleForAutostart returns true if the workspace should be autostarted. +func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild, currentTick time.Time) bool { + // If the last transition for the workspace was not 'stop' then the workspace + // cannot be started. + if build.Transition != database.WorkspaceTransitionStop { + return false + } + + sched, err := schedule.Weekly(ws.AutostartSchedule.String) + if err != nil { + return false } + // Round down to the nearest minute, as this is the finest granularity cron supports. + // Truncate is probably not necessary here, but doing it anyway to be sure. + nextTransition := sched.Next(build.CreatedAt).Truncate(time.Minute) + + return !currentTick.Before(nextTransition) +} + +// isEligibleForAutostart returns true if the workspace should be autostopped. +func isEligibleForAutostop(build database.WorkspaceBuild, currentTick time.Time) bool { + // A workspace must be started in order for it to be auto-stopped. + return build.Transition == database.WorkspaceTransitionStart && + !build.Deadline.IsZero() && + // We do not want to stop a workspace prior to it breaching its deadline. + !currentTick.Before(build.Deadline) } diff --git a/coderd/autobuild/executor/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go similarity index 97% rename from coderd/autobuild/executor/lifecycle_executor_test.go rename to coderd/autobuild/lifecycle_executor_test.go index e942ff490c5a6..0be450528fa5d 100644 --- a/coderd/autobuild/executor/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -1,4 +1,4 @@ -package executor_test +package autobuild_test import ( "context" @@ -11,7 +11,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/goleak" - "github.com/coder/coder/coderd/autobuild/executor" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/schedule" @@ -27,7 +27,7 @@ func TestExecutorAutostartOK(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -66,7 +66,7 @@ func TestExecutorAutostartTemplateUpdated(t *testing.T) { ctx = context.Background() err error tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -113,7 +113,7 @@ func TestExecutorAutostartAlreadyRunning(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -145,7 +145,7 @@ func TestExecutorAutostartNotEnabled(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -180,7 +180,7 @@ func TestExecutorAutostopOK(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -216,7 +216,7 @@ func TestExecutorAutostopExtend(t *testing.T) { var ( ctx = context.Background() tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -266,7 +266,7 @@ func TestExecutorAutostopAlreadyStopped(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -299,7 +299,7 @@ func TestExecutorAutostopNotEnabled(t *testing.T) { var ( ctx = context.Background() tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -341,7 +341,7 @@ func TestExecutorWorkspaceDeleted(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -374,7 +374,7 @@ func TestExecutorWorkspaceAutostartTooEarly(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -405,7 +405,7 @@ func TestExecutorWorkspaceAutostopBeforeDeadline(t *testing.T) { var ( tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -433,7 +433,7 @@ func TestExecutorWorkspaceAutostopNoWaitChangedMyMind(t *testing.T) { var ( ctx = context.Background() tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -501,8 +501,8 @@ func TestExecutorAutostartMultipleOK(t *testing.T) { sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) tickCh2 = make(chan time.Time) - statsCh1 = make(chan executor.Stats) - statsCh2 = make(chan executor.Stats) + statsCh1 = make(chan autobuild.Stats) + statsCh2 = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -556,7 +556,7 @@ func TestExecutorAutostartWithParameters(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, IncludeProvisionerDaemon: true, @@ -609,7 +609,7 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) { var ( sched = mustSchedule(t, "CRON_TZ=UTC 0 * * * *") tickCh = make(chan time.Time) - statsCh = make(chan executor.Stats) + statsCh = make(chan autobuild.Stats) client = coderdtest.New(t, &coderdtest.Options{ AutobuildTicker: tickCh, diff --git a/coderd/autobuild/notify/notifier_test.go b/coderd/autobuild/notify/notifier_test.go index fb547defd5b36..e241e19f2a7a4 100644 --- a/coderd/autobuild/notify/notifier_test.go +++ b/coderd/autobuild/notify/notifier_test.go @@ -9,7 +9,7 @@ import ( "go.uber.org/atomic" "go.uber.org/goleak" - "github.com/coder/coder/coderd/autobuild/notify" + "github.com/coder/coder/coderd/wsactions/notify" ) func TestNotifier(t *testing.T) { diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index 5d03b49016f28..a28a52cda0d4e 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -54,7 +54,7 @@ import ( "cdr.dev/slog/sloggers/slogtest" "github.com/coder/coder/coderd" "github.com/coder/coder/coderd/audit" - "github.com/coder/coder/coderd/autobuild/executor" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/awsidentity" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/database/dbauthz" @@ -102,7 +102,7 @@ type Options struct { GoogleTokenValidator *idtoken.Validator SSHKeygenAlgorithm gitsshkey.Algorithm AutobuildTicker <-chan time.Time - AutobuildStats chan<- executor.Stats + AutobuildStats chan<- autobuild.Stats Auditor audit.Auditor TLSCertificates []tls.Certificate GitAuthConfigs []*gitauth.Config @@ -244,7 +244,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can templateScheduleStore.Store(&options.TemplateScheduleStore) ctx, cancelFunc := context.WithCancel(context.Background()) - lifecycleExecutor := executor.New( + lifecycleExecutor := autobuild.NewExecutor( ctx, options.Database, &templateScheduleStore, From 315d869004afa17640f847823dd7b20a6287b557 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Tue, 13 Jun 2023 02:41:04 +0000 Subject: [PATCH 2/9] feat: add support for stopping failed workspaces --- coderd/autobuild/lifecycle_executor.go | 71 +++--- coderd/autobuild/lifecycle_executor_test.go | 253 ++++++++++++++++++++ coderd/autobuild/notify/notifier_test.go | 2 +- coderd/coderdtest/coderdtest.go | 12 +- coderd/database/dbauthz/dbauthz.go | 4 +- coderd/database/dbfake/dbfake.go | 2 +- coderd/database/dbmetrics/dbmetrics.go | 4 +- coderd/database/dbmock/dbmock.go | 12 +- coderd/database/querier.go | 2 +- coderd/database/queries.sql.go | 17 +- coderd/database/queries/workspaces.sql | 13 +- coderd/wsbuilder/wsbuilder.go | 30 ++- provisioner/echo/serve.go | 29 +-- 13 files changed, 368 insertions(+), 83 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 6138e8ec58cf2..8010a7f854185 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -13,9 +13,11 @@ import ( "cdr.dev/slog" "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/db2sdk" "github.com/coder/coder/coderd/database/dbauthz" "github.com/coder/coder/coderd/schedule" "github.com/coder/coder/coderd/wsbuilder" + "github.com/coder/coder/codersdk" ) // Executor automatically starts or stops workspaces. @@ -108,7 +110,7 @@ func (e *Executor) runOnce(t time.Time) Stats { // NOTE: If a workspace build is created with a given TTL and then the user either // changes or unsets the TTL, the deadline for the workspace build will not // have changed. This behavior is as expected per #2229. - workspaces, err := e.db.GetWorkspacesEligibleForAutoStartStop(e.ctx, t) + workspaces, err := e.db.GetWorkspacesEligibleForTransition(e.ctx, t) if err != nil { e.log.Error(e.ctx, "get workspaces for autostart or autostop", slog.Error(err)) return stats @@ -198,24 +200,6 @@ func (e *Executor) runOnce(t time.Time) Stats { return stats } -// isEligibleForTransition returns true if the workspace meets basic criteria -// for transitioning to a new state. -func isEligibleForTransition(ws database.Workspace, latestBuild database.WorkspaceBuild, templateSchedule schedule.TemplateScheduleOptions) bool { - if ws.Deleted { - return false - } - if templateSchedule.UserAutostartEnabled && ws.AutostartSchedule.Valid && ws.AutostartSchedule.String != "" { - return true - } - // Don't check the template schedule to see whether it allows autostop, this - // is done during the build when determining the deadline. - if latestBuild.Transition == database.WorkspaceTransitionStart && !latestBuild.Deadline.IsZero() { - return true - } - - return false -} - func getNextTransition( ws database.Workspace, latestBuild database.WorkspaceBuild, @@ -227,32 +211,37 @@ func getNextTransition( database.BuildReason, error, ) { - if !isEligibleForTransition(ws, latestBuild, templateSchedule) { - return "", "", xerrors.Errorf("workspace ineligible for transition") - } - - if !latestJob.CompletedAt.Valid || latestJob.Error.String != "" { - return "", "", xerrors.Errorf("last workspace build did not complete successfully") - } - switch { - case isEligibleForAutostop(latestBuild, currentTick): + case isEligibleForAutostop(latestBuild, latestJob, currentTick): return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil - case isEligibleForAutostart(ws, latestBuild, currentTick): + case isEligibleForAutostart(ws, latestBuild, latestJob, templateSchedule, currentTick): return database.WorkspaceTransitionStart, database.BuildReasonAutostart, nil + case isEligibleForFailedStop(latestJob, templateSchedule): + return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil default: return "", "", xerrors.Errorf("last transition not valid for autostart or autostop") } } // isEligibleForAutostart returns true if the workspace should be autostarted. -func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild, currentTick time.Time) bool { +func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild, job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions, currentTick time.Time) bool { + // Don't attempt to autostart failed workspaces. + if !job.CompletedAt.Valid || job.Error.String != "" { + return false + } + // If the last transition for the workspace was not 'stop' then the workspace // cannot be started. if build.Transition != database.WorkspaceTransitionStop { return false } + // If autostart isn't enabled, or the schedule isn't valid/populated we can't + // autostart the workspace. + if !templateSchedule.UserAutostartEnabled || !ws.AutostartSchedule.Valid || ws.AutostartSchedule.String == "" { + return false + } + sched, err := schedule.Weekly(ws.AutostartSchedule.String) if err != nil { return false @@ -265,10 +254,30 @@ func isEligibleForAutostart(ws database.Workspace, build database.WorkspaceBuild } // isEligibleForAutostart returns true if the workspace should be autostopped. -func isEligibleForAutostop(build database.WorkspaceBuild, currentTick time.Time) bool { +func isEligibleForAutostop(build database.WorkspaceBuild, job database.ProvisionerJob, currentTick time.Time) bool { + // Don't attempt to autostop failed workspaces. + if !job.CompletedAt.Valid || job.Error.String != "" { + return false + } + // A workspace must be started in order for it to be auto-stopped. return build.Transition == database.WorkspaceTransitionStart && !build.Deadline.IsZero() && // We do not want to stop a workspace prior to it breaching its deadline. !currentTick.Before(build.Deadline) } + +// isEligibleForFailedStop returns true if the workspace is eligible to be stopped +// due to a failed build. +func isEligibleForFailedStop(job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions) bool { + // If the template has specified a failure TLL. + return templateSchedule.FailureTTL > 0 && + // And the job resulted in failure. + db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && + // And sufficient time has elapsed since the job has completed. + (job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL || + // Or sufficient time has elapsed since the job was canceled. + job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL || + // Or the job is stuck/abandoned. + database.Now().Sub(job.UpdatedAt) > 30*time.Second) +} diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 0be450528fa5d..b485c85fa7659 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -11,6 +11,8 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/goleak" + "cdr.dev/slog/sloggers/slogtest" + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/coderd/database" @@ -19,6 +21,7 @@ import ( "github.com/coder/coder/codersdk" "github.com/coder/coder/provisioner/echo" "github.com/coder/coder/provisionersdk/proto" + "github.com/coder/coder/testutil" ) func TestExecutorAutostartOK(t *testing.T) { @@ -648,6 +651,191 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) { assert.Len(t, stats.Transitions, 0) } +// TesetExecutorFailedWorkspace tests that failed workspaces that breach +// their template failed_ttl threshold trigger a stop job. +func TestExecutorFailedWorkspace(t *testing.T) { + t.Parallel() + + t.Run("AGPLOK", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Millisecond + + client = coderdtest.New(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: schedule.NewAGPLTemplateScheduleStore(), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + require.Eventually(t, + func() bool { + return database.Now().Sub(*build.Job.CompletedAt) > failureTTL + }, + testutil.IntervalMedium, testutil.IntervalFast) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since we're using AGPL. + require.Len(t, stats.Transitions, 0) + }) + + t.Run("EnterpriseOK", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Millisecond + + client = coderdtest.New(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + require.Eventually(t, + func() bool { + return database.Now().Sub(*build.Job.CompletedAt) > failureTTL + }, + testutil.IntervalMedium, testutil.IntervalFast) + ticker <- time.Now() + stats := <-statCh + // Expect workspace to transition to stopped state for breaching + // failure TTL. + require.Len(t, stats.Transitions, 1) + require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop) + }) + + t.Run("TooEarly", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Minute + + client = coderdtest.New(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since not enough time has elapsed. + require.Len(t, stats.Transitions, 0) + }) + + t.Run("StuckJob", func(t *testing.T) { + t.Parallel() + + var ( + ctx = testutil.Context(t, testutil.WaitMedium) + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Hour + client, _, api = coderdtest.NewWithAPI(t, &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), + }) + ) + user := coderdtest.CreateFirstUser(t, client) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + + // Emulate a stuck job. + err := api.Database.UpdateProvisionerJobByID(ctx, database.UpdateProvisionerJobByIDParams{ + ID: build.Job.ID, + UpdatedAt: database.Now().Add(-time.Hour), + }) + require.NoError(t, err) + ticker <- time.Now() + stats := <-statCh + // Expect a transition since we should detect that the job stopped + // updating. + require.Len(t, stats.Transitions, 1) + }) +} + func mustProvisionWorkspace(t *testing.T, client *codersdk.Client, mut ...func(*codersdk.CreateWorkspaceRequest)) codersdk.Workspace { t.Helper() user := coderdtest.CreateFirstUser(t, client) @@ -705,3 +893,68 @@ func mustWorkspaceParameters(t *testing.T, client *codersdk.Client, workspaceID func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } + +func mockEnterpriseTemplateScheduler(t *testing.T) schedule.TemplateScheduleStore { + return schedule.MockTemplateScheduleStore{ + GetFn: func(ctx context.Context, store database.Store, id uuid.UUID) (schedule.TemplateScheduleOptions, error) { + t.Helper() + + template, err := store.GetTemplateByID(ctx, id) + require.NoError(t, err) + + return schedule.TemplateScheduleOptions{ + UserAutostartEnabled: template.AllowUserAutostart, + UserAutostopEnabled: template.AllowUserAutostop, + DefaultTTL: time.Duration(template.DefaultTTL), + MaxTTL: time.Duration(template.MaxTTL), + FailureTTL: time.Duration(template.FailureTTL), + }, nil + }, + SetFn: func(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { + t.Helper() + + if int64(opts.DefaultTTL) == tpl.DefaultTTL && + int64(opts.MaxTTL) == tpl.MaxTTL && + int64(opts.FailureTTL) == tpl.FailureTTL && + int64(opts.InactivityTTL) == tpl.InactivityTTL && + int64(opts.LockedTTL) == tpl.LockedTTL && + opts.UserAutostartEnabled == tpl.AllowUserAutostart && + opts.UserAutostopEnabled == tpl.AllowUserAutostop { + // Avoid updating the UpdatedAt timestamp if nothing will be changed. + return tpl, nil + } + + template, err := db.UpdateTemplateScheduleByID(ctx, database.UpdateTemplateScheduleByIDParams{ + ID: tpl.ID, + UpdatedAt: database.Now(), + AllowUserAutostart: opts.UserAutostartEnabled, + AllowUserAutostop: opts.UserAutostopEnabled, + DefaultTTL: int64(opts.DefaultTTL), + MaxTTL: int64(opts.MaxTTL), + FailureTTL: int64(opts.FailureTTL), + InactivityTTL: int64(opts.InactivityTTL), + LockedTTL: int64(opts.LockedTTL), + }) + require.NoError(t, err) + // Update all workspaces using the template to set the user defined schedule + // to be within the new bounds. This essentially does the following for each + // workspace using the template. + // if (template.ttl != NULL) { + // workspace.ttl = min(workspace.ttl, template.ttl) + // } + // + // NOTE: this does not apply to currently running workspaces as their + // schedule information is committed to the workspace_build during start. + // This limitation is displayed to the user while editing the template. + if opts.MaxTTL > 0 { + err = db.UpdateWorkspaceTTLToBeWithinTemplateMax(ctx, database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams{ + TemplateID: template.ID, + TemplateMaxTTL: int64(opts.MaxTTL), + }) + require.NoError(t, err) + } + + return template, nil + }, + } +} diff --git a/coderd/autobuild/notify/notifier_test.go b/coderd/autobuild/notify/notifier_test.go index e241e19f2a7a4..fb547defd5b36 100644 --- a/coderd/autobuild/notify/notifier_test.go +++ b/coderd/autobuild/notify/notifier_test.go @@ -9,7 +9,7 @@ import ( "go.uber.org/atomic" "go.uber.org/goleak" - "github.com/coder/coder/coderd/wsactions/notify" + "github.com/coder/coder/coderd/autobuild/notify" ) func TestNotifier(t *testing.T) { diff --git a/coderd/coderdtest/coderdtest.go b/coderd/coderdtest/coderdtest.go index a28a52cda0d4e..b355c7e08c199 100644 --- a/coderd/coderdtest/coderdtest.go +++ b/coderd/coderdtest/coderdtest.go @@ -136,6 +136,9 @@ type Options struct { ConfigSSH codersdk.SSHConfigResponse SwaggerEndpoint bool + // Logger should only be overridden if you expect errors + // as part of your test. + Logger *slog.Logger } // New constructs a codersdk client connected to an in-memory API instance. @@ -310,6 +313,11 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can require.NoError(t, err) } + if options.Logger == nil { + logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug) + options.Logger = &logger + } + return func(h http.Handler) { mutex.Lock() defer mutex.Unlock() @@ -322,7 +330,7 @@ func NewOptions(t testing.TB, options *Options) (func(http.Handler), context.Can AccessURL: accessURL, AppHostname: options.AppHostname, AppHostnameRegex: appHostnameRegex, - Logger: slogtest.Make(t, nil).Leveled(slog.LevelDebug), + Logger: *options.Logger, CacheDir: t.TempDir(), Database: options.Database, Pubsub: options.Pubsub, @@ -427,7 +435,7 @@ func NewProvisionerDaemon(t testing.TB, coderAPI *coderd.API) io.Closer { return coderAPI.CreateInMemoryProvisionerDaemon(ctx, 0) }, &provisionerd.Options{ Filesystem: fs, - Logger: slogtest.Make(t, nil).Named("provisionerd").Leveled(slog.LevelDebug), + Logger: coderAPI.Logger.Named("provisionerd").Leveled(slog.LevelDebug), JobPollInterval: 50 * time.Millisecond, UpdateInterval: 250 * time.Millisecond, ForceCancelInterval: time.Second, diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index d9e82c5b6e3e4..4211204f1917d 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -1682,8 +1682,8 @@ func (q *querier) GetWorkspaces(ctx context.Context, arg database.GetWorkspacesP return q.db.GetAuthorizedWorkspaces(ctx, arg, prep) } -func (q *querier) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]database.Workspace, error) { - return q.db.GetWorkspacesEligibleForAutoStartStop(ctx, now) +func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.Workspace, error) { + return q.db.GetWorkspacesEligibleForTransition(ctx, now) } func (q *querier) InsertAPIKey(ctx context.Context, arg database.InsertAPIKeyParams) (database.APIKey, error) { diff --git a/coderd/database/dbfake/dbfake.go b/coderd/database/dbfake/dbfake.go index da623da8620f9..ad274c54372ca 100644 --- a/coderd/database/dbfake/dbfake.go +++ b/coderd/database/dbfake/dbfake.go @@ -3404,7 +3404,7 @@ func (q *fakeQuerier) GetWorkspaces(ctx context.Context, arg database.GetWorkspa return workspaceRows, err } -func (q *fakeQuerier) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]database.Workspace, error) { +func (q *fakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.Workspace, error) { q.mutex.RLock() defer q.mutex.RUnlock() diff --git a/coderd/database/dbmetrics/dbmetrics.go b/coderd/database/dbmetrics/dbmetrics.go index 71171ed6ff630..71ab27bc1d69c 100644 --- a/coderd/database/dbmetrics/dbmetrics.go +++ b/coderd/database/dbmetrics/dbmetrics.go @@ -962,9 +962,9 @@ func (m metricsStore) GetWorkspaces(ctx context.Context, arg database.GetWorkspa return workspaces, err } -func (m metricsStore) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]database.Workspace, error) { +func (m metricsStore) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.Workspace, error) { start := time.Now() - workspaces, err := m.s.GetWorkspacesEligibleForAutoStartStop(ctx, now) + workspaces, err := m.s.GetWorkspacesEligibleForTransition(ctx, now) m.queryLatencies.WithLabelValues("GetWorkspacesEligibleForAutoStartStop").Observe(time.Since(start).Seconds()) return workspaces, err } diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index 610795142ba07..3e5cc26e3f4e0 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -1933,19 +1933,19 @@ func (mr *MockStoreMockRecorder) GetWorkspaces(arg0, arg1 interface{}) *gomock.C return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaces", reflect.TypeOf((*MockStore)(nil).GetWorkspaces), arg0, arg1) } -// GetWorkspacesEligibleForAutoStartStop mocks base method. -func (m *MockStore) GetWorkspacesEligibleForAutoStartStop(arg0 context.Context, arg1 time.Time) ([]database.Workspace, error) { +// GetWorkspacesEligibleForTransition mocks base method. +func (m *MockStore) GetWorkspacesEligibleForTransition(arg0 context.Context, arg1 time.Time) ([]database.Workspace, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetWorkspacesEligibleForAutoStartStop", arg0, arg1) + ret := m.ctrl.Call(m, "GetWorkspacesEligibleForTransition", arg0, arg1) ret0, _ := ret[0].([]database.Workspace) ret1, _ := ret[1].(error) return ret0, ret1 } -// GetWorkspacesEligibleForAutoStartStop indicates an expected call of GetWorkspacesEligibleForAutoStartStop. -func (mr *MockStoreMockRecorder) GetWorkspacesEligibleForAutoStartStop(arg0, arg1 interface{}) *gomock.Call { +// GetWorkspacesEligibleForTransition indicates an expected call of GetWorkspacesEligibleForTransition. +func (mr *MockStoreMockRecorder) GetWorkspacesEligibleForTransition(arg0, arg1 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspacesEligibleForAutoStartStop", reflect.TypeOf((*MockStore)(nil).GetWorkspacesEligibleForAutoStartStop), arg0, arg1) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspacesEligibleForTransition", reflect.TypeOf((*MockStore)(nil).GetWorkspacesEligibleForTransition), arg0, arg1) } // InTx mocks base method. diff --git a/coderd/database/querier.go b/coderd/database/querier.go index f2e13580040a8..acd434adb465b 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -166,7 +166,7 @@ type sqlcQuerier interface { GetWorkspaceResourcesByJobIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceResource, error) GetWorkspaceResourcesCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceResource, error) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) ([]GetWorkspacesRow, error) - GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]Workspace, error) + GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]Workspace, error) InsertAPIKey(ctx context.Context, arg InsertAPIKeyParams) (APIKey, error) // We use the organization_id as the id // for simplicity since all users is diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 3f6baeb2c50a4..7301c208c22e8 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -8272,13 +8272,15 @@ func (q *sqlQuerier) GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) return items, nil } -const getWorkspacesEligibleForAutoStartStop = `-- name: GetWorkspacesEligibleForAutoStartStop :many +const getWorkspacesEligibleForTransition = `-- name: GetWorkspacesEligibleForTransition :many SELECT workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at FROM workspaces LEFT JOIN workspace_builds ON workspace_builds.workspace_id = workspaces.id +INNER JOIN + provisioner_jobs ON workspace_builds.job_id = provisioner_jobs.id WHERE workspace_builds.build_number = ( SELECT @@ -8308,12 +8310,19 @@ WHERE ( workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL + ) OR + + -- If the workspace's most recent job resulted in an error + -- it may be eligible for failed stop. + ( + provisioner_jobs.error IS NOT NULL AND + provisioner_jobs.error != '' ) - ) + ) AND workspaces.deleted = 'false' ` -func (q *sqlQuerier) GetWorkspacesEligibleForAutoStartStop(ctx context.Context, now time.Time) ([]Workspace, error) { - rows, err := q.db.QueryContext(ctx, getWorkspacesEligibleForAutoStartStop, now) +func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]Workspace, error) { + rows, err := q.db.QueryContext(ctx, getWorkspacesEligibleForTransition, now) if err != nil { return nil, err } diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index f5d7c35cfd929..98cfcf0bcab25 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -405,13 +405,15 @@ SELECT stopped_workspaces.count AS stopped_workspaces FROM pending_workspaces, building_workspaces, running_workspaces, failed_workspaces, stopped_workspaces; --- name: GetWorkspacesEligibleForAutoStartStop :many +-- name: GetWorkspacesEligibleForTransition :many SELECT workspaces.* FROM workspaces LEFT JOIN workspace_builds ON workspace_builds.workspace_id = workspaces.id +INNER JOIN + provisioner_jobs ON workspace_builds.job_id = provisioner_jobs.id WHERE workspace_builds.build_number = ( SELECT @@ -441,5 +443,12 @@ WHERE ( workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL + ) OR + + -- If the workspace's most recent job resulted in an error + -- it may be eligible for failed stop. + ( + provisioner_jobs.error IS NOT NULL AND + provisioner_jobs.error != '' ) - ); + ) AND workspaces.deleted = 'false'; diff --git a/coderd/wsbuilder/wsbuilder.go b/coderd/wsbuilder/wsbuilder.go index d9288485fdd1e..e6980788cb9e8 100644 --- a/coderd/wsbuilder/wsbuilder.go +++ b/coderd/wsbuilder/wsbuilder.go @@ -617,11 +617,12 @@ func (b *Builder) authorize(authFunc func(action rbac.Action, object rbac.Object case database.WorkspaceTransitionStart, database.WorkspaceTransitionStop: action = rbac.ActionUpdate default: - return BuildError{http.StatusBadRequest, fmt.Sprintf("Transition %q not supported.", b.trans), xerrors.New("")} + msg := fmt.Sprintf("Transition %q not supported.", b.trans) + return BuildError{http.StatusBadRequest, msg, xerrors.New(msg)} } if !authFunc(action, b.workspace) { // We use the same wording as the httpapi to avoid leaking the existence of the workspace - return BuildError{http.StatusNotFound, httpapi.ResourceNotFoundResponse.Message, xerrors.New("")} + return BuildError{http.StatusNotFound, httpapi.ResourceNotFoundResponse.Message, xerrors.New(httpapi.ResourceNotFoundResponse.Message)} } template, err := b.getTemplate() @@ -633,7 +634,7 @@ func (b *Builder) authorize(authFunc func(action rbac.Action, object rbac.Object // cloud state. if b.state.explicit != nil || b.state.orphan { if !authFunc(rbac.ActionUpdate, template.RBACObject()) { - return BuildError{http.StatusForbidden, "Only template managers may provide custom state", xerrors.New("")} + return BuildError{http.StatusForbidden, "Only template managers may provide custom state", xerrors.New("Only template managers may provide custom state")} } } @@ -641,7 +642,7 @@ func (b *Builder) authorize(authFunc func(action rbac.Action, object rbac.Object return BuildError{ http.StatusBadRequest, "Workspace builds with a custom log level are restricted to template authors only.", - xerrors.New(""), + xerrors.New("Workspace builds with a custom log level are restricted to template authors only."), } } return nil @@ -686,22 +687,26 @@ func (b *Builder) checkTemplateJobStatus() error { templateVersionJobStatus := db2sdk.ProvisionerJobStatus(*templateVersionJob) switch templateVersionJobStatus { case codersdk.ProvisionerJobPending, codersdk.ProvisionerJobRunning: + msg := fmt.Sprintf("The provided template version is %s. Wait for it to complete importing!", templateVersionJobStatus) + return BuildError{ http.StatusNotAcceptable, - fmt.Sprintf("The provided template version is %s. Wait for it to complete importing!", templateVersionJobStatus), - xerrors.New(""), + msg, + xerrors.New(msg), } case codersdk.ProvisionerJobFailed: + msg := fmt.Sprintf("The provided template version %q has failed to import: %q. You cannot build workspaces with it!", templateVersion.Name, templateVersionJob.Error.String) return BuildError{ http.StatusBadRequest, - fmt.Sprintf("The provided template version %q has failed to import: %q. You cannot build workspaces with it!", templateVersion.Name, templateVersionJob.Error.String), - xerrors.New(""), + msg, + xerrors.New(msg), } case codersdk.ProvisionerJobCanceled: + msg := fmt.Sprintf("The provided template version %q has failed to import: %q. You cannot build workspaces with it!", templateVersion.Name, templateVersionJob.Error.String) return BuildError{ http.StatusBadRequest, - "The provided template version was canceled during import. You cannot build workspaces with it!", - xerrors.New(""), + msg, + xerrors.New(msg), } } return nil @@ -717,10 +722,11 @@ func (b *Builder) checkRunningBuild() error { return BuildError{http.StatusInternalServerError, "failed to fetch prior build", err} } if db2sdk.ProvisionerJobStatus(*job).Active() { + msg := "A workspace build is already active." return BuildError{ http.StatusConflict, - "A workspace build is already active.", - xerrors.New(""), + msg, + xerrors.New(msg), } } return nil diff --git a/provisioner/echo/serve.go b/provisioner/echo/serve.go index 0da18a1e6392d..6df498a9af495 100644 --- a/provisioner/echo/serve.go +++ b/provisioner/echo/serve.go @@ -18,21 +18,6 @@ import ( "github.com/coder/coder/provisionersdk/proto" ) -const ( - ParameterExecKey = "echo.exec" - - errorKey = "error" - successKey = "success" -) - -func ParameterError(s string) string { - return formatExecValue(errorKey, s) -} - -func ParameterSucceed() string { - return formatExecValue(successKey, "") -} - // ProvisionApplyWithAgent returns provision responses that will mock a fake // "aws_instance" resource with an agent that has the given auth token. func ProvisionApplyWithAgent(authToken string) []*proto.Provision_Response { @@ -55,10 +40,6 @@ func ProvisionApplyWithAgent(authToken string) []*proto.Provision_Response { }} } -func formatExecValue(key, value string) string { - return fmt.Sprintf("%s=%s", key, value) -} - var ( // ParseComplete is a helper to indicate an empty parse completion. ParseComplete = []*proto.Parse_Response{{ @@ -72,6 +53,16 @@ var ( Complete: &proto.Provision_Complete{}, }, }} + + // ProvisionFailed is a helper to convey a failed provision + // operation. + ProvisionFailed = []*proto.Provision_Response{{ + Type: &proto.Provision_Response_Complete{ + Complete: &proto.Provision_Complete{ + Error: "failed!", + }, + }, + }} ) // Serve starts the echo provisioner. From 190c4bcd9667a51f8baeb3ddc102ec9a0d529317 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Tue, 20 Jun 2023 20:23:02 +0000 Subject: [PATCH 3/9] fix fake --- coderd/database/dbfake/dbfake.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/coderd/database/dbfake/dbfake.go b/coderd/database/dbfake/dbfake.go index ad274c54372ca..8b9ae6d74fdf2 100644 --- a/coderd/database/dbfake/dbfake.go +++ b/coderd/database/dbfake/dbfake.go @@ -20,9 +20,11 @@ import ( "golang.org/x/xerrors" "github.com/coder/coder/coderd/database" + "github.com/coder/coder/coderd/database/db2sdk" "github.com/coder/coder/coderd/httpapi" "github.com/coder/coder/coderd/rbac" "github.com/coder/coder/coderd/util/slice" + "github.com/coder/coder/codersdk" ) var validProxyByHostnameRegex = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`) @@ -3424,6 +3426,15 @@ func (q *fakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no workspaces = append(workspaces, workspace) continue } + + job, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID) + if err != nil { + return nil, xerrors.Errorf("get provisioner job by ID: %w", err) + } + if db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed { + workspaces = append(workspaces, workspace) + continue + } } return workspaces, nil From a8fa3f001ac61643a39ac7a637ad6855cf0fc831 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 01:14:26 +0000 Subject: [PATCH 4/9] pr comments --- coderd/autobuild/lifecycle_executor.go | 9 ++-- coderd/autobuild/lifecycle_executor_test.go | 47 --------------------- coderd/database/queries/workspaces.sql | 3 +- 3 files changed, 6 insertions(+), 53 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 8010a7f854185..c43487d291a29 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -216,7 +216,7 @@ func getNextTransition( return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil case isEligibleForAutostart(ws, latestBuild, latestJob, templateSchedule, currentTick): return database.WorkspaceTransitionStart, database.BuildReasonAutostart, nil - case isEligibleForFailedStop(latestJob, templateSchedule): + case isEligibleForFailedStop(latestBuild, latestJob, templateSchedule): return database.WorkspaceTransitionStop, database.BuildReasonAutostop, nil default: return "", "", xerrors.Errorf("last transition not valid for autostart or autostop") @@ -269,15 +269,14 @@ func isEligibleForAutostop(build database.WorkspaceBuild, job database.Provision // isEligibleForFailedStop returns true if the workspace is eligible to be stopped // due to a failed build. -func isEligibleForFailedStop(job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions) bool { +func isEligibleForFailedStop(build database.WorkspaceBuild, job database.ProvisionerJob, templateSchedule schedule.TemplateScheduleOptions) bool { // If the template has specified a failure TLL. return templateSchedule.FailureTTL > 0 && // And the job resulted in failure. db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && + build.Transition == database.WorkspaceTransitionStart && // And sufficient time has elapsed since the job has completed. (job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL || // Or sufficient time has elapsed since the job was canceled. - job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL || - // Or the job is stuck/abandoned. - database.Now().Sub(job.UpdatedAt) > 30*time.Second) + job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL) } diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index b485c85fa7659..9087f8dc4ab04 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -787,53 +787,6 @@ func TestExecutorFailedWorkspace(t *testing.T) { // Expect no transitions since not enough time has elapsed. require.Len(t, stats.Transitions, 0) }) - - t.Run("StuckJob", func(t *testing.T) { - t.Parallel() - - var ( - ctx = testutil.Context(t, testutil.WaitMedium) - ticker = make(chan time.Time) - statCh = make(chan autobuild.Stats) - logger = slogtest.Make(t, &slogtest.Options{ - // We ignore errors here since we expect to fail - // builds. - IgnoreErrors: true, - }) - failureTTL = time.Hour - client, _, api = coderdtest.NewWithAPI(t, &coderdtest.Options{ - Logger: &logger, - AutobuildTicker: ticker, - IncludeProvisionerDaemon: true, - AutobuildStats: statCh, - TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), - }) - ) - user := coderdtest.CreateFirstUser(t, client) - version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ - Parse: echo.ParseComplete, - ProvisionPlan: echo.ProvisionComplete, - ProvisionApply: echo.ProvisionFailed, - }) - template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { - ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) - }) - coderdtest.AwaitTemplateVersionJob(t, client, version.ID) - ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) - build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) - - // Emulate a stuck job. - err := api.Database.UpdateProvisionerJobByID(ctx, database.UpdateProvisionerJobByIDParams{ - ID: build.Job.ID, - UpdatedAt: database.Now().Add(-time.Hour), - }) - require.NoError(t, err) - ticker <- time.Now() - stats := <-statCh - // Expect a transition since we should detect that the job stopped - // updating. - require.Len(t, stats.Transitions, 1) - }) } func mustProvisionWorkspace(t *testing.T, client *codersdk.Client, mut ...func(*codersdk.CreateWorkspaceRequest)) codersdk.Workspace { diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 98cfcf0bcab25..90368b7e8f5c6 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -449,6 +449,7 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' + provisioner_jobs.error != '' AND + workspace_builds.transition = "start"::workspace_transition ) ) AND workspaces.deleted = 'false'; From 4fdaf19cd579fbd6ec5708b58209bfc858482379 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 01:45:33 +0000 Subject: [PATCH 5/9] move enterprise tests --- coderd/autobuild/lifecycle_executor.go | 1 - coderd/autobuild/lifecycle_executor_test.go | 157 +------------------- enterprise/coderd/coderd.go | 2 +- enterprise/coderd/provisionerdaemons.go | 8 +- enterprise/coderd/workspaces_test.go | 156 +++++++++++++++++++ 5 files changed, 165 insertions(+), 159 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index c43487d291a29..e2ab82d8077b7 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -142,7 +142,6 @@ func (e *Executor) runOnce(t time.Time) Stats { log.Warn(e.ctx, "get latest workspace build", slog.Error(err)) return nil } - templateSchedule, err := (*(e.templateScheduleStore.Load())).GetTemplateScheduleOptions(e.ctx, tx, ws.TemplateID) if err != nil { log.Warn(e.ctx, "get template schedule options", slog.Error(err)) diff --git a/coderd/autobuild/lifecycle_executor_test.go b/coderd/autobuild/lifecycle_executor_test.go index 9087f8dc4ab04..3da342066ad12 100644 --- a/coderd/autobuild/lifecycle_executor_test.go +++ b/coderd/autobuild/lifecycle_executor_test.go @@ -653,10 +653,13 @@ func TestExecutorAutostartTemplateDisabled(t *testing.T) { // TesetExecutorFailedWorkspace tests that failed workspaces that breach // their template failed_ttl threshold trigger a stop job. +// For enterprise functionality see enterprise/coderd/workspaces_test.go func TestExecutorFailedWorkspace(t *testing.T) { t.Parallel() - t.Run("AGPLOK", func(t *testing.T) { + // Test that an AGPL TemplateScheduleStore properly disables + // functionality. + t.Run("OK", func(t *testing.T) { t.Parallel() var ( @@ -700,93 +703,6 @@ func TestExecutorFailedWorkspace(t *testing.T) { // Expect no transitions since we're using AGPL. require.Len(t, stats.Transitions, 0) }) - - t.Run("EnterpriseOK", func(t *testing.T) { - t.Parallel() - - var ( - ticker = make(chan time.Time) - statCh = make(chan autobuild.Stats) - logger = slogtest.Make(t, &slogtest.Options{ - // We ignore errors here since we expect to fail - // builds. - IgnoreErrors: true, - }) - failureTTL = time.Millisecond - - client = coderdtest.New(t, &coderdtest.Options{ - Logger: &logger, - AutobuildTicker: ticker, - IncludeProvisionerDaemon: true, - AutobuildStats: statCh, - TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), - }) - ) - user := coderdtest.CreateFirstUser(t, client) - version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ - Parse: echo.ParseComplete, - ProvisionPlan: echo.ProvisionComplete, - ProvisionApply: echo.ProvisionFailed, - }) - template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { - ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) - }) - coderdtest.AwaitTemplateVersionJob(t, client, version.ID) - ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) - build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) - require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) - require.Eventually(t, - func() bool { - return database.Now().Sub(*build.Job.CompletedAt) > failureTTL - }, - testutil.IntervalMedium, testutil.IntervalFast) - ticker <- time.Now() - stats := <-statCh - // Expect workspace to transition to stopped state for breaching - // failure TTL. - require.Len(t, stats.Transitions, 1) - require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop) - }) - - t.Run("TooEarly", func(t *testing.T) { - t.Parallel() - - var ( - ticker = make(chan time.Time) - statCh = make(chan autobuild.Stats) - logger = slogtest.Make(t, &slogtest.Options{ - // We ignore errors here since we expect to fail - // builds. - IgnoreErrors: true, - }) - failureTTL = time.Minute - - client = coderdtest.New(t, &coderdtest.Options{ - Logger: &logger, - AutobuildTicker: ticker, - IncludeProvisionerDaemon: true, - AutobuildStats: statCh, - TemplateScheduleStore: mockEnterpriseTemplateScheduler(t), - }) - ) - user := coderdtest.CreateFirstUser(t, client) - version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ - Parse: echo.ParseComplete, - ProvisionPlan: echo.ProvisionComplete, - ProvisionApply: echo.ProvisionFailed, - }) - template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { - ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) - }) - coderdtest.AwaitTemplateVersionJob(t, client, version.ID) - ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) - build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) - require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) - ticker <- time.Now() - stats := <-statCh - // Expect no transitions since not enough time has elapsed. - require.Len(t, stats.Transitions, 0) - }) } func mustProvisionWorkspace(t *testing.T, client *codersdk.Client, mut ...func(*codersdk.CreateWorkspaceRequest)) codersdk.Workspace { @@ -846,68 +762,3 @@ func mustWorkspaceParameters(t *testing.T, client *codersdk.Client, workspaceID func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } - -func mockEnterpriseTemplateScheduler(t *testing.T) schedule.TemplateScheduleStore { - return schedule.MockTemplateScheduleStore{ - GetFn: func(ctx context.Context, store database.Store, id uuid.UUID) (schedule.TemplateScheduleOptions, error) { - t.Helper() - - template, err := store.GetTemplateByID(ctx, id) - require.NoError(t, err) - - return schedule.TemplateScheduleOptions{ - UserAutostartEnabled: template.AllowUserAutostart, - UserAutostopEnabled: template.AllowUserAutostop, - DefaultTTL: time.Duration(template.DefaultTTL), - MaxTTL: time.Duration(template.MaxTTL), - FailureTTL: time.Duration(template.FailureTTL), - }, nil - }, - SetFn: func(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { - t.Helper() - - if int64(opts.DefaultTTL) == tpl.DefaultTTL && - int64(opts.MaxTTL) == tpl.MaxTTL && - int64(opts.FailureTTL) == tpl.FailureTTL && - int64(opts.InactivityTTL) == tpl.InactivityTTL && - int64(opts.LockedTTL) == tpl.LockedTTL && - opts.UserAutostartEnabled == tpl.AllowUserAutostart && - opts.UserAutostopEnabled == tpl.AllowUserAutostop { - // Avoid updating the UpdatedAt timestamp if nothing will be changed. - return tpl, nil - } - - template, err := db.UpdateTemplateScheduleByID(ctx, database.UpdateTemplateScheduleByIDParams{ - ID: tpl.ID, - UpdatedAt: database.Now(), - AllowUserAutostart: opts.UserAutostartEnabled, - AllowUserAutostop: opts.UserAutostopEnabled, - DefaultTTL: int64(opts.DefaultTTL), - MaxTTL: int64(opts.MaxTTL), - FailureTTL: int64(opts.FailureTTL), - InactivityTTL: int64(opts.InactivityTTL), - LockedTTL: int64(opts.LockedTTL), - }) - require.NoError(t, err) - // Update all workspaces using the template to set the user defined schedule - // to be within the new bounds. This essentially does the following for each - // workspace using the template. - // if (template.ttl != NULL) { - // workspace.ttl = min(workspace.ttl, template.ttl) - // } - // - // NOTE: this does not apply to currently running workspaces as their - // schedule information is committed to the workspace_build during start. - // This limitation is displayed to the user while editing the template. - if opts.MaxTTL > 0 { - err = db.UpdateWorkspaceTTLToBeWithinTemplateMax(ctx, database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams{ - TemplateID: template.ID, - TemplateMaxTTL: int64(opts.MaxTTL), - }) - require.NoError(t, err) - } - - return template, nil - }, - } -} diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go index 2cc54bd3fa705..aa460629cd929 100644 --- a/enterprise/coderd/coderd.go +++ b/enterprise/coderd/coderd.go @@ -402,7 +402,7 @@ func (api *API) updateEntitlements(ctx context.Context) error { if changed, enabled := featureChanged(codersdk.FeatureAdvancedTemplateScheduling); changed { if enabled { - store := &enterpriseTemplateScheduleStore{} + store := &EnterpriseTemplateScheduleStore{} ptr := schedule.TemplateScheduleStore(store) api.AGPL.TemplateScheduleStore.Store(&ptr) } else { diff --git a/enterprise/coderd/provisionerdaemons.go b/enterprise/coderd/provisionerdaemons.go index 36b98cde73004..5bb35bdbd3b22 100644 --- a/enterprise/coderd/provisionerdaemons.go +++ b/enterprise/coderd/provisionerdaemons.go @@ -310,11 +310,11 @@ func websocketNetConn(ctx context.Context, conn *websocket.Conn, msgType websock } } -type enterpriseTemplateScheduleStore struct{} +type EnterpriseTemplateScheduleStore struct{} -var _ schedule.TemplateScheduleStore = &enterpriseTemplateScheduleStore{} +var _ schedule.TemplateScheduleStore = &EnterpriseTemplateScheduleStore{} -func (*enterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.Context, db database.Store, templateID uuid.UUID) (schedule.TemplateScheduleOptions, error) { +func (*EnterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.Context, db database.Store, templateID uuid.UUID) (schedule.TemplateScheduleOptions, error) { tpl, err := db.GetTemplateByID(ctx, templateID) if err != nil { return schedule.TemplateScheduleOptions{}, err @@ -331,7 +331,7 @@ func (*enterpriseTemplateScheduleStore) GetTemplateScheduleOptions(ctx context.C }, nil } -func (*enterpriseTemplateScheduleStore) SetTemplateScheduleOptions(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { +func (*EnterpriseTemplateScheduleStore) SetTemplateScheduleOptions(ctx context.Context, db database.Store, tpl database.Template, opts schedule.TemplateScheduleOptions) (database.Template, error) { if int64(opts.DefaultTTL) == tpl.DefaultTTL && int64(opts.MaxTTL) == tpl.MaxTTL && int64(opts.FailureTTL) == tpl.FailureTTL && diff --git a/enterprise/coderd/workspaces_test.go b/enterprise/coderd/workspaces_test.go index 2c1a84a6f8dff..770f3ace3fe3b 100644 --- a/enterprise/coderd/workspaces_test.go +++ b/enterprise/coderd/workspaces_test.go @@ -10,12 +10,17 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "cdr.dev/slog/sloggers/slogtest" + + "github.com/coder/coder/coderd/autobuild" "github.com/coder/coder/coderd/coderdtest" "github.com/coder/coder/coderd/database" "github.com/coder/coder/coderd/util/ptr" "github.com/coder/coder/codersdk" + "github.com/coder/coder/enterprise/coderd" "github.com/coder/coder/enterprise/coderd/coderdenttest" "github.com/coder/coder/enterprise/coderd/license" + "github.com/coder/coder/provisioner/echo" "github.com/coder/coder/testutil" ) @@ -74,6 +79,157 @@ func TestCreateWorkspace(t *testing.T) { }) } +func TestWorkspaceAutobuild(t *testing.T) { + t.Parallel() + + t.Run("FailureTTLOK", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Millisecond + + client = coderdenttest.New(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{}, + }, + }) + ) + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureAdvancedTemplateScheduling: 1, + }, + }) + + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + require.Eventually(t, + func() bool { + return database.Now().Sub(*build.Job.CompletedAt) > failureTTL + }, + testutil.IntervalMedium, testutil.IntervalFast) + ticker <- time.Now() + stats := <-statCh + // Expect workspace to transition to stopped state for breaching + // failure TTL. + require.Len(t, stats.Transitions, 1) + require.Equal(t, stats.Transitions[ws.ID], database.WorkspaceTransitionStop) + }) + + t.Run("FailureTTLTooEarly", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + failureTTL = time.Minute + + client = coderdenttest.New(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{}, + }, + }) + ) + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureAdvancedTemplateScheduling: 1, + }, + }) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID, func(ctr *codersdk.CreateTemplateRequest) { + ctr.FailureTTLMillis = ptr.Ref[int64](failureTTL.Milliseconds()) + }) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since not enough time has elapsed. + require.Len(t, stats.Transitions, 0) + }) + + t.Run("FailureTTLUnset", func(t *testing.T) { + t.Parallel() + + var ( + ticker = make(chan time.Time) + statCh = make(chan autobuild.Stats) + logger = slogtest.Make(t, &slogtest.Options{ + // We ignore errors here since we expect to fail + // builds. + IgnoreErrors: true, + }) + + client = coderdenttest.New(t, &coderdenttest.Options{ + Options: &coderdtest.Options{ + Logger: &logger, + AutobuildTicker: ticker, + IncludeProvisionerDaemon: true, + AutobuildStats: statCh, + TemplateScheduleStore: &coderd.EnterpriseTemplateScheduleStore{}, + }, + }) + ) + user := coderdtest.CreateFirstUser(t, client) + _ = coderdenttest.AddLicense(t, client, coderdenttest.LicenseOptions{ + Features: license.Features{ + codersdk.FeatureAdvancedTemplateScheduling: 1, + }, + }) + version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{ + Parse: echo.ParseComplete, + ProvisionPlan: echo.ProvisionComplete, + ProvisionApply: echo.ProvisionFailed, + }) + // Create a template without setting a failure_ttl. + template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID) + coderdtest.AwaitTemplateVersionJob(t, client, version.ID) + ws := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID) + build := coderdtest.AwaitWorkspaceBuildJob(t, client, ws.LatestBuild.ID) + require.Equal(t, codersdk.WorkspaceStatusFailed, build.Status) + ticker <- time.Now() + stats := <-statCh + // Expect no transitions since the field is unset on the template. + require.Len(t, stats.Transitions, 0) + }) +} + func TestWorkspacesFiltering(t *testing.T) { t.Parallel() From be1e37beac711f17fa088bc91610d824ac11542c Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 02:00:53 +0000 Subject: [PATCH 6/9] make gen --- coderd/database/queries.sql.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 7301c208c22e8..21aadf5e758d7 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -8316,7 +8316,8 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' + provisioner_jobs.error != '' AND + workspace_builds.transition = "start"::workspace_transition ) ) AND workspaces.deleted = 'false' ` From a418d3087eff9f96b64ad8fd18829bed000c88ed Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 02:08:24 +0000 Subject: [PATCH 7/9] quotes --- coderd/database/queries.sql.go | 4 ++-- coderd/database/queries/workspaces.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 21aadf5e758d7..efec848d63814 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -8316,8 +8316,8 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = "start"::workspace_transition + provisioner_jobs.error != '' AND + workspace_builds.transition = 'start'::workspace_transition ) ) AND workspaces.deleted = 'false' ` diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 90368b7e8f5c6..0327ff5420fec 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -449,7 +449,7 @@ WHERE -- it may be eligible for failed stop. ( provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = "start"::workspace_transition + provisioner_jobs.error != '' AND + workspace_builds.transition = 'start'::workspace_transition ) ) AND workspaces.deleted = 'false'; From 7b6e5d1e8a9f9c41c83fb431c5dc367089fc072e Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 18:26:06 +0000 Subject: [PATCH 8/9] ignore canceled jobs --- coderd/autobuild/lifecycle_executor.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index e2ab82d8077b7..ca31ae5ad13e6 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -275,7 +275,5 @@ func isEligibleForFailedStop(build database.WorkspaceBuild, job database.Provisi db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && build.Transition == database.WorkspaceTransitionStart && // And sufficient time has elapsed since the job has completed. - (job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL || - // Or sufficient time has elapsed since the job was canceled. - job.CanceledAt.Valid && database.Now().Sub(job.CanceledAt.Time) > templateSchedule.FailureTTL) + job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL } From 194e9ebd7658e1eccef7c50655e7791f5829b8c3 Mon Sep 17 00:00:00 2001 From: Jon Ayers Date: Wed, 21 Jun 2023 18:30:33 +0000 Subject: [PATCH 9/9] fmt --- coderd/autobuild/lifecycle_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index ca31ae5ad13e6..4e0ae5aacfa14 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -275,5 +275,5 @@ func isEligibleForFailedStop(build database.WorkspaceBuild, job database.Provisi db2sdk.ProvisionerJobStatus(job) == codersdk.ProvisionerJobFailed && build.Transition == database.WorkspaceTransitionStart && // And sufficient time has elapsed since the job has completed. - job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL + job.CompletedAt.Valid && database.Now().Sub(job.CompletedAt.Time) > templateSchedule.FailureTTL } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy