From fb857e09b0df01482ebfc29b81eaace9831e3414 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 09:29:25 +0000 Subject: [PATCH 1/7] fix: rewrite GetWorkspacesEligibleForTransition query --- coderd/database/dbauthz/dbauthz.go | 2 +- coderd/database/dbmem/dbmem.go | 34 +++++-- coderd/database/dbmetrics/querymetrics.go | 2 +- coderd/database/dbmock/dbmock.go | 4 +- coderd/database/querier.go | 2 +- coderd/database/queries.sql.go | 103 +++++++++++----------- coderd/database/queries/workspaces.sql | 76 +++++++++------- 7 files changed, 128 insertions(+), 95 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index c855d5a1984df..218d6681dc771 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -2801,7 +2801,7 @@ func (q *querier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, ownerID u return q.db.GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx, ownerID, prep) } -func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) { +func (q *querier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { return q.db.GetWorkspacesEligibleForTransition(ctx, now) } diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 9a306db09785e..5caf644257c82 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -6868,11 +6868,11 @@ func (q *FakeQuerier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, owner return q.GetAuthorizedWorkspacesAndAgentsByOwnerID(ctx, ownerID, nil) } -func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) { +func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { q.mutex.RLock() defer q.mutex.RUnlock() - workspaces := []database.WorkspaceTable{} + workspaces := []database.GetWorkspacesEligibleForTransitionRow{} for _, workspace := range q.workspaces { build, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, workspace.ID) if err != nil { @@ -6883,14 +6883,20 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no !build.Deadline.IsZero() && build.Deadline.Before(now) && !workspace.DormantAt.Valid { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } if build.Transition == database.WorkspaceTransitionStop && workspace.AutostartSchedule.Valid && !workspace.DormantAt.Valid { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } @@ -6899,7 +6905,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no return nil, xerrors.Errorf("get provisioner job by ID: %w", err) } if codersdk.ProvisionerJobStatus(job.JobStatus) == codersdk.ProvisionerJobFailed { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } @@ -6908,11 +6917,17 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no return nil, xerrors.Errorf("get template by ID: %w", err) } if !workspace.DormantAt.Valid && template.TimeTilDormant > 0 { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } if workspace.DormantAt.Valid && template.TimeTilDormantAutoDelete > 0 { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } @@ -6921,7 +6936,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no return nil, xerrors.Errorf("get user by ID: %w", err) } if user.Status == database.UserStatusSuspended && build.Transition == database.WorkspaceTransitionStart { - workspaces = append(workspaces, workspace) + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ + ID: workspace.ID, + Name: workspace.Name, + }) continue } } diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index cee25e482bbaa..b2edda7a8fff4 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -1659,7 +1659,7 @@ func (m queryMetricsStore) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, return r0, r1 } -func (m queryMetricsStore) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.WorkspaceTable, error) { +func (m queryMetricsStore) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { start := time.Now() workspaces, err := m.s.GetWorkspacesEligibleForTransition(ctx, now) m.queryLatencies.WithLabelValues("GetWorkspacesEligibleForAutoStartStop").Observe(time.Since(start).Seconds()) diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index d8721f56d3f4e..cb5ebd69f89e7 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -3503,10 +3503,10 @@ func (mr *MockStoreMockRecorder) GetWorkspacesAndAgentsByOwnerID(arg0, arg1 any) } // GetWorkspacesEligibleForTransition mocks base method. -func (m *MockStore) GetWorkspacesEligibleForTransition(arg0 context.Context, arg1 time.Time) ([]database.WorkspaceTable, error) { +func (m *MockStore) GetWorkspacesEligibleForTransition(arg0 context.Context, arg1 time.Time) ([]database.GetWorkspacesEligibleForTransitionRow, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetWorkspacesEligibleForTransition", arg0, arg1) - ret0, _ := ret[0].([]database.WorkspaceTable) + ret0, _ := ret[0].([]database.GetWorkspacesEligibleForTransitionRow) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 46d1b1ae5b322..297e549f1e300 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -346,7 +346,7 @@ type sqlcQuerier interface { // be used in a WHERE clause. GetWorkspaces(ctx context.Context, arg GetWorkspacesParams) ([]GetWorkspacesRow, error) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, ownerID uuid.UUID) ([]GetWorkspacesAndAgentsByOwnerIDRow, error) - GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]WorkspaceTable, error) + GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]GetWorkspacesEligibleForTransitionRow, error) InsertAPIKey(ctx context.Context, arg InsertAPIKeyParams) (APIKey, error) // We use the organization_id as the id // for simplicity since all users is diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 87d3c17f5400f..27612573a3c2e 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -15363,7 +15363,8 @@ func (q *sqlQuerier) GetWorkspacesAndAgentsByOwnerID(ctx context.Context, ownerI const getWorkspacesEligibleForTransition = `-- name: GetWorkspacesEligibleForTransition :many SELECT - workspaces.id, workspaces.created_at, workspaces.updated_at, workspaces.owner_id, workspaces.organization_id, workspaces.template_id, workspaces.deleted, workspaces.name, workspaces.autostart_schedule, workspaces.ttl, workspaces.last_used_at, workspaces.dormant_at, workspaces.deleting_at, workspaces.automatic_updates, workspaces.favorite + workspaces.id, + workspaces.name FROM workspaces LEFT JOIN @@ -15385,82 +15386,84 @@ WHERE ) AND ( - -- If the workspace build was a start transition, the workspace is - -- potentially eligible for autostop if it's past the deadline. The - -- deadline is computed at build time upon success and is bumped based - -- on activity (up the max deadline if set). We don't need to check - -- license here since that's done when the values are written to the build. + -- isEligibleForAutostop ( - workspace_builds.transition = 'start'::workspace_transition AND - workspace_builds.deadline IS NOT NULL AND - workspace_builds.deadline < $1 :: timestamptz + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND + workspaces.dormant_at IS NULL AND + workspace_builds.transition = 'start'::workspace_transition AND ( + users.status = 'suspended'::user_status OR ( + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline < $1 :: timestamptz + ) + ) ) OR - -- If the workspace build was a stop transition, the workspace is - -- potentially eligible for autostart if it has a schedule set. The - -- caller must check if the template allows autostart in a license-aware - -- fashion as we cannot check it here. + -- isEligibleForAutostart ( + users.status = 'active'::user_status AND + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL ) OR - -- If the workspace's most recent job resulted in an error - -- it may be eligible for failed stop. - ( - provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = 'start'::workspace_transition - ) OR - - -- If the workspace's template has an inactivity_ttl set - -- it may be eligible for dormancy. + -- isEligibleForDormantStop ( + workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND - workspaces.dormant_at IS NULL + ($1 :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- If the workspace's template has a time_til_dormant_autodelete set - -- and the workspace is already dormant. + -- isEligibleForDelete ( + workspaces.dormant_at IS NOT NULL AND + workspaces.deleting_at IS NOT NULL AND + workspaces.deleting_at < $1 :: timestamptz AND templates.time_til_dormant_autodelete > 0 AND - workspaces.dormant_at IS NOT NULL + CASE + WHEN ( + workspace_builds.transition = 'delete'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status + ) THEN ( + ( + provisioner_jobs.canceled_at IS NOT NULL OR + provisioner_jobs.completed_at IS NOT NULL + ) AND ( + ($1 :: timestamptz) - (CASE + WHEN provisioner_jobs.canceled_at IS NOT NULL THEN provisioner_jobs.canceled_at + ELSE provisioner_jobs.completed_at + END) > INTERVAL '24 hours' + ) + ) + ELSE true + END ) OR - -- If the user account is suspended, and the workspace is running. + -- isEligibleForFailedStop ( - users.status = 'suspended'::user_status AND - workspace_builds.transition = 'start'::workspace_transition + templates.failure_ttl > 0 AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND + workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.completed_at IS NOT NULL AND + ($1 :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) ) AND workspaces.deleted = 'false' ` -func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]WorkspaceTable, error) { +type GetWorkspacesEligibleForTransitionRow struct { + ID uuid.UUID `db:"id" json:"id"` + Name string `db:"name" json:"name"` +} + +func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]GetWorkspacesEligibleForTransitionRow, error) { rows, err := q.db.QueryContext(ctx, getWorkspacesEligibleForTransition, now) if err != nil { return nil, err } defer rows.Close() - var items []WorkspaceTable + var items []GetWorkspacesEligibleForTransitionRow for rows.Next() { - var i WorkspaceTable - if err := rows.Scan( - &i.ID, - &i.CreatedAt, - &i.UpdatedAt, - &i.OwnerID, - &i.OrganizationID, - &i.TemplateID, - &i.Deleted, - &i.Name, - &i.AutostartSchedule, - &i.Ttl, - &i.LastUsedAt, - &i.DormantAt, - &i.DeletingAt, - &i.AutomaticUpdates, - &i.Favorite, - ); err != nil { + var i GetWorkspacesEligibleForTransitionRow + if err := rows.Scan(&i.ID, &i.Name); err != nil { return nil, err } items = append(items, i) diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index a1f41eb84d603..91315170c619c 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -557,7 +557,8 @@ FROM pending_workspaces, building_workspaces, running_workspaces, failed_workspa -- name: GetWorkspacesEligibleForTransition :many SELECT - workspaces.* + workspaces.id, + workspaces.name FROM workspaces LEFT JOIN @@ -579,52 +580,65 @@ WHERE ) AND ( - -- If the workspace build was a start transition, the workspace is - -- potentially eligible for autostop if it's past the deadline. The - -- deadline is computed at build time upon success and is bumped based - -- on activity (up the max deadline if set). We don't need to check - -- license here since that's done when the values are written to the build. + -- isEligibleForAutostop ( - workspace_builds.transition = 'start'::workspace_transition AND - workspace_builds.deadline IS NOT NULL AND - workspace_builds.deadline < @now :: timestamptz + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND + workspaces.dormant_at IS NULL AND + workspace_builds.transition = 'start'::workspace_transition AND ( + users.status = 'suspended'::user_status OR ( + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline < @now :: timestamptz + ) + ) ) OR - -- If the workspace build was a stop transition, the workspace is - -- potentially eligible for autostart if it has a schedule set. The - -- caller must check if the template allows autostart in a license-aware - -- fashion as we cannot check it here. + -- isEligibleForAutostart ( + users.status = 'active'::user_status AND + provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspace_builds.transition = 'stop'::workspace_transition AND workspaces.autostart_schedule IS NOT NULL ) OR - -- If the workspace's most recent job resulted in an error - -- it may be eligible for failed stop. - ( - provisioner_jobs.error IS NOT NULL AND - provisioner_jobs.error != '' AND - workspace_builds.transition = 'start'::workspace_transition - ) OR - - -- If the workspace's template has an inactivity_ttl set - -- it may be eligible for dormancy. + -- isEligibleForDormantStop ( + workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND - workspaces.dormant_at IS NULL + (@now :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- If the workspace's template has a time_til_dormant_autodelete set - -- and the workspace is already dormant. + -- isEligibleForDelete ( + workspaces.dormant_at IS NOT NULL AND + workspaces.deleting_at IS NOT NULL AND + workspaces.deleting_at < @now :: timestamptz AND templates.time_til_dormant_autodelete > 0 AND - workspaces.dormant_at IS NOT NULL + CASE + WHEN ( + workspace_builds.transition = 'delete'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status + ) THEN ( + ( + provisioner_jobs.canceled_at IS NOT NULL OR + provisioner_jobs.completed_at IS NOT NULL + ) AND ( + (@now :: timestamptz) - (CASE + WHEN provisioner_jobs.canceled_at IS NOT NULL THEN provisioner_jobs.canceled_at + ELSE provisioner_jobs.completed_at + END) > INTERVAL '24 hours' + ) + ) + ELSE true + END ) OR - -- If the user account is suspended, and the workspace is running. + -- isEligibleForFailedStop ( - users.status = 'suspended'::user_status AND - workspace_builds.transition = 'start'::workspace_transition + templates.failure_ttl > 0 AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND + workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.completed_at IS NOT NULL AND + (@now :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) ) AND workspaces.deleted = 'false'; @@ -727,5 +741,3 @@ WHERE -- Authorize Filter clause will be injected below in GetAuthorizedWorkspacesAndAgentsByOwnerID -- @authorize_filter GROUP BY workspaces.id, workspaces.name, latest_build.job_status, latest_build.job_id, latest_build.transition; - - From de6e6af41f3302e20564a71938dfd70b71db8591 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 12:26:30 +0000 Subject: [PATCH 2/7] feat: add caching to reduce db calls --- coderd/autobuild/cache.go | 41 +++++++++++++ coderd/autobuild/cache_test.go | 84 ++++++++++++++++++++++++++ coderd/autobuild/lifecycle_executor.go | 26 ++++++-- 3 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 coderd/autobuild/cache.go create mode 100644 coderd/autobuild/cache_test.go diff --git a/coderd/autobuild/cache.go b/coderd/autobuild/cache.go new file mode 100644 index 0000000000000..7d6122ef2fada --- /dev/null +++ b/coderd/autobuild/cache.go @@ -0,0 +1,41 @@ +package autobuild + +import ( + "errors" + "sync" +) + +var errCacheLoaderNil = errors.New("loader is nil") + +type cacheOf[K comparable, V any] struct { + mu sync.Mutex + m map[K]V +} + +func newCacheOf[K comparable, V any]() *cacheOf[K, V] { + return &cacheOf[K, V]{ + m: make(map[K]V), + } +} + +func (c *cacheOf[K, V]) LoadOrStore(key K, loader func() (V, error)) (V, error) { + c.mu.Lock() + defer c.mu.Unlock() + + value, found := c.m[key] + if !found { + if loader == nil { + return *new(V), errCacheLoaderNil + } + + loaded, err := loader() + if err != nil { + return *new(V), err + } + + c.m[key] = loaded + return loaded, nil + } + + return value, nil +} diff --git a/coderd/autobuild/cache_test.go b/coderd/autobuild/cache_test.go new file mode 100644 index 0000000000000..2a335a65294a2 --- /dev/null +++ b/coderd/autobuild/cache_test.go @@ -0,0 +1,84 @@ +package autobuild + +import ( + "errors" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/require" +) + +func TestCache(t *testing.T) { + t.Parallel() + + t.Run("CallsLoaderOnce", func(t *testing.T) { + callCount := 0 + cache := newCacheOf[uuid.UUID, int]() + key := uuid.New() + + // Call LoadOrStore for key `key` for the first time. + // We expect this to call our loader function. + value, err := cache.LoadOrStore(key, func() (int, error) { + callCount += 1 + return 1, nil + }) + require.NoError(t, err) + require.Equal(t, 1, value) + require.Equal(t, 1, callCount) + + // Call LoadOrStore for key `key` for the second time. + // We expect this to return data from the previous load. + value, err = cache.LoadOrStore(key, func() (int, error) { + callCount += 1 + + // We return a different value to further check + // that this function isn't called. + return 2, nil + }) + require.NoError(t, err) + require.Equal(t, 1, value) + require.Equal(t, 1, callCount) + }) + + t.Run("ReturnsErrOnLoaderErr", func(t *testing.T) { + exampleErr := errors.New("example error") + cache := newCacheOf[uuid.UUID, int]() + key := uuid.New() + + _, err := cache.LoadOrStore(key, func() (int, error) { + return 0, exampleErr + }) + require.Error(t, err) + require.Equal(t, exampleErr, err) + }) + + t.Run("CanCacheWithMultipleKeys", func(t *testing.T) { + cache := newCacheOf[uuid.UUID, int]() + keyA := uuid.New() + keyB := uuid.New() + + // We first insert data with our first key + value, err := cache.LoadOrStore(keyA, func() (int, error) { + return 10, nil + }) + require.NoError(t, err) + require.Equal(t, 10, value) + + // Next we insert data with a different key + value, err = cache.LoadOrStore(keyB, func() (int, error) { + return 20, nil + }) + require.NoError(t, err) + require.Equal(t, 20, value) + + // Now we check the data is still available for the first key + value, err = cache.LoadOrStore(keyA, nil) + require.NoError(t, err) + require.Equal(t, 10, value) + + // And that the data is also still available for the second key + value, err = cache.LoadOrStore(keyB, nil) + require.NoError(t, err) + require.Equal(t, 20, value) + }) +} diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index ac2930c9e32c8..3737da035c3b3 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -154,6 +154,16 @@ func (e *Executor) runOnce(t time.Time) Stats { // Limit the concurrency to avoid overloading the database. eg.SetLimit(10) + // We cache these values to help reduce load on the database. + // These could be outdated during our execution, but this is + // unlikely to be noticed or cause any unwanted behaviour. + var ( + userCache = newCacheOf[uuid.UUID, database.User]() + templateCache = newCacheOf[uuid.UUID, database.Template]() + templateVersionCache = newCacheOf[uuid.UUID, database.TemplateVersion]() + templateScheduleCache = newCacheOf[uuid.UUID, schedule.TemplateScheduleOptions]() + ) + for _, ws := range workspaces { wsID := ws.ID wsName := ws.Name @@ -184,7 +194,9 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get workspace by id: %w", err) } - user, err := tx.GetUserByID(e.ctx, ws.OwnerID) + user, err := userCache.LoadOrStore(ws.OwnerID, func() (database.User, error) { + return tx.GetUserByID(e.ctx, ws.OwnerID) + }) if err != nil { return xerrors.Errorf("get user by id: %w", err) } @@ -200,17 +212,23 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get latest provisioner job: %w", err) } - templateSchedule, err := (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) + templateSchedule, err := templateScheduleCache.LoadOrStore(ws.TemplateID, func() (schedule.TemplateScheduleOptions, error) { + return (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) + }) if err != nil { return xerrors.Errorf("get template scheduling options: %w", err) } - tmpl, err = tx.GetTemplateByID(e.ctx, ws.TemplateID) + tmpl, err := templateCache.LoadOrStore(ws.TemplateID, func() (database.Template, error) { + return tx.GetTemplateByID(e.ctx, ws.TemplateID) + }) if err != nil { return xerrors.Errorf("get template by ID: %w", err) } - activeTemplateVersion, err = tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) + activeTemplateVersion, err = templateVersionCache.LoadOrStore(tmpl.ActiveVersionID, func() (database.TemplateVersion, error) { + return tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) + }) if err != nil { return xerrors.Errorf("get active template version by ID: %w", err) } From e81a985a94abee23ae2d5776148536b18d88126c Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 14:28:43 +0000 Subject: [PATCH 3/7] revert: cache --- coderd/autobuild/cache.go | 41 ------------- coderd/autobuild/cache_test.go | 84 -------------------------- coderd/autobuild/lifecycle_executor.go | 26 ++------ 3 files changed, 4 insertions(+), 147 deletions(-) delete mode 100644 coderd/autobuild/cache.go delete mode 100644 coderd/autobuild/cache_test.go diff --git a/coderd/autobuild/cache.go b/coderd/autobuild/cache.go deleted file mode 100644 index 7d6122ef2fada..0000000000000 --- a/coderd/autobuild/cache.go +++ /dev/null @@ -1,41 +0,0 @@ -package autobuild - -import ( - "errors" - "sync" -) - -var errCacheLoaderNil = errors.New("loader is nil") - -type cacheOf[K comparable, V any] struct { - mu sync.Mutex - m map[K]V -} - -func newCacheOf[K comparable, V any]() *cacheOf[K, V] { - return &cacheOf[K, V]{ - m: make(map[K]V), - } -} - -func (c *cacheOf[K, V]) LoadOrStore(key K, loader func() (V, error)) (V, error) { - c.mu.Lock() - defer c.mu.Unlock() - - value, found := c.m[key] - if !found { - if loader == nil { - return *new(V), errCacheLoaderNil - } - - loaded, err := loader() - if err != nil { - return *new(V), err - } - - c.m[key] = loaded - return loaded, nil - } - - return value, nil -} diff --git a/coderd/autobuild/cache_test.go b/coderd/autobuild/cache_test.go deleted file mode 100644 index 2a335a65294a2..0000000000000 --- a/coderd/autobuild/cache_test.go +++ /dev/null @@ -1,84 +0,0 @@ -package autobuild - -import ( - "errors" - "testing" - - "github.com/google/uuid" - "github.com/stretchr/testify/require" -) - -func TestCache(t *testing.T) { - t.Parallel() - - t.Run("CallsLoaderOnce", func(t *testing.T) { - callCount := 0 - cache := newCacheOf[uuid.UUID, int]() - key := uuid.New() - - // Call LoadOrStore for key `key` for the first time. - // We expect this to call our loader function. - value, err := cache.LoadOrStore(key, func() (int, error) { - callCount += 1 - return 1, nil - }) - require.NoError(t, err) - require.Equal(t, 1, value) - require.Equal(t, 1, callCount) - - // Call LoadOrStore for key `key` for the second time. - // We expect this to return data from the previous load. - value, err = cache.LoadOrStore(key, func() (int, error) { - callCount += 1 - - // We return a different value to further check - // that this function isn't called. - return 2, nil - }) - require.NoError(t, err) - require.Equal(t, 1, value) - require.Equal(t, 1, callCount) - }) - - t.Run("ReturnsErrOnLoaderErr", func(t *testing.T) { - exampleErr := errors.New("example error") - cache := newCacheOf[uuid.UUID, int]() - key := uuid.New() - - _, err := cache.LoadOrStore(key, func() (int, error) { - return 0, exampleErr - }) - require.Error(t, err) - require.Equal(t, exampleErr, err) - }) - - t.Run("CanCacheWithMultipleKeys", func(t *testing.T) { - cache := newCacheOf[uuid.UUID, int]() - keyA := uuid.New() - keyB := uuid.New() - - // We first insert data with our first key - value, err := cache.LoadOrStore(keyA, func() (int, error) { - return 10, nil - }) - require.NoError(t, err) - require.Equal(t, 10, value) - - // Next we insert data with a different key - value, err = cache.LoadOrStore(keyB, func() (int, error) { - return 20, nil - }) - require.NoError(t, err) - require.Equal(t, 20, value) - - // Now we check the data is still available for the first key - value, err = cache.LoadOrStore(keyA, nil) - require.NoError(t, err) - require.Equal(t, 10, value) - - // And that the data is also still available for the second key - value, err = cache.LoadOrStore(keyB, nil) - require.NoError(t, err) - require.Equal(t, 20, value) - }) -} diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 3737da035c3b3..35f3ba106507a 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -154,16 +154,6 @@ func (e *Executor) runOnce(t time.Time) Stats { // Limit the concurrency to avoid overloading the database. eg.SetLimit(10) - // We cache these values to help reduce load on the database. - // These could be outdated during our execution, but this is - // unlikely to be noticed or cause any unwanted behaviour. - var ( - userCache = newCacheOf[uuid.UUID, database.User]() - templateCache = newCacheOf[uuid.UUID, database.Template]() - templateVersionCache = newCacheOf[uuid.UUID, database.TemplateVersion]() - templateScheduleCache = newCacheOf[uuid.UUID, schedule.TemplateScheduleOptions]() - ) - for _, ws := range workspaces { wsID := ws.ID wsName := ws.Name @@ -194,9 +184,7 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get workspace by id: %w", err) } - user, err := userCache.LoadOrStore(ws.OwnerID, func() (database.User, error) { - return tx.GetUserByID(e.ctx, ws.OwnerID) - }) + user, err := tx.GetUserByID(e.ctx, ws.OwnerID) if err != nil { return xerrors.Errorf("get user by id: %w", err) } @@ -212,23 +200,17 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get latest provisioner job: %w", err) } - templateSchedule, err := templateScheduleCache.LoadOrStore(ws.TemplateID, func() (schedule.TemplateScheduleOptions, error) { - return (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) - }) + templateSchedule, err := (*(e.templateScheduleStore.Load())).Get(e.ctx, tx, ws.TemplateID) if err != nil { return xerrors.Errorf("get template scheduling options: %w", err) } - tmpl, err := templateCache.LoadOrStore(ws.TemplateID, func() (database.Template, error) { - return tx.GetTemplateByID(e.ctx, ws.TemplateID) - }) + tmpl, err := tx.GetTemplateByID(e.ctx, ws.TemplateID) if err != nil { return xerrors.Errorf("get template by ID: %w", err) } - activeTemplateVersion, err = templateVersionCache.LoadOrStore(tmpl.ActiveVersionID, func() (database.TemplateVersion, error) { - return tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) - }) + activeTemplateVersion, err = tx.GetTemplateVersionByID(e.ctx, tmpl.ActiveVersionID) if err != nil { return xerrors.Errorf("get active template version by ID: %w", err) } From 190617c5dbb84503d5e4ecf2b5cd1aa57d7b6f67 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Thu, 7 Nov 2024 15:15:17 +0000 Subject: [PATCH 4/7] revert: mistake --- coderd/autobuild/lifecycle_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coderd/autobuild/lifecycle_executor.go b/coderd/autobuild/lifecycle_executor.go index 35f3ba106507a..ac2930c9e32c8 100644 --- a/coderd/autobuild/lifecycle_executor.go +++ b/coderd/autobuild/lifecycle_executor.go @@ -205,7 +205,7 @@ func (e *Executor) runOnce(t time.Time) Stats { return xerrors.Errorf("get template scheduling options: %w", err) } - tmpl, err := tx.GetTemplateByID(e.ctx, ws.TemplateID) + tmpl, err = tx.GetTemplateByID(e.ctx, ws.TemplateID) if err != nil { return xerrors.Errorf("get template by ID: %w", err) } From 18e9550d24f62f73ed9ea620e1c633ca61376a2b Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 8 Nov 2024 10:02:13 +0000 Subject: [PATCH 5/7] chore: comment query --- coderd/database/queries.sql.go | 32 +++++++++++++++++++++----- coderd/database/queries/workspaces.sql | 32 +++++++++++++++++++++----- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 27612573a3c2e..cc7a9166a729b 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -15386,7 +15386,11 @@ WHERE ) AND ( - -- isEligibleForAutostop + -- A workspace may be eligible for autostop if the following are true: + -- * The provisioner job has not failed. + -- * The workspace is not dormant. + -- * The workspace build was a start transition. + -- * The workspace's owner is suspended OR the workspace build deadline has passed. ( provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspaces.dormant_at IS NULL AND @@ -15398,7 +15402,11 @@ WHERE ) ) OR - -- isEligibleForAutostart + -- A workspace may be eligible for autostart if the following are true: + -- * The workspace's owner is active. + -- * The provisioner job did not failed. + -- * The workspace build was a stop transition. + -- * The workspace has an autostart schedule. ( users.status = 'active'::user_status AND provisioner_jobs.job_status != 'failed'::provisioner_job_status AND @@ -15406,14 +15414,21 @@ WHERE workspaces.autostart_schedule IS NOT NULL ) OR - -- isEligibleForDormantStop + -- A workspace may be eligible for dormant stop if the following are true: + -- * The workspace is not dormant. + -- * The template has set a time til dormant. + -- * The workspace has been unused for longer than the time til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND ($1 :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- isEligibleForDelete + -- A workspace may be eligible for deletion if the following are true: + -- * The workspace is dormant. + -- * The workspace is scheduled to be deleted. + -- * If there was a prior attempt to delete the workspace that failed: + -- * This attempt was at least 24 hours ago> ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND @@ -15438,11 +15453,16 @@ WHERE END ) OR - -- isEligibleForFailedStop + -- A workspace may be eligible for failed stop if the following are true: + -- * The template has a failure ttl set. + -- * The workspace build was a start transition. + -- * The provisioner job failed. + -- * The provisioner job had completed. + -- * The provisioner job has been completed for longer than the failure ttl. ( templates.failure_ttl > 0 AND - provisioner_jobs.job_status = 'failed'::provisioner_job_status AND workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND provisioner_jobs.completed_at IS NOT NULL AND ($1 :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index 91315170c619c..d27a2569dc216 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -580,7 +580,11 @@ WHERE ) AND ( - -- isEligibleForAutostop + -- A workspace may be eligible for autostop if the following are true: + -- * The provisioner job has not failed. + -- * The workspace is not dormant. + -- * The workspace build was a start transition. + -- * The workspace's owner is suspended OR the workspace build deadline has passed. ( provisioner_jobs.job_status != 'failed'::provisioner_job_status AND workspaces.dormant_at IS NULL AND @@ -592,7 +596,11 @@ WHERE ) ) OR - -- isEligibleForAutostart + -- A workspace may be eligible for autostart if the following are true: + -- * The workspace's owner is active. + -- * The provisioner job did not failed. + -- * The workspace build was a stop transition. + -- * The workspace has an autostart schedule. ( users.status = 'active'::user_status AND provisioner_jobs.job_status != 'failed'::provisioner_job_status AND @@ -600,14 +608,21 @@ WHERE workspaces.autostart_schedule IS NOT NULL ) OR - -- isEligibleForDormantStop + -- A workspace may be eligible for dormant stop if the following are true: + -- * The workspace is not dormant. + -- * The template has set a time til dormant. + -- * The workspace has been unused for longer than the time til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND (@now :: timestamptz) - workspaces.last_used_at > (INTERVAL '1 millisecond' * (templates.time_til_dormant / 1000000)) ) OR - -- isEligibleForDelete + -- A workspace may be eligible for deletion if the following are true: + -- * The workspace is dormant. + -- * The workspace is scheduled to be deleted. + -- * If there was a prior attempt to delete the workspace that failed: + -- * This attempt was at least 24 hours ago> ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND @@ -632,11 +647,16 @@ WHERE END ) OR - -- isEligibleForFailedStop + -- A workspace may be eligible for failed stop if the following are true: + -- * The template has a failure ttl set. + -- * The workspace build was a start transition. + -- * The provisioner job failed. + -- * The provisioner job had completed. + -- * The provisioner job has been completed for longer than the failure ttl. ( templates.failure_ttl > 0 AND - provisioner_jobs.job_status = 'failed'::provisioner_job_status AND workspace_builds.transition = 'start'::workspace_transition AND + provisioner_jobs.job_status = 'failed'::provisioner_job_status AND provisioner_jobs.completed_at IS NOT NULL AND (@now :: timestamptz) - provisioner_jobs.completed_at > (INTERVAL '1 millisecond' * (templates.failure_ttl / 1000000)) ) From 5aff6e3f1240b0de5c0e189064158a8339e9269a Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Fri, 8 Nov 2024 10:22:56 +0000 Subject: [PATCH 6/7] fix: update dbmem.go to match new query --- coderd/database/dbmem/dbmem.go | 77 +++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index 5caf644257c82..b465adb96e96d 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -6876,23 +6876,32 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no for _, workspace := range q.workspaces { build, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, workspace.ID) if err != nil { - return nil, err + return nil, xerrors.Errorf("get workspace build by ID: %w", err) } - if build.Transition == database.WorkspaceTransitionStart && - !build.Deadline.IsZero() && - build.Deadline.Before(now) && - !workspace.DormantAt.Valid { - workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ - ID: workspace.ID, - Name: workspace.Name, - }) + user, err := q.getUserByIDNoLock(workspace.OwnerID) + if err != nil { + return nil, xerrors.Errorf("get user by ID: %w", err) + } + + job, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID) + if err != nil { + return nil, xerrors.Errorf("get provisioner job by ID: %w", err) + } + + template, err := q.getTemplateByIDNoLock(ctx, workspace.TemplateID) + if err != nil { + return nil, xerrors.Errorf("get template by ID: %w", err) + } + + if workspace.Deleted { continue } - if build.Transition == database.WorkspaceTransitionStop && - workspace.AutostartSchedule.Valid && - !workspace.DormantAt.Valid { + if job.JobStatus != database.ProvisionerJobStatusFailed && + !workspace.DormantAt.Valid && + build.Transition == database.WorkspaceTransitionStart && + (user.Status == database.UserStatusSuspended || (!build.Deadline.IsZero() && build.Deadline.Before(now))) { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, @@ -6900,11 +6909,10 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no continue } - job, err := q.getProvisionerJobByIDNoLock(ctx, build.JobID) - if err != nil { - return nil, xerrors.Errorf("get provisioner job by ID: %w", err) - } - if codersdk.ProvisionerJobStatus(job.JobStatus) == codersdk.ProvisionerJobFailed { + if user.Status == database.UserStatusActive && + job.JobStatus != database.ProvisionerJobStatusFailed && + build.Transition == database.WorkspaceTransitionStop && + workspace.AutostartSchedule.Valid { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, @@ -6912,18 +6920,31 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no continue } - template, err := q.getTemplateByIDNoLock(ctx, workspace.TemplateID) - if err != nil { - return nil, xerrors.Errorf("get template by ID: %w", err) - } - if !workspace.DormantAt.Valid && template.TimeTilDormant > 0 { + if !workspace.DormantAt.Valid && + template.TimeTilDormant > 0 && + now.Sub(workspace.LastUsedAt) > time.Duration(template.TimeTilDormant) { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, }) continue } - if workspace.DormantAt.Valid && template.TimeTilDormantAutoDelete > 0 { + + if workspace.DormantAt.Valid && + workspace.DeletingAt.Valid && + workspace.DeletingAt.Time.Before(now) && + template.TimeTilDormantAutoDelete > 0 { + if build.Transition == database.WorkspaceTransitionDelete && + job.JobStatus == database.ProvisionerJobStatusFailed { + if job.CanceledAt.Valid && now.Sub(job.CanceledAt.Time) <= 24*time.Hour { + continue + } + + if job.CompletedAt.Valid && now.Sub(job.CompletedAt.Time) <= 24*time.Hour { + continue + } + } + workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, @@ -6931,11 +6952,11 @@ func (q *FakeQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, no continue } - user, err := q.getUserByIDNoLock(workspace.OwnerID) - if err != nil { - return nil, xerrors.Errorf("get user by ID: %w", err) - } - if user.Status == database.UserStatusSuspended && build.Transition == database.WorkspaceTransitionStart { + if template.FailureTTL > 0 && + build.Transition == database.WorkspaceTransitionStart && + job.JobStatus == database.ProvisionerJobStatusFailed && + job.CompletedAt.Valid && + now.Sub(job.CompletedAt.Time) > time.Duration(template.FailureTTL) { workspaces = append(workspaces, database.GetWorkspacesEligibleForTransitionRow{ ID: workspace.ID, Name: workspace.Name, From b9539027faa6191dbd0acf1321bb1f288f399d22 Mon Sep 17 00:00:00 2001 From: Danielle Maywood Date: Mon, 11 Nov 2024 09:39:59 +0000 Subject: [PATCH 7/7] fix: typo --- coderd/database/queries.sql.go | 10 +++++----- coderd/database/queries/workspaces.sql | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index cc7a9166a729b..14afd75403c89 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -15396,7 +15396,7 @@ WHERE workspaces.dormant_at IS NULL AND workspace_builds.transition = 'start'::workspace_transition AND ( users.status = 'suspended'::user_status OR ( - workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamptz AND workspace_builds.deadline < $1 :: timestamptz ) ) @@ -15404,7 +15404,7 @@ WHERE -- A workspace may be eligible for autostart if the following are true: -- * The workspace's owner is active. - -- * The provisioner job did not failed. + -- * The provisioner job did not fail. -- * The workspace build was a stop transition. -- * The workspace has an autostart schedule. ( @@ -15416,8 +15416,8 @@ WHERE -- A workspace may be eligible for dormant stop if the following are true: -- * The workspace is not dormant. - -- * The template has set a time til dormant. - -- * The workspace has been unused for longer than the time til dormancy. + -- * The template has set a time 'til dormant. + -- * The workspace has been unused for longer than the time 'til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND @@ -15428,7 +15428,7 @@ WHERE -- * The workspace is dormant. -- * The workspace is scheduled to be deleted. -- * If there was a prior attempt to delete the workspace that failed: - -- * This attempt was at least 24 hours ago> + -- * This attempt was at least 24 hours ago. ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND diff --git a/coderd/database/queries/workspaces.sql b/coderd/database/queries/workspaces.sql index d27a2569dc216..4d200a33f1620 100644 --- a/coderd/database/queries/workspaces.sql +++ b/coderd/database/queries/workspaces.sql @@ -590,7 +590,7 @@ WHERE workspaces.dormant_at IS NULL AND workspace_builds.transition = 'start'::workspace_transition AND ( users.status = 'suspended'::user_status OR ( - workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamp AND + workspace_builds.deadline != '0001-01-01 00:00:00+00'::timestamptz AND workspace_builds.deadline < @now :: timestamptz ) ) @@ -598,7 +598,7 @@ WHERE -- A workspace may be eligible for autostart if the following are true: -- * The workspace's owner is active. - -- * The provisioner job did not failed. + -- * The provisioner job did not fail. -- * The workspace build was a stop transition. -- * The workspace has an autostart schedule. ( @@ -610,8 +610,8 @@ WHERE -- A workspace may be eligible for dormant stop if the following are true: -- * The workspace is not dormant. - -- * The template has set a time til dormant. - -- * The workspace has been unused for longer than the time til dormancy. + -- * The template has set a time 'til dormant. + -- * The workspace has been unused for longer than the time 'til dormancy. ( workspaces.dormant_at IS NULL AND templates.time_til_dormant > 0 AND @@ -622,7 +622,7 @@ WHERE -- * The workspace is dormant. -- * The workspace is scheduled to be deleted. -- * If there was a prior attempt to delete the workspace that failed: - -- * This attempt was at least 24 hours ago> + -- * This attempt was at least 24 hours ago. ( workspaces.dormant_at IS NOT NULL AND workspaces.deleting_at IS NOT NULL AND pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy