From 6fb6800d0d0b4357aabef958c20a3f78295f4b34 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Thu, 22 May 2025 18:32:19 +0000 Subject: [PATCH 01/10] feat: add hard-limited presets metric --- .../coderd/prebuilds/metricscollector.go | 41 +++++++++++++++++++ enterprise/coderd/prebuilds/reconcile.go | 2 + enterprise/coderd/prebuilds/reconcile_test.go | 38 ++++++++++++++++- 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index 7a7734b6f8093..8d1d3185b7feb 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -27,6 +27,7 @@ const ( MetricDesiredGauge = namespace + "desired" MetricRunningGauge = namespace + "running" MetricEligibleGauge = namespace + "eligible" + MetricPresetHardLimitedGauge = namespace + "preset_hard_limited" MetricLastUpdatedGauge = namespace + "metrics_last_updated" ) @@ -82,6 +83,12 @@ var ( labels, nil, ) + presetHardLimitedDesc = prometheus.NewDesc( + MetricPresetHardLimitedGauge, + "Indicates whether a given preset has reached the hard failure limit (1 for hard-limited, 0 otherwise).", + labels, + nil, + ) lastUpdateDesc = prometheus.NewDesc( MetricLastUpdatedGauge, "The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached.", @@ -104,17 +111,22 @@ type MetricsCollector struct { replacementsCounter map[replacementKey]float64 replacementsCounterMu sync.Mutex + + isPresetHardLimited map[hardLimitedPresetKey]bool + isPresetHardLimitedMu sync.Mutex } var _ prometheus.Collector = new(MetricsCollector) func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector { log := logger.Named("prebuilds_metrics_collector") + return &MetricsCollector{ database: db, logger: log, snapshotter: snapshotter, replacementsCounter: make(map[replacementKey]float64), + isPresetHardLimited: make(map[hardLimitedPresetKey]bool), } } @@ -126,6 +138,7 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) { descCh <- desiredPrebuildsDesc descCh <- runningPrebuildsDesc descCh <- eligiblePrebuildsDesc + descCh <- presetHardLimitedDesc descCh <- lastUpdateDesc } @@ -173,6 +186,17 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName) } + mc.isPresetHardLimitedMu.Lock() + for key, isHardLimited := range mc.isPresetHardLimited { + var val float64 + if isHardLimited { + val = 1 + } + + metricsCh <- prometheus.MustNewConstMetric(presetHardLimitedDesc, prometheus.GaugeValue, val, key.templateName, key.presetName, key.orgName) + } + mc.isPresetHardLimitedMu.Unlock() + metricsCh <- prometheus.MustNewConstMetric(lastUpdateDesc, prometheus.GaugeValue, float64(currentState.createdAt.Unix())) } @@ -247,3 +271,20 @@ func (mc *MetricsCollector) trackResourceReplacement(orgName, templateName, pres // cause an issue (or indeed if either would), so we just track the replacement. mc.replacementsCounter[key]++ } + +type hardLimitedPresetKey struct { + orgName, templateName, presetName string +} + +func (k hardLimitedPresetKey) String() string { + return fmt.Sprintf("%s:%s:%s", k.orgName, k.templateName, k.presetName) +} + +func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) { + mc.isPresetHardLimitedMu.Lock() + defer mc.isPresetHardLimitedMu.Unlock() + + key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName} + + mc.isPresetHardLimited[key] = isHardLimited +} diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index 7796e43777951..f4a7887b6033a 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -361,6 +361,8 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres slog.F("preset_name", ps.Preset.Name), ) + c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited) + // If the preset was previously hard-limited, log it and exit early. if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited { logger.Warn(ctx, "skipping hard limited preset") diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index f52a77ca500b9..681a557bd3fe7 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -696,7 +696,8 @@ func TestSkippingHardLimitedPresets(t *testing.T) { ).Leveled(slog.LevelDebug) db, pubSub := dbtestutil.NewDB(t) fakeEnqueuer := newFakeEnqueuer() - controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, prometheus.NewRegistry(), fakeEnqueuer) + registry := prometheus.NewRegistry() + controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, registry, fakeEnqueuer) // Template admin to receive a notification. templateAdmin := dbgen.User(t, db, database.User{ @@ -732,6 +733,17 @@ func TestSkippingHardLimitedPresets(t *testing.T) { workspaceCount := len(workspaces) require.Equal(t, 1, workspaceCount) + // Verify initial state: metric is not set - meaning preset is not hard limited. + require.NoError(t, controller.ForceMetricsUpdate(ctx)) + mf, err := registry.Gather() + require.NoError(t, err) + metric := findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ + "template_name": template.Name, + "preset_name": preset.Name, + "org_name": org.Name, + }) + require.Nil(t, metric) + // We simulate a failed prebuild in the test; Consequently, the backoff mechanism is triggered when ReconcileAll is called. // Even though ReconciliationBackoffInterval is set to zero, we still need to advance the clock by at least one nanosecond. clock.Advance(time.Nanosecond).MustWait(ctx) @@ -755,6 +767,18 @@ func TestSkippingHardLimitedPresets(t *testing.T) { // When hard limit is not reached, a new workspace should be created. require.Equal(t, 2, len(workspaces)) require.Equal(t, database.PrebuildStatusHealthy, updatedPreset.PrebuildStatus) + + // When hard limit is not reached, metric is set to 0. + mf, err = registry.Gather() + require.NoError(t, err) + metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ + "template_name": template.Name, + "preset_name": preset.Name, + "org_name": org.Name, + }) + require.NotNil(t, metric) + require.NotNil(t, metric.GetGauge()) + require.EqualValues(t, 0, metric.GetGauge().GetValue()) return } @@ -775,6 +799,18 @@ func TestSkippingHardLimitedPresets(t *testing.T) { return true }) require.Len(t, matching, 1) + + // When hard limit is reached, metric is set to 1. + mf, err = registry.Gather() + require.NoError(t, err) + metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ + "template_name": template.Name, + "preset_name": preset.Name, + "org_name": org.Name, + }) + require.NotNil(t, metric) + require.NotNil(t, metric.GetGauge()) + require.EqualValues(t, 1, metric.GetGauge().GetValue()) }) } } From a1a4f2a2a7b1be28b3d93228183ccf6f7df1e618 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Fri, 23 May 2025 15:59:45 +0000 Subject: [PATCH 02/10] fix: report metric only for active template version --- enterprise/coderd/prebuilds/reconcile.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index f4a7887b6033a..d90316d567170 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -361,7 +361,9 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres slog.F("preset_name", ps.Preset.Name), ) - c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited) + if !ps.Preset.Deleted && ps.Preset.UsingActiveVersion { + c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited) + } // If the preset was previously hard-limited, log it and exit early. if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited { From 26676840a5d970a345a59dd1732d2433b0af4009 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Fri, 23 May 2025 16:43:38 +0000 Subject: [PATCH 03/10] fix: hard-limiting should only apply to creation of prebuilds --- enterprise/coderd/prebuilds/reconcile.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index d90316d567170..b18d2c1b0c3f4 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -365,16 +365,10 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited) } - // If the preset was previously hard-limited, log it and exit early. - if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited { - logger.Warn(ctx, "skipping hard limited preset") - return nil - } - // If the preset reached the hard failure limit for the first time during this iteration: // - Mark it as hard-limited in the database // - Send notifications to template admins - if ps.IsHardLimited { + if ps.Preset.PrebuildStatus != database.PrebuildStatusHardLimited && ps.IsHardLimited { logger.Warn(ctx, "skipping hard limited preset") err := c.store.UpdatePresetPrebuildStatus(ctx, database.UpdatePresetPrebuildStatusParams{ @@ -388,10 +382,7 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres err = c.notifyPrebuildFailureLimitReached(ctx, ps) if err != nil { logger.Error(ctx, "failed to notify that number of prebuild failures reached the limit", slog.Error(err)) - return nil } - - return nil } state := ps.CalculateState() @@ -456,6 +447,14 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres actions.Create = desired } + if actions.Create > 0 { + // If the preset is hard-limited, log it and exit early. + if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited || ps.IsHardLimited { + logger.Warn(ctx, "skipping hard limited preset") + return nil + } + } + var multiErr multierror.Error for range actions.Create { From 6a14a8b2564f7491ce971181b2d330c2a7cfe4d2 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Fri, 23 May 2025 20:26:09 +0000 Subject: [PATCH 04/10] test: improve test coverage for hard-limited presets --- enterprise/coderd/prebuilds/reconcile_test.go | 207 ++++++++++++++++++ 1 file changed, 207 insertions(+) diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 681a557bd3fe7..1d4a47fc442ad 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -815,6 +815,213 @@ func TestSkippingHardLimitedPresets(t *testing.T) { } } +func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { + t.Parallel() + + if !dbtestutil.WillUsePostgres() { + t.Skip("This test requires postgres") + } + + // Test cases verify the behavior of prebuild creation depending on configured failure limits. + testCases := []struct { + name string + hardLimit int64 + isHardLimitHit bool + }{ + { + name: "hard limit is hit - skip creation of prebuilt workspace", + hardLimit: 1, + isHardLimitHit: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + clock := quartz.NewMock(t) + ctx := testutil.Context(t, testutil.WaitShort) + cfg := codersdk.PrebuildsConfig{ + FailureHardLimit: serpent.Int64(tc.hardLimit), + ReconciliationBackoffInterval: 0, + } + logger := slogtest.Make( + t, &slogtest.Options{IgnoreErrors: true}, + ).Leveled(slog.LevelDebug) + db, pubSub := dbtestutil.NewDB(t) + fakeEnqueuer := newFakeEnqueuer() + registry := prometheus.NewRegistry() + controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, registry, fakeEnqueuer) + + // Template admin to receive a notification. + templateAdmin := dbgen.User(t, db, database.User{ + RBACRoles: []string{codersdk.RoleTemplateAdmin}, + }) + + // Set up test environment with a template, version, and preset. + ownerID := uuid.New() + dbgen.User(t, db, database.User{ + ID: ownerID, + }) + org, template := setupTestDBTemplate(t, db, ownerID, false) + templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID) + preset := setupTestDBPreset(t, db, templateVersionID, 2, uuid.New().String()) + + // Create a successful prebuilt workspace. + successfulWorkspace, _ := setupTestDBPrebuild( + t, + clock, + db, + pubSub, + database.WorkspaceTransitionStart, + database.ProvisionerJobStatusSucceeded, + org.ID, + preset, + template.ID, + templateVersionID, + ) + + // Make sure that prebuilt workspaces created in such order: [successful, failed]. + clock.Advance(time.Second).MustWait(ctx) + + // Create a failed prebuilt workspace that counts toward the hard failure limit. + setupTestDBPrebuild( + t, + clock, + db, + pubSub, + database.WorkspaceTransitionStart, + database.ProvisionerJobStatusFailed, + org.ID, + preset, + template.ID, + templateVersionID, + ) + + getJobStatusMap := func(workspaces []database.WorkspaceTable) map[database.ProvisionerJobStatus]int { + jobStatusMap := make(map[database.ProvisionerJobStatus]int) + for _, workspace := range workspaces { + workspaceBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{ + WorkspaceID: workspace.ID, + }) + require.NoError(t, err) + + for _, workspaceBuild := range workspaceBuilds { + job, err := db.GetProvisionerJobByID(ctx, workspaceBuild.JobID) + require.NoError(t, err) + jobStatusMap[job.JobStatus]++ + } + } + return jobStatusMap + } + + // Verify initial state: two workspaces exist, one successful, one failed. + workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID) + require.NoError(t, err) + require.Equal(t, 2, len(workspaces)) + jobStatusMap := getJobStatusMap(workspaces) + require.Len(t, jobStatusMap, 2) + require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusSucceeded]) + require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusFailed]) + + //Verify initial state: metric is not set - meaning preset is not hard limited. + require.NoError(t, controller.ForceMetricsUpdate(ctx)) + mf, err := registry.Gather() + require.NoError(t, err) + metric := findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ + "template_name": template.Name, + "preset_name": preset.Name, + "org_name": org.Name, + }) + require.Nil(t, metric) + + // We simulate a failed prebuild in the test; Consequently, the backoff mechanism is triggered when ReconcileAll is called. + // Even though ReconciliationBackoffInterval is set to zero, we still need to advance the clock by at least one nanosecond. + clock.Advance(time.Nanosecond).MustWait(ctx) + + // Trigger reconciliation to attempt creating a new prebuild. + // The outcome depends on whether the hard limit has been reached. + require.NoError(t, controller.ReconcileAll(ctx)) + + // These two additional calls to ReconcileAll should not trigger any notifications. + // A notification is only sent once. + require.NoError(t, controller.ReconcileAll(ctx)) + require.NoError(t, controller.ReconcileAll(ctx)) + + // Verify the final state after reconciliation. + // When hard limit is reached, no new workspace should be created. + workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID) + require.NoError(t, err) + require.Equal(t, 2, len(workspaces)) + jobStatusMap = getJobStatusMap(workspaces) + require.Len(t, jobStatusMap, 2) + require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusSucceeded]) + require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusFailed]) + + updatedPreset, err := db.GetPresetByID(ctx, preset.ID) + require.NoError(t, err) + require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus) + + // When hard limit is reached, a notification should be sent. + matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool { + if !assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") { + return false + } + + if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") { + return false + } + + return true + }) + require.Len(t, matching, 1) + + // When hard limit is reached, metric is set to 1. + mf, err = registry.Gather() + require.NoError(t, err) + metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ + "template_name": template.Name, + "preset_name": preset.Name, + "org_name": org.Name, + }) + require.NotNil(t, metric) + require.NotNil(t, metric.GetGauge()) + require.EqualValues(t, 1, metric.GetGauge().GetValue()) + + // When: the template is deleted. + require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{ + ID: template.ID, + Deleted: true, + UpdatedAt: dbtime.Now(), + })) + + // Trigger reconciliation to make sure that successful, but outdated prebuilt workspace will be deleted. + require.NoError(t, controller.ReconcileAll(ctx)) + + workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID) + require.NoError(t, err) + require.Equal(t, 2, len(workspaces)) + + jobStatusMap = getJobStatusMap(workspaces) + require.Len(t, jobStatusMap, 3) + require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusSucceeded]) + require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusFailed]) + // Pending job should be the job that deletes successful, but outdated prebuilt workspace. + // Prebuilt workspace MUST be deleted, despite the fact that preset is marked as hard limited. + require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusPending]) + + workspaceBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{ + WorkspaceID: successfulWorkspace.ID, + }) + require.NoError(t, err) + require.Equal(t, 2, len(workspaceBuilds)) + // Make sure that successfully created, but outdated prebuilt workspace was scheduled for deletion. + require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition) + require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition) + }) + } +} + func TestRunLoop(t *testing.T) { t.Parallel() From 0882626f6d04fe11dc349ff37fa4a5b7a626027b Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Fri, 23 May 2025 20:54:21 +0000 Subject: [PATCH 05/10] fix: make fmt --- enterprise/coderd/prebuilds/reconcile_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 1d4a47fc442ad..22d1318208e68 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -924,7 +924,7 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusSucceeded]) require.Equal(t, 1, jobStatusMap[database.ProvisionerJobStatusFailed]) - //Verify initial state: metric is not set - meaning preset is not hard limited. + // Verify initial state: metric is not set - meaning preset is not hard limited. require.NoError(t, controller.ForceMetricsUpdate(ctx)) mf, err := registry.Gather() require.NoError(t, err) From 1c530741d401a105f728a26868f52303bfb5b0c9 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Fri, 23 May 2025 23:04:56 +0000 Subject: [PATCH 06/10] refactor: improve documentation --- enterprise/coderd/prebuilds/reconcile.go | 26 ++++++++++++------- enterprise/coderd/prebuilds/reconcile_test.go | 12 +++++++++ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index b18d2c1b0c3f4..603c0da3588a6 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -361,15 +361,22 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres slog.F("preset_name", ps.Preset.Name), ) - if !ps.Preset.Deleted && ps.Preset.UsingActiveVersion { - c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited) - } + // Report a preset as hard-limited only if all the following conditions are met: + // - The preset is marked as hard-limited + // - The preset is using the active version of its template, and the template has not been deleted + // + // The second condition is important because a hard-limited preset that has become outdated is no longer relevant. + // Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it + // as hard-limited to the admin. + reportAsHardLimited := ps.IsHardLimited && ps.Preset.UsingActiveVersion && !ps.Preset.Deleted + c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, reportAsHardLimited) // If the preset reached the hard failure limit for the first time during this iteration: // - Mark it as hard-limited in the database // - Send notifications to template admins + // - Continue execution, we disallow only creation operation for hard-limited presets. Deletion is allowed. if ps.Preset.PrebuildStatus != database.PrebuildStatusHardLimited && ps.IsHardLimited { - logger.Warn(ctx, "skipping hard limited preset") + logger.Warn(ctx, "preset is hard limited, notifying template admins") err := c.store.UpdatePresetPrebuildStatus(ctx, database.UpdatePresetPrebuildStatusParams{ Status: database.PrebuildStatusHardLimited, @@ -447,12 +454,11 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres actions.Create = desired } - if actions.Create > 0 { - // If the preset is hard-limited, log it and exit early. - if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited || ps.IsHardLimited { - logger.Warn(ctx, "skipping hard limited preset") - return nil - } + // If preset is hard-limited, and it's a create operation, log it and exit early. + // Creation operation is disallowed for hard-limited preset. + if ps.IsHardLimited && actions.Create > 0 { + logger.Warn(ctx, "skipping hard limited preset for create operation") + return nil } var multiErr multierror.Error diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 22d1318208e68..42a5af552c4b5 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -1018,6 +1018,18 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { // Make sure that successfully created, but outdated prebuilt workspace was scheduled for deletion. require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition) require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition) + + // Metric is reset to zero after preset became outdated. + mf, err = registry.Gather() + require.NoError(t, err) + metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ + "template_name": template.Name, + "preset_name": preset.Name, + "org_name": org.Name, + }) + require.NotNil(t, metric) + require.NotNil(t, metric.GetGauge()) + require.EqualValues(t, 0, metric.GetGauge().GetValue()) }) } } From 3100e01a3d404cde877088c1dacf7de14193d1da Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Fri, 23 May 2025 23:32:25 +0000 Subject: [PATCH 07/10] test: improve testing coverage --- enterprise/coderd/prebuilds/reconcile_test.go | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 42a5af552c4b5..391fb21022bb7 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -824,14 +824,24 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { // Test cases verify the behavior of prebuild creation depending on configured failure limits. testCases := []struct { - name string - hardLimit int64 - isHardLimitHit bool + name string + hardLimit int64 + createNewTemplateVersion bool + deleteTemplate bool }{ { - name: "hard limit is hit - skip creation of prebuilt workspace", - hardLimit: 1, - isHardLimitHit: true, + // hard limit is hit - but we allow deletion of prebuilt workspace because it's outdated (new template version was created) + name: "new template version is created", + hardLimit: 1, + createNewTemplateVersion: true, + deleteTemplate: false, + }, + { + // hard limit is hit - but we allow deletion of prebuilt workspace because template is deleted + name: "template is deleted", + hardLimit: 1, + createNewTemplateVersion: false, + deleteTemplate: true, }, } @@ -988,12 +998,19 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { require.NotNil(t, metric.GetGauge()) require.EqualValues(t, 1, metric.GetGauge().GetValue()) - // When: the template is deleted. - require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{ - ID: template.ID, - Deleted: true, - UpdatedAt: dbtime.Now(), - })) + if tc.createNewTemplateVersion { + // Create a new template version and mark it as active + // This marks the template version that we care about as inactive + setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID) + } + + if tc.deleteTemplate { + require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{ + ID: template.ID, + Deleted: true, + UpdatedAt: dbtime.Now(), + })) + } // Trigger reconciliation to make sure that successful, but outdated prebuilt workspace will be deleted. require.NoError(t, controller.ReconcileAll(ctx)) From 80c89fea87f181454af812f3df147f3ad4c42f42 Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Mon, 26 May 2025 12:39:58 +0000 Subject: [PATCH 08/10] refactor: CR's fixes --- enterprise/coderd/prebuilds/metricscollector.go | 6 +++++- enterprise/coderd/prebuilds/reconcile_test.go | 6 ++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index 8d1d3185b7feb..2109362867a8a 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -286,5 +286,9 @@ func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, preset key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName} - mc.isPresetHardLimited[key] = isHardLimited + if isHardLimited { + mc.isPresetHardLimited[key] = true + } else { + delete(mc.isPresetHardLimited, key) + } } diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 391fb21022bb7..7bc0ac1cd641d 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -1036,7 +1036,7 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition) require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition) - // Metric is reset to zero after preset became outdated. + // Metric is deleted after preset became outdated. mf, err = registry.Gather() require.NoError(t, err) metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ @@ -1044,9 +1044,7 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { "preset_name": preset.Name, "org_name": org.Name, }) - require.NotNil(t, metric) - require.NotNil(t, metric.GetGauge()) - require.EqualValues(t, 0, metric.GetGauge().GetValue()) + require.Nil(t, metric) }) } } From 1b9593b3ae895012f1062fa1c801a4ea4b7fd59f Mon Sep 17 00:00:00 2001 From: evgeniy-scherbina Date: Mon, 26 May 2025 13:04:01 +0000 Subject: [PATCH 09/10] fix: fix tests & linter --- enterprise/coderd/prebuilds/metricscollector.go | 1 + enterprise/coderd/prebuilds/reconcile_test.go | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index 2109362867a8a..e4aca7c989033 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -280,6 +280,7 @@ func (k hardLimitedPresetKey) String() string { return fmt.Sprintf("%s:%s:%s", k.orgName, k.templateName, k.presetName) } +// nolint:revive // isHardLimited determines if the preset should be reported as hard-limited in Prometheus. func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) { mc.isPresetHardLimitedMu.Lock() defer mc.isPresetHardLimitedMu.Unlock() diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 7bc0ac1cd641d..b7f5974197150 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -768,7 +768,7 @@ func TestSkippingHardLimitedPresets(t *testing.T) { require.Equal(t, 2, len(workspaces)) require.Equal(t, database.PrebuildStatusHealthy, updatedPreset.PrebuildStatus) - // When hard limit is not reached, metric is set to 0. + // When hard limit is not reached, metric is not set. mf, err = registry.Gather() require.NoError(t, err) metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ @@ -776,9 +776,7 @@ func TestSkippingHardLimitedPresets(t *testing.T) { "preset_name": preset.Name, "org_name": org.Name, }) - require.NotNil(t, metric) - require.NotNil(t, metric.GetGauge()) - require.EqualValues(t, 0, metric.GetGauge().GetValue()) + require.Nil(t, metric) return } From 00a2fcb379a979d3a2e10657dfef161375d71555 Mon Sep 17 00:00:00 2001 From: Yevhenii Shcherbina Date: Mon, 26 May 2025 11:27:36 -0400 Subject: [PATCH 10/10] Update metric description Co-authored-by: Susana Ferreira --- enterprise/coderd/prebuilds/metricscollector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index e4aca7c989033..90257c26dd580 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -85,7 +85,7 @@ var ( ) presetHardLimitedDesc = prometheus.NewDesc( MetricPresetHardLimitedGauge, - "Indicates whether a given preset has reached the hard failure limit (1 for hard-limited, 0 otherwise).", + "Indicates whether a given preset has reached the hard failure limit (1 = hard-limited). Metric is omitted otherwise.", labels, nil, ) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy