diff --git a/coderd/prebuilds/global_snapshot.go b/coderd/prebuilds/global_snapshot.go index f4c094289b54e..976461780fd07 100644 --- a/coderd/prebuilds/global_snapshot.go +++ b/coderd/prebuilds/global_snapshot.go @@ -12,11 +12,11 @@ import ( // GlobalSnapshot represents a full point-in-time snapshot of state relating to prebuilds across all templates. type GlobalSnapshot struct { - Presets []database.GetTemplatePresetsWithPrebuildsRow - RunningPrebuilds []database.GetRunningPrebuiltWorkspacesRow - PrebuildsInProgress []database.CountInProgressPrebuildsRow - Backoffs []database.GetPresetsBackoffRow - HardLimitedPresets []database.GetPresetsAtFailureLimitRow + Presets []database.GetTemplatePresetsWithPrebuildsRow + RunningPrebuilds []database.GetRunningPrebuiltWorkspacesRow + PrebuildsInProgress []database.CountInProgressPrebuildsRow + Backoffs []database.GetPresetsBackoffRow + HardLimitedPresetsMap map[uuid.UUID]database.GetPresetsAtFailureLimitRow } func NewGlobalSnapshot( @@ -26,12 +26,17 @@ func NewGlobalSnapshot( backoffs []database.GetPresetsBackoffRow, hardLimitedPresets []database.GetPresetsAtFailureLimitRow, ) GlobalSnapshot { + hardLimitedPresetsMap := make(map[uuid.UUID]database.GetPresetsAtFailureLimitRow, len(hardLimitedPresets)) + for _, preset := range hardLimitedPresets { + hardLimitedPresetsMap[preset.PresetID] = preset + } + return GlobalSnapshot{ - Presets: presets, - RunningPrebuilds: runningPrebuilds, - PrebuildsInProgress: prebuildsInProgress, - Backoffs: backoffs, - HardLimitedPresets: hardLimitedPresets, + Presets: presets, + RunningPrebuilds: runningPrebuilds, + PrebuildsInProgress: prebuildsInProgress, + Backoffs: backoffs, + HardLimitedPresetsMap: hardLimitedPresetsMap, } } @@ -66,9 +71,7 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err backoffPtr = &backoff } - _, isHardLimited := slice.Find(s.HardLimitedPresets, func(row database.GetPresetsAtFailureLimitRow) bool { - return row.PresetID == preset.ID - }) + _, isHardLimited := s.HardLimitedPresetsMap[preset.ID] return &PresetSnapshot{ Preset: preset, @@ -80,6 +83,12 @@ func (s GlobalSnapshot) FilterByPreset(presetID uuid.UUID) (*PresetSnapshot, err }, nil } +func (s GlobalSnapshot) IsHardLimited(presetID uuid.UUID) bool { + _, isHardLimited := s.HardLimitedPresetsMap[presetID] + + return isHardLimited +} + // filterExpiredWorkspaces splits running workspaces into expired and non-expired // based on the preset's TTL. // If TTL is missing or zero, all workspaces are considered non-expired. diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index 90257c26dd580..4499849ffde0a 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -280,16 +280,9 @@ func (k hardLimitedPresetKey) String() string { return fmt.Sprintf("%s:%s:%s", k.orgName, k.templateName, k.presetName) } -// nolint:revive // isHardLimited determines if the preset should be reported as hard-limited in Prometheus. -func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) { +func (mc *MetricsCollector) registerHardLimitedPresets(isPresetHardLimited map[hardLimitedPresetKey]bool) { mc.isPresetHardLimitedMu.Lock() defer mc.isPresetHardLimitedMu.Unlock() - key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName} - - if isHardLimited { - mc.isPresetHardLimited[key] = true - } else { - delete(mc.isPresetHardLimited, key) - } + mc.isPresetHardLimited = isPresetHardLimited } diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index 90c97afa26d69..ebfcfaf2b3182 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -256,6 +256,9 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error { if err != nil { return xerrors.Errorf("determine current snapshot: %w", err) } + + c.reportHardLimitedPresets(snapshot) + if len(snapshot.Presets) == 0 { logger.Debug(ctx, "no templates found with prebuilds configured") return nil @@ -296,6 +299,49 @@ func (c *StoreReconciler) ReconcileAll(ctx context.Context) error { return err } +func (c *StoreReconciler) reportHardLimitedPresets(snapshot *prebuilds.GlobalSnapshot) { + // presetsMap is a map from key (orgName:templateName:presetName) to list of corresponding presets. + // Multiple versions of a preset can exist with the same orgName, templateName, and presetName, + // because templates can have multiple versions — or deleted templates can share the same name. + presetsMap := make(map[hardLimitedPresetKey][]database.GetTemplatePresetsWithPrebuildsRow) + for _, preset := range snapshot.Presets { + key := hardLimitedPresetKey{ + orgName: preset.OrganizationName, + templateName: preset.TemplateName, + presetName: preset.Name, + } + + presetsMap[key] = append(presetsMap[key], preset) + } + + // Report a preset as hard-limited only if all the following conditions are met: + // - The preset is marked as hard-limited + // - The preset is using the active version of its template, and the template has not been deleted + // + // The second condition is important because a hard-limited preset that has become outdated is no longer relevant. + // Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it + // as hard-limited to the admin. + // + // This approach accounts for all relevant scenarios: + // Scenario #1: The admin created a new template version with the same preset names. + // Scenario #2: The admin created a new template version and renamed the presets. + // Scenario #3: The admin deleted a template version that contained hard-limited presets. + // + // In all of these cases, only the latest and non-deleted presets will be reported. + // All other presets will be ignored and eventually removed from Prometheus. + isPresetHardLimited := make(map[hardLimitedPresetKey]bool) + for key, presets := range presetsMap { + for _, preset := range presets { + if preset.UsingActiveVersion && !preset.Deleted && snapshot.IsHardLimited(preset.ID) { + isPresetHardLimited[key] = true + break + } + } + } + + c.metrics.registerHardLimitedPresets(isPresetHardLimited) +} + // SnapshotState captures the current state of all prebuilds across templates. func (c *StoreReconciler) SnapshotState(ctx context.Context, store database.Store) (*prebuilds.GlobalSnapshot, error) { if err := ctx.Err(); err != nil { @@ -361,24 +407,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres slog.F("preset_name", ps.Preset.Name), ) - // Report a metric only if the preset uses the latest version of the template and the template is not deleted. - // This avoids conflicts between metrics from old and new template versions. - // - // NOTE: Multiple versions of a preset can exist with the same orgName, templateName, and presetName, - // because templates can have multiple versions — or deleted templates can share the same name. - // - // The safest approach is to report the metric only for the latest version of the preset. - // When a new template version is released, the metric for the new preset should overwrite - // the old value in Prometheus. - // - // However, there’s one edge case: if an admin creates a template, it becomes hard-limited, - // then deletes the template and never creates another with the same name, - // the old preset will continue to be reported as hard-limited — - // even though it’s deleted. This will persist until `coderd` is restarted. - if ps.Preset.UsingActiveVersion && !ps.Preset.Deleted { - c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited) - } - // If the preset reached the hard failure limit for the first time during this iteration: // - Mark it as hard-limited in the database // - Send notifications to template admins diff --git a/enterprise/coderd/prebuilds/reconcile_test.go b/enterprise/coderd/prebuilds/reconcile_test.go index 7de22db64c8be..a0e1f9726d7d5 100644 --- a/enterprise/coderd/prebuilds/reconcile_test.go +++ b/enterprise/coderd/prebuilds/reconcile_test.go @@ -1034,8 +1034,7 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition) require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition) - // The metric is still set to 1, even though the preset has become outdated. - // This happens because the old value hasn't been overwritten by a newer preset yet. + // Metric is deleted after preset became outdated. mf, err = registry.Gather() require.NoError(t, err) metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ @@ -1043,9 +1042,7 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) { "preset_name": preset.Name, "org_name": org.Name, }) - require.NotNil(t, metric) - require.NotNil(t, metric.GetGauge()) - require.EqualValues(t, 1, metric.GetGauge().GetValue()) + require.Nil(t, metric) }) } }
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: