Skip to content

Commit 2a15aa8

Browse files
feat: add hard-limited presets metric (#18008)
Closes #17988

Define `preset_hard_limited` metric which for every preset indicates whether a given preset has reached the hard failure limit (1 for hard-limited, 0 otherwise).

CLI example:

```
curl -X GET localhost:2118/metrics | grep preset_hard_limited
# HELP coderd_prebuilt_workspaces_preset_hard_limited Indicates whether a given preset has reached the hard failure limit (1 for hard-limited, 0 otherwise).
# TYPE coderd_prebuilt_workspaces_preset_hard_limited gauge
coderd_prebuilt_workspaces_preset_hard_limited{organization_name="coder",preset_name="GoLand: Large",template_name="Test7"} 1
coderd_prebuilt_workspaces_preset_hard_limited{organization_name="coder",preset_name="GoLand: Large",template_name="ValidTemplate"} 0
coderd_prebuilt_workspaces_preset_hard_limited{organization_name="coder",preset_name="IU: Medium",template_name="Test7"} 1
coderd_prebuilt_workspaces_preset_hard_limited{organization_name="coder",preset_name="IU: Medium",template_name="ValidTemplate"} 0
coderd_prebuilt_workspaces_preset_hard_limited{organization_name="coder",preset_name="WS: Small",template_name="Test7"} 1
```

NOTE:

```go
if !ps.Preset.Deleted && ps.Preset.UsingActiveVersion {
	c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited)
}
```

Only the active template version is tracked. If an admin creates a new template version, the old value of the metric (for the previous template version) will be overwritten with the new value of the metric (for the active template version), because `template_version` is not part of the metric's labels:

```go
labels = []string{"template_name", "preset_name", "organization_name"}
```

The implementation is similar to the implementation of the `MetricResourceReplacementsCount` metric.

---------

Co-authored-by: Susana Ferreira <ssncferreira@gmail.com>
1 parent 0731304 commit 2a15aa8

File tree

3 files changed

+334
-11
lines changed

3 files changed

+334
-11
lines changed

enterprise/coderd/prebuilds/metricscollector.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const (
2727
MetricDesiredGauge = namespace + "desired"
2828
MetricRunningGauge = namespace + "running"
2929
MetricEligibleGauge = namespace + "eligible"
30+
MetricPresetHardLimitedGauge = namespace + "preset_hard_limited"
3031
MetricLastUpdatedGauge = namespace + "metrics_last_updated"
3132
)
3233

@@ -82,6 +83,12 @@ var (
8283
labels,
8384
nil,
8485
)
86+
presetHardLimitedDesc = prometheus.NewDesc(
87+
MetricPresetHardLimitedGauge,
88+
"Indicates whether a given preset has reached the hard failure limit (1 = hard-limited). Metric is omitted otherwise.",
89+
labels,
90+
nil,
91+
)
8592
lastUpdateDesc = prometheus.NewDesc(
8693
MetricLastUpdatedGauge,
8794
"The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached.",
@@ -104,17 +111,22 @@ type MetricsCollector struct {
104111

105112
replacementsCounter map[replacementKey]float64
106113
replacementsCounterMu sync.Mutex
114+
115+
isPresetHardLimited map[hardLimitedPresetKey]bool
116+
isPresetHardLimitedMu sync.Mutex
107117
}
108118

109119
var _ prometheus.Collector = new(MetricsCollector)
110120

111121
func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector {
112122
log := logger.Named("prebuilds_metrics_collector")
123+
113124
return &MetricsCollector{
114125
database: db,
115126
logger: log,
116127
snapshotter: snapshotter,
117128
replacementsCounter: make(map[replacementKey]float64),
129+
isPresetHardLimited: make(map[hardLimitedPresetKey]bool),
118130
}
119131
}
120132

@@ -126,6 +138,7 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
126138
descCh <- desiredPrebuildsDesc
127139
descCh <- runningPrebuildsDesc
128140
descCh <- eligiblePrebuildsDesc
141+
descCh <- presetHardLimitedDesc
129142
descCh <- lastUpdateDesc
130143
}
131144

@@ -173,6 +186,17 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
173186
metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName)
174187
}
175188

189+
mc.isPresetHardLimitedMu.Lock()
190+
for key, isHardLimited := range mc.isPresetHardLimited {
191+
var val float64
192+
if isHardLimited {
193+
val = 1
194+
}
195+
196+
metricsCh <- prometheus.MustNewConstMetric(presetHardLimitedDesc, prometheus.GaugeValue, val, key.templateName, key.presetName, key.orgName)
197+
}
198+
mc.isPresetHardLimitedMu.Unlock()
199+
176200
metricsCh <- prometheus.MustNewConstMetric(lastUpdateDesc, prometheus.GaugeValue, float64(currentState.createdAt.Unix()))
177201
}
178202

@@ -247,3 +271,25 @@ func (mc *MetricsCollector) trackResourceReplacement(orgName, templateName, pres
247271
// cause an issue (or indeed if either would), so we just track the replacement.
248272
mc.replacementsCounter[key]++
249273
}
274+
275+
type hardLimitedPresetKey struct {
276+
orgName, templateName, presetName string
277+
}
278+
279+
func (k hardLimitedPresetKey) String() string {
280+
return fmt.Sprintf("%s:%s:%s", k.orgName, k.templateName, k.presetName)
281+
}
282+
283+
// nolint:revive // isHardLimited determines if the preset should be reported as hard-limited in Prometheus.
284+
func (mc *MetricsCollector) trackHardLimitedStatus(orgName, templateName, presetName string, isHardLimited bool) {
285+
mc.isPresetHardLimitedMu.Lock()
286+
defer mc.isPresetHardLimitedMu.Unlock()
287+
288+
key := hardLimitedPresetKey{orgName: orgName, templateName: templateName, presetName: presetName}
289+
290+
if isHardLimited {
291+
mc.isPresetHardLimited[key] = true
292+
} else {
293+
delete(mc.isPresetHardLimited, key)
294+
}
295+
}

enterprise/coderd/prebuilds/reconcile.go

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -361,17 +361,22 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
361361
slog.F("preset_name", ps.Preset.Name),
362362
)
363363

364-
// If the preset was previously hard-limited, log it and exit early.
365-
if ps.Preset.PrebuildStatus == database.PrebuildStatusHardLimited {
366-
logger.Warn(ctx, "skipping hard limited preset")
367-
return nil
368-
}
364+
// Report a preset as hard-limited only if all the following conditions are met:
365+
// - The preset is marked as hard-limited
366+
// - The preset is using the active version of its template, and the template has not been deleted
367+
//
368+
// The second condition is important because a hard-limited preset that has become outdated is no longer relevant.
369+
// Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it
370+
// as hard-limited to the admin.
371+
reportAsHardLimited := ps.IsHardLimited && ps.Preset.UsingActiveVersion && !ps.Preset.Deleted
372+
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, reportAsHardLimited)
369373

370374
// If the preset reached the hard failure limit for the first time during this iteration:
371375
// - Mark it as hard-limited in the database
372376
// - Send notifications to template admins
373-
if ps.IsHardLimited {
374-
logger.Warn(ctx, "skipping hard limited preset")
377+
// - Continue execution: only the create operation is disallowed for hard-limited presets; deletion is still allowed.
378+
if ps.Preset.PrebuildStatus != database.PrebuildStatusHardLimited && ps.IsHardLimited {
379+
logger.Warn(ctx, "preset is hard limited, notifying template admins")
375380

376381
err := c.store.UpdatePresetPrebuildStatus(ctx, database.UpdatePresetPrebuildStatusParams{
377382
Status: database.PrebuildStatusHardLimited,
@@ -384,10 +389,7 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
384389
err = c.notifyPrebuildFailureLimitReached(ctx, ps)
385390
if err != nil {
386391
logger.Error(ctx, "failed to notify that number of prebuild failures reached the limit", slog.Error(err))
387-
return nil
388392
}
389-
390-
return nil
391393
}
392394

393395
state := ps.CalculateState()
@@ -452,6 +454,13 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
452454
actions.Create = desired
453455
}
454456

457+
// If the preset is hard-limited and this is a create operation, log it and exit early.
458+
// The create operation is disallowed for hard-limited presets.
459+
if ps.IsHardLimited && actions.Create > 0 {
460+
logger.Warn(ctx, "skipping hard limited preset for create operation")
461+
return nil
462+
}
463+
455464
var multiErr multierror.Error
456465

457466
for range actions.Create {

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy