From 562b56dd322009b3cb7f2e55383b632886a2cd67 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 13 May 2025 13:40:52 +0200 Subject: [PATCH 1/3] feat: fetch prebuilds metrics state in background Signed-off-by: Danny Kopping --- .../coderd/prebuilds/metricscollector.go | 91 +++++++++++++++---- .../coderd/prebuilds/metricscollector_test.go | 5 + enterprise/coderd/prebuilds/reconcile.go | 5 + 3 files changed, 84 insertions(+), 17 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index 7b55227effffa..3452f9af0426b 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -2,11 +2,13 @@ package prebuilds import ( "context" + "sync/atomic" "time" - "cdr.dev/slog" - "github.com/prometheus/client_golang/prometheus" + "golang.org/x/xerrors" + + "cdr.dev/slog" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" @@ -57,18 +59,27 @@ var ( ) ) +const ( + metricsUpdateInterval = time.Second * 15 + metricsUpdateTimeout = time.Second * 10 +) + type MetricsCollector struct { database database.Store logger slog.Logger snapshotter prebuilds.StateSnapshotter + + latestState atomic.Pointer[state] } var _ prometheus.Collector = new(MetricsCollector) +// NewMetricsCollector returns a func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector { + log := logger.Named("prebuilds_metrics_collector") return &MetricsCollector{ database: db, - logger: logger.Named("prebuilds_metrics_collector"), + logger: log, snapshotter: snapshotter, } } @@ -82,34 +93,31 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) { descCh <- eligiblePrebuildsDesc } +// Collect uses the cached state to set configured metrics. +// The state is cached because this function can be called multiple times per second and retrieving the current state +// is an expensive operation. func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { // nolint:gocritic // We need to set an authz context to read metrics from the db. - ctx, cancel := context.WithTimeout(dbauthz.AsPrebuildsOrchestrator(context.Background()), 10*time.Second) - defer cancel() - prebuildMetrics, err := mc.database.GetPrebuildMetrics(ctx) - if err != nil { - mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err)) + ctx := dbauthz.AsPrebuildsOrchestrator(context.Background()) + + currentState := mc.latestState.Load() + if currentState == nil { + mc.logger.Warn(ctx, "failed to set prebuilds metrics; state not set") return } - for _, metric := range prebuildMetrics { + for _, metric := range currentState.prebuildMetrics { metricsCh <- prometheus.MustNewConstMetric(createdPrebuildsDesc, prometheus.CounterValue, float64(metric.CreatedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName) metricsCh <- prometheus.MustNewConstMetric(failedPrebuildsDesc, prometheus.CounterValue, float64(metric.FailedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName) metricsCh <- prometheus.MustNewConstMetric(claimedPrebuildsDesc, prometheus.CounterValue, float64(metric.ClaimedCount), metric.TemplateName, metric.PresetName, metric.OrganizationName) } - snapshot, err := mc.snapshotter.SnapshotState(ctx, mc.database) - if err != nil { - mc.logger.Error(ctx, "failed to get latest prebuild state", slog.Error(err)) - return - } - - for _, preset := range snapshot.Presets { + for _, preset := range currentState.snapshot.Presets { if !preset.UsingActiveVersion { continue } - presetSnapshot, err := snapshot.FilterByPreset(preset.ID) + presetSnapshot, err := currentState.snapshot.FilterByPreset(preset.ID) if err != nil { mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err)) continue @@ -121,3 +129,52 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName) } } + +type state struct { + prebuildMetrics []database.GetPrebuildMetricsRow + snapshot *prebuilds.GlobalSnapshot +} + +// BackgroundFetch updates the metrics state every given interval. +func (mc *MetricsCollector) BackgroundFetch(ctx context.Context, updateInterval, updateTimeout time.Duration) { + tick := time.NewTicker(time.Nanosecond) + defer tick.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-tick.C: + // Tick immediately, then set regular interval. + tick.Reset(updateInterval) + + if err := mc.UpdateState(ctx, updateTimeout); err != nil { + mc.logger.Error(ctx, "failed to update prebuilds metrics state", slog.Error(err)) + } + } + } +} + +// UpdateState builds the current metrics state. +func (mc *MetricsCollector) UpdateState(ctx context.Context, timeout time.Duration) error { + mc.logger.Debug(ctx, "fetching prebuilds metrics state") + fetchCtx, fetchCancel := context.WithTimeout(ctx, timeout) + defer fetchCancel() + + prebuildMetrics, err := mc.database.GetPrebuildMetrics(fetchCtx) + if err != nil { + return xerrors.Errorf("fetch prebuild metrics: %w", err) + } + + snapshot, err := mc.snapshotter.SnapshotState(fetchCtx, mc.database) + if err != nil { + return xerrors.Errorf("snapshot state: %w", err) + } + mc.logger.Debug(ctx, "fetched prebuilds metrics state") + + mc.latestState.Store(&state{ + prebuildMetrics: prebuildMetrics, + snapshot: snapshot, + }) + return nil +} diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go index 859509ced6635..de3f5d017f715 100644 --- a/enterprise/coderd/prebuilds/metricscollector_test.go +++ b/enterprise/coderd/prebuilds/metricscollector_test.go @@ -16,6 +16,7 @@ import ( "github.com/coder/quartz" "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbtestutil" agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds" @@ -248,6 +249,10 @@ func TestMetricsCollector(t *testing.T) { setupTestDBWorkspaceAgent(t, db, workspace.ID, eligible) } + // Force an update to the metrics state to allow the collector to collect fresh metrics. + // nolint:gocritic // Authz context needed to retrieve state. + require.NoError(t, collector.UpdateState(dbauthz.AsPrebuildsOrchestrator(ctx), testutil.WaitLong)) + metricsFamilies, err := registry.Gather() require.NoError(t, err) diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index c31da695637ba..ca02ee4218678 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -97,6 +97,11 @@ func (c *StoreReconciler) Run(ctx context.Context) { ctx, cancel := context.WithCancelCause(dbauthz.AsPrebuildsOrchestrator(ctx)) c.cancelFn = cancel + // Start updating metrics in the background. + if c.metrics != nil { + go c.metrics.BackgroundFetch(ctx, metricsUpdateInterval, metricsUpdateTimeout) + } + // Everything is in place, reconciler can now be considered as running. // // NOTE: without this atomic bool, Stop might race with Run for the c.cancelFn above. From fcbfb7fed2dc80bb2918f857134c3dfc8b1e632b Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 13 May 2025 14:10:15 +0200 Subject: [PATCH 2/3] chore: improvements Signed-off-by: Danny Kopping --- .../coderd/prebuilds/metricscollector.go | 26 ++++++++++++------- enterprise/coderd/prebuilds/reconcile.go | 9 ++++++- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index 3452f9af0426b..c7ee95a04d787 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -2,6 +2,7 @@ package prebuilds import ( "context" + "fmt" "sync/atomic" "time" @@ -11,7 +12,6 @@ import ( "cdr.dev/slog" "github.com/coder/coder/v2/coderd/database" - "github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/prebuilds" ) @@ -57,6 +57,12 @@ var ( labels, nil, ) + lastUpdateDesc = prometheus.NewDesc( + "coderd_prebuilt_workspaces_metrics_last_updated", + "The unix timestamp when the metrics related to prebuilt workspaces were last updated; these metrics are cached.", + []string{}, + nil, + ) ) const ( @@ -74,7 +80,6 @@ type MetricsCollector struct { var _ prometheus.Collector = new(MetricsCollector) -// NewMetricsCollector returns a func NewMetricsCollector(db database.Store, logger slog.Logger, snapshotter prebuilds.StateSnapshotter) *MetricsCollector { log := logger.Named("prebuilds_metrics_collector") return &MetricsCollector{ @@ -91,18 +96,16 @@ func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) { descCh <- desiredPrebuildsDesc descCh <- runningPrebuildsDesc descCh <- eligiblePrebuildsDesc + descCh <- lastUpdateDesc } // Collect uses the cached state to set configured metrics. // The state is cached because this function can be called multiple times per second and retrieving the current state // is an expensive operation. func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { - // nolint:gocritic // We need to set an authz context to read metrics from the db. - ctx := dbauthz.AsPrebuildsOrchestrator(context.Background()) - - currentState := mc.latestState.Load() + currentState := mc.latestState.Load() // Grab a copy; it's ok if it goes stale during the course of this func. if currentState == nil { - mc.logger.Warn(ctx, "failed to set prebuilds metrics; state not set") + mc.logger.Warn(context.Background(), "failed to set prebuilds metrics; state not set") return } @@ -119,7 +122,7 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { presetSnapshot, err := currentState.snapshot.FilterByPreset(preset.ID) if err != nil { - mc.logger.Error(ctx, "failed to filter by preset", slog.Error(err)) + mc.logger.Error(context.Background(), "failed to filter by preset", slog.Error(err)) continue } state := presetSnapshot.CalculateState() @@ -128,11 +131,14 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { metricsCh <- prometheus.MustNewConstMetric(runningPrebuildsDesc, prometheus.GaugeValue, float64(state.Actual), preset.TemplateName, preset.Name, preset.OrganizationName) metricsCh <- prometheus.MustNewConstMetric(eligiblePrebuildsDesc, prometheus.GaugeValue, float64(state.Eligible), preset.TemplateName, preset.Name, preset.OrganizationName) } + + metricsCh <- prometheus.MustNewConstMetric(lastUpdateDesc, prometheus.GaugeValue, float64(currentState.createdAt.Unix())) } type state struct { prebuildMetrics []database.GetPrebuildMetricsRow snapshot *prebuilds.GlobalSnapshot + createdAt time.Time } // BackgroundFetch updates the metrics state every given interval. @@ -157,6 +163,7 @@ func (mc *MetricsCollector) BackgroundFetch(ctx context.Context, updateInterval, // UpdateState builds the current metrics state. func (mc *MetricsCollector) UpdateState(ctx context.Context, timeout time.Duration) error { + start := time.Now() mc.logger.Debug(ctx, "fetching prebuilds metrics state") fetchCtx, fetchCancel := context.WithTimeout(ctx, timeout) defer fetchCancel() @@ -170,11 +177,12 @@ func (mc *MetricsCollector) UpdateState(ctx context.Context, timeout time.Durati if err != nil { return xerrors.Errorf("snapshot state: %w", err) } - mc.logger.Debug(ctx, "fetched prebuilds metrics state") + mc.logger.Debug(ctx, "fetched prebuilds metrics state", slog.F("duration_secs", fmt.Sprintf("%.2f", time.Since(start).Seconds()))) mc.latestState.Store(&state{ prebuildMetrics: prebuildMetrics, snapshot: snapshot, + createdAt: time.Now(), }) return nil } diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index ca02ee4218678..df0007246bdc6 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -5,6 +5,7 @@ import ( "database/sql" "fmt" "math" + "sync" "sync/atomic" "time" @@ -87,10 +88,12 @@ func (c *StoreReconciler) Run(ctx context.Context) { slog.F("backoff_interval", c.cfg.ReconciliationBackoffInterval.String()), slog.F("backoff_lookback", c.cfg.ReconciliationBackoffLookback.String())) + var wg sync.WaitGroup ticker := c.clock.NewTicker(reconciliationInterval) defer ticker.Stop() defer func() { c.done <- struct{}{} + wg.Wait() }() // nolint:gocritic // Reconciliation Loop needs Prebuilds Orchestrator permissions. @@ -99,7 +102,11 @@ func (c *StoreReconciler) Run(ctx context.Context) { // Start updating metrics in the background. if c.metrics != nil { - go c.metrics.BackgroundFetch(ctx, metricsUpdateInterval, metricsUpdateTimeout) + wg.Add(1) + go func() { + defer wg.Done() + c.metrics.BackgroundFetch(ctx, metricsUpdateInterval, metricsUpdateTimeout) + }() } // Everything is in place, reconciler can now be considered as running. From 35cebf6c5fc314d26de89459a9308f2e1c2452ca Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 13 May 2025 17:03:07 +0200 Subject: [PATCH 3/3] chore: review feedback Signed-off-by: Danny Kopping --- enterprise/coderd/prebuilds/metricscollector.go | 11 ++++++----- enterprise/coderd/prebuilds/reconcile.go | 12 +++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go index c7ee95a04d787..76089c025243d 100644 --- a/enterprise/coderd/prebuilds/metricscollector.go +++ b/enterprise/coderd/prebuilds/metricscollector.go @@ -12,6 +12,7 @@ import ( "cdr.dev/slog" "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbtime" "github.com/coder/coder/v2/coderd/prebuilds" ) @@ -75,7 +76,7 @@ type MetricsCollector struct { logger slog.Logger snapshotter prebuilds.StateSnapshotter - latestState atomic.Pointer[state] + latestState atomic.Pointer[metricsState] } var _ prometheus.Collector = new(MetricsCollector) @@ -106,6 +107,7 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { currentState := mc.latestState.Load() // Grab a copy; it's ok if it goes stale during the course of this func. if currentState == nil { mc.logger.Warn(context.Background(), "failed to set prebuilds metrics; state not set") + metricsCh <- prometheus.MustNewConstMetric(lastUpdateDesc, prometheus.GaugeValue, 0) return } @@ -135,7 +137,7 @@ func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) { metricsCh <- prometheus.MustNewConstMetric(lastUpdateDesc, prometheus.GaugeValue, float64(currentState.createdAt.Unix())) } -type state struct { +type metricsState struct { prebuildMetrics []database.GetPrebuildMetricsRow snapshot *prebuilds.GlobalSnapshot createdAt time.Time @@ -164,7 +166,6 @@ func (mc *MetricsCollector) BackgroundFetch(ctx context.Context, updateInterval, // UpdateState builds the current metrics state. func (mc *MetricsCollector) UpdateState(ctx context.Context, timeout time.Duration) error { start := time.Now() - mc.logger.Debug(ctx, "fetching prebuilds metrics state") fetchCtx, fetchCancel := context.WithTimeout(ctx, timeout) defer fetchCancel() @@ -179,10 +180,10 @@ func (mc *MetricsCollector) UpdateState(ctx context.Context, timeout time.Durati } mc.logger.Debug(ctx, "fetched prebuilds metrics state", slog.F("duration_secs", fmt.Sprintf("%.2f", time.Since(start).Seconds()))) - mc.latestState.Store(&state{ + mc.latestState.Store(&metricsState{ prebuildMetrics: prebuildMetrics, snapshot: snapshot, - createdAt: time.Now(), + createdAt: dbtime.Now(), }) return nil } diff --git a/enterprise/coderd/prebuilds/reconcile.go b/enterprise/coderd/prebuilds/reconcile.go index df0007246bdc6..79a8baa337e72 100644 --- a/enterprise/coderd/prebuilds/reconcile.go +++ b/enterprise/coderd/prebuilds/reconcile.go @@ -68,10 +68,12 @@ func NewStoreReconciler(store database.Store, provisionNotifyCh: make(chan database.ProvisionerJob, 10), } - reconciler.metrics = NewMetricsCollector(store, logger, reconciler) - if err := registerer.Register(reconciler.metrics); err != nil { - // If the registerer fails to register the metrics collector, it's not fatal. - logger.Error(context.Background(), "failed to register prometheus metrics", slog.Error(err)) + if registerer != nil { + reconciler.metrics = NewMetricsCollector(store, logger, reconciler) + if err := registerer.Register(reconciler.metrics); err != nil { + // If the registerer fails to register the metrics collector, it's not fatal. + logger.Error(context.Background(), "failed to register prometheus metrics", slog.Error(err)) + } } return reconciler @@ -92,8 +94,8 @@ func (c *StoreReconciler) Run(ctx context.Context) { ticker := c.clock.NewTicker(reconciliationInterval) defer ticker.Stop() defer func() { - c.done <- struct{}{} wg.Wait() + c.done <- struct{}{} }() // nolint:gocritic // Reconciliation Loop needs Prebuilds Orchestrator permissions. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy