Skip to content

Commit 3de98c2

Browse files
authored
feat: add prometheus metric for tracking user statuses (coder#15281)
1 parent e9fbfcc commit 3de98c2

File tree

3 files changed

+158
-3
lines changed

3 files changed

+158
-3
lines changed

cli/server.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,16 @@ func enablePrometheus(
212212
options.PrometheusRegistry.MustRegister(collectors.NewGoCollector())
213213
options.PrometheusRegistry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
214214

215-
closeUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.PrometheusRegistry, options.Database, 0)
215+
closeActiveUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.Logger.Named("active_user_metrics"), options.PrometheusRegistry, options.Database, 0)
216216
if err != nil {
217217
return nil, xerrors.Errorf("register active users prometheus metric: %w", err)
218218
}
219+
afterCtx(ctx, closeActiveUsersFunc)
220+
221+
closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), quartz.NewReal(), options.PrometheusRegistry, options.Database, 0)
222+
if err != nil {
223+
return nil, xerrors.Errorf("register users prometheus metric: %w", err)
224+
}
219225
afterCtx(ctx, closeUsersFunc)
220226

221227
closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.Logger.Named("workspaces_metrics"), options.PrometheusRegistry, options.Database, 0)

coderd/prometheusmetrics/prometheusmetrics.go

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/google/uuid"
1414
"github.com/prometheus/client_golang/prometheus"
15+
"golang.org/x/xerrors"
1516
"tailscale.com/tailcfg"
1617

1718
"cdr.dev/slog"
@@ -22,12 +23,13 @@ import (
2223
"github.com/coder/coder/v2/coderd/database/dbtime"
2324
"github.com/coder/coder/v2/codersdk"
2425
"github.com/coder/coder/v2/tailnet"
26+
"github.com/coder/quartz"
2527
)
2628

2729
const defaultRefreshRate = time.Minute
2830

2931
// ActiveUsers tracks the number of users that have authenticated within the past hour.
30-
func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
32+
func ActiveUsers(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
3133
if duration == 0 {
3234
duration = defaultRefreshRate
3335
}
@@ -58,6 +60,7 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
5860

5961
apiKeys, err := db.GetAPIKeysLastUsedAfter(ctx, dbtime.Now().Add(-1*time.Hour))
6062
if err != nil {
63+
logger.Error(ctx, "get api keys for active users prometheus metric", slog.Error(err))
6164
continue
6265
}
6366
distinctUsers := map[uuid.UUID]struct{}{}
@@ -73,6 +76,57 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
7376
}, nil
7477
}
7578

79+
// Users tracks the total number of registered users, partitioned by status.
80+
func Users(ctx context.Context, logger slog.Logger, clk quartz.Clock, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
81+
if duration == 0 {
82+
// It's not super important this tracks real-time.
83+
duration = defaultRefreshRate * 5
84+
}
85+
86+
gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
87+
Namespace: "coderd",
88+
Subsystem: "api",
89+
Name: "total_user_count",
90+
Help: "The total number of registered users, partitioned by status.",
91+
}, []string{"status"})
92+
err := registerer.Register(gauge)
93+
if err != nil {
94+
return nil, xerrors.Errorf("register total_user_count gauge: %w", err)
95+
}
96+
97+
ctx, cancelFunc := context.WithCancel(ctx)
98+
done := make(chan struct{})
99+
ticker := clk.NewTicker(duration)
100+
go func() {
101+
defer close(done)
102+
defer ticker.Stop()
103+
for {
104+
select {
105+
case <-ctx.Done():
106+
return
107+
case <-ticker.C:
108+
}
109+
110+
gauge.Reset()
111+
//nolint:gocritic // This is a system service that needs full access
112+
//to the users table.
113+
users, err := db.GetUsers(dbauthz.AsSystemRestricted(ctx), database.GetUsersParams{})
114+
if err != nil {
115+
logger.Error(ctx, "get all users for prometheus metrics", slog.Error(err))
116+
continue
117+
}
118+
119+
for _, user := range users {
120+
gauge.WithLabelValues(string(user.Status)).Inc()
121+
}
122+
}
123+
}()
124+
return func() {
125+
cancelFunc()
126+
<-done
127+
}, nil
128+
}
129+
76130
// Workspaces tracks the total number of workspaces with labels on status.
77131
func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
78132
if duration == 0 {

coderd/prometheusmetrics/prometheusmetrics_test.go

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
"github.com/coder/coder/v2/tailnet"
3939
"github.com/coder/coder/v2/tailnet/tailnettest"
4040
"github.com/coder/coder/v2/testutil"
41+
"github.com/coder/quartz"
4142
)
4243

4344
func TestActiveUsers(t *testing.T) {
@@ -98,7 +99,7 @@ func TestActiveUsers(t *testing.T) {
9899
t.Run(tc.Name, func(t *testing.T) {
99100
t.Parallel()
100101
registry := prometheus.NewRegistry()
101-
closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), registry, tc.Database(t), time.Millisecond)
102+
closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), time.Millisecond)
102103
require.NoError(t, err)
103104
t.Cleanup(closeFunc)
104105

@@ -112,6 +113,100 @@ func TestActiveUsers(t *testing.T) {
112113
}
113114
}
114115

116+
func TestUsers(t *testing.T) {
117+
t.Parallel()
118+
119+
for _, tc := range []struct {
120+
Name string
121+
Database func(t *testing.T) database.Store
122+
Count map[database.UserStatus]int
123+
}{{
124+
Name: "None",
125+
Database: func(t *testing.T) database.Store {
126+
return dbmem.New()
127+
},
128+
Count: map[database.UserStatus]int{},
129+
}, {
130+
Name: "One",
131+
Database: func(t *testing.T) database.Store {
132+
db := dbmem.New()
133+
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
134+
return db
135+
},
136+
Count: map[database.UserStatus]int{database.UserStatusActive: 1},
137+
}, {
138+
Name: "MultipleStatuses",
139+
Database: func(t *testing.T) database.Store {
140+
db := dbmem.New()
141+
142+
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
143+
dbgen.User(t, db, database.User{Status: database.UserStatusDormant})
144+
145+
return db
146+
},
147+
Count: map[database.UserStatus]int{database.UserStatusActive: 1, database.UserStatusDormant: 1},
148+
}, {
149+
Name: "MultipleActive",
150+
Database: func(t *testing.T) database.Store {
151+
db := dbmem.New()
152+
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
153+
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
154+
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
155+
return db
156+
},
157+
Count: map[database.UserStatus]int{database.UserStatusActive: 3},
158+
}} {
159+
tc := tc
160+
t.Run(tc.Name, func(t *testing.T) {
161+
t.Parallel()
162+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
163+
defer cancel()
164+
165+
registry := prometheus.NewRegistry()
166+
mClock := quartz.NewMock(t)
167+
db := tc.Database(t)
168+
closeFunc, err := prometheusmetrics.Users(context.Background(), slogtest.Make(t, nil), mClock, registry, db, time.Millisecond)
169+
require.NoError(t, err)
170+
t.Cleanup(closeFunc)
171+
172+
_, w := mClock.AdvanceNext()
173+
w.MustWait(ctx)
174+
175+
checkFn := func() bool {
176+
metrics, err := registry.Gather()
177+
if err != nil {
178+
return false
179+
}
180+
181+
// If we get no metrics and we know none should exist, bail
182+
// early. If we get no metrics but we expect some, retry.
183+
if len(metrics) == 0 {
184+
return len(tc.Count) == 0
185+
}
186+
187+
for _, metric := range metrics[0].Metric {
188+
if tc.Count[database.UserStatus(*metric.Label[0].Value)] != int(metric.Gauge.GetValue()) {
189+
return false
190+
}
191+
}
192+
193+
return true
194+
}
195+
196+
require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast)
197+
198+
// Add another dormant user and ensure it updates
199+
dbgen.User(t, db, database.User{Status: database.UserStatusDormant})
200+
tc.Count[database.UserStatusDormant]++
201+
202+
_, w = mClock.AdvanceNext()
203+
w.MustWait(ctx)
204+
205+
require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast)
206+
})
207+
}
208+
}
209+
115210
func TestWorkspaceLatestBuildTotals(t *testing.T) {
116211
t.Parallel()
117212

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy