Skip to content

Commit ccfffc6

Browse files
authored
chore: add tx metrics and logs for serialization errors (#15215)
Before db_metrics were all or nothing. Now `InTx` metrics are always recorded, and query metrics are opt in. Adds instrumentation & logging around serialization failures in the database.
1 parent df34858 commit ccfffc6

File tree

32 files changed

+3124
-2801
lines changed

32 files changed

+3124
-2801
lines changed

cli/server.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,9 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
718718
}
719719

720720
if options.DeploymentValues.Prometheus.Enable && options.DeploymentValues.Prometheus.CollectDBMetrics {
721-
options.Database = dbmetrics.New(options.Database, options.PrometheusRegistry)
721+
options.Database = dbmetrics.NewQueryMetrics(options.Database, options.Logger, options.PrometheusRegistry)
722+
} else {
723+
options.Database = dbmetrics.NewDBMetrics(options.Database, options.Logger, options.PrometheusRegistry)
722724
}
723725

724726
var deploymentID string

cli/testdata/coder_server_--help.golden

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,9 @@ INTROSPECTION / PROMETHEUS OPTIONS:
145145
Collect agent stats (may increase charges for metrics storage).
146146

147147
--prometheus-collect-db-metrics bool, $CODER_PROMETHEUS_COLLECT_DB_METRICS (default: false)
148-
Collect database metrics (may increase charges for metrics storage).
148+
Collect database query metrics (may increase charges for metrics
149+
storage). If set to false, a reduced set of database metrics are still
150+
collected.
149151

150152
--prometheus-enable bool, $CODER_PROMETHEUS_ENABLE
151153
Serve prometheus metrics on the address defined by prometheus address.

cli/testdata/server-config.yaml.golden

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,8 @@ introspection:
197197
- template_name
198198
- username
199199
- workspace_name
200-
# Collect database metrics (may increase charges for metrics storage).
200+
# Collect database query metrics (may increase charges for metrics storage). If
201+
# set to false, a reduced set of database metrics are still collected.
201202
# (default: false, type: bool)
202203
collect_db_metrics: false
203204
pprof:

coderd/autobuild/lifecycle_executor.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,10 @@ func (e *Executor) runOnce(t time.Time) Stats {
285285

286286
// Run with RepeatableRead isolation so that the build process sees the same data
287287
// as our calculation that determines whether an autobuild is necessary.
288-
}, &sql.TxOptions{Isolation: sql.LevelRepeatableRead})
288+
}, &database.TxOptions{
289+
Isolation: sql.LevelRepeatableRead,
290+
TxIdentifier: "lifecycle",
291+
})
289292
if auditLog != nil {
290293
// If the transition didn't succeed then updating the workspace
291294
// to indicate dormant didn't either.

coderd/coderdtest/promhelp/doc.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Package promhelp provides helper functions for asserting Prometheus
2+
// metric values in unit tests.
3+
package promhelp

coderd/coderdtest/promhelp/metrics.go

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package promhelp
2+
3+
import (
4+
"context"
5+
"io"
6+
"maps"
7+
"net/http"
8+
"net/http/httptest"
9+
"strings"
10+
"testing"
11+
12+
"github.com/prometheus/client_golang/prometheus"
13+
"github.com/prometheus/client_golang/prometheus/promhttp"
14+
ptestutil "github.com/prometheus/client_golang/prometheus/testutil"
15+
io_prometheus_client "github.com/prometheus/client_model/go"
16+
"github.com/stretchr/testify/require"
17+
)
18+
19+
// RegistryDump returns the http page for a given registry's metrics.
20+
// Very useful for visual debugging.
21+
func RegistryDump(reg *prometheus.Registry) string {
22+
h := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
23+
rec := httptest.NewRecorder()
24+
req, _ := http.NewRequestWithContext(context.Background(), http.MethodGet, "/", nil)
25+
h.ServeHTTP(rec, req)
26+
resp := rec.Result()
27+
data, _ := io.ReadAll(resp.Body)
28+
_ = resp.Body.Close()
29+
return string(data)
30+
}
31+
32+
// Compare can be used to compare a registry to some prometheus formatted
33+
// text. If any values differ, an error is returned.
34+
// If metric names are passed in, only those metrics will be compared.
35+
// Usage: `Compare(reg, RegistryDump(reg))`
36+
func Compare(reg prometheus.Gatherer, compare string, metricNames ...string) error {
37+
return ptestutil.GatherAndCompare(reg, strings.NewReader(compare), metricNames...)
38+
}
39+
40+
// HistogramValue returns the value of a histogram metric with the given name and labels.
41+
func HistogramValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) *io_prometheus_client.Histogram {
42+
t.Helper()
43+
44+
labeled := MetricValue(t, reg, metricName, labels)
45+
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
46+
return labeled.GetHistogram()
47+
}
48+
49+
// GaugeValue returns the value of a gauge metric with the given name and labels.
50+
func GaugeValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
51+
t.Helper()
52+
53+
labeled := MetricValue(t, reg, metricName, labels)
54+
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
55+
return int(labeled.GetGauge().GetValue())
56+
}
57+
58+
// CounterValue returns the value of a counter metric with the given name and labels.
59+
func CounterValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) int {
60+
t.Helper()
61+
62+
labeled := MetricValue(t, reg, metricName, labels)
63+
require.NotNilf(t, labeled, "metric %q with labels %v not found", metricName, labels)
64+
return int(labeled.GetCounter().GetValue())
65+
}
66+
67+
func MetricValue(t testing.TB, reg prometheus.Gatherer, metricName string, labels prometheus.Labels) *io_prometheus_client.Metric {
68+
t.Helper()
69+
70+
metrics, err := reg.Gather()
71+
require.NoError(t, err)
72+
73+
for _, m := range metrics {
74+
if m.GetName() == metricName {
75+
for _, labeled := range m.GetMetric() {
76+
mLabels := make(prometheus.Labels)
77+
for _, v := range labeled.GetLabel() {
78+
mLabels[v.GetName()] = v.GetValue()
79+
}
80+
if maps.Equal(mLabels, labels) {
81+
return labeled
82+
}
83+
}
84+
}
85+
}
86+
return nil
87+
}

coderd/cryptokeys/rotate.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,9 @@ func (k *rotator) rotateKeys(ctx context.Context) error {
161161
}
162162
}
163163
return nil
164-
}, &sql.TxOptions{
165-
Isolation: sql.LevelRepeatableRead,
164+
}, &database.TxOptions{
165+
Isolation: sql.LevelRepeatableRead,
166+
TxIdentifier: "rotate_keys",
166167
})
167168
}
168169

coderd/database/db.go

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type Store interface {
2828
wrapper
2929

3030
Ping(ctx context.Context) (time.Duration, error)
31-
InTx(func(Store) error, *sql.TxOptions) error
31+
InTx(func(Store) error, *TxOptions) error
3232
}
3333

3434
type wrapper interface {
@@ -57,6 +57,43 @@ func New(sdb *sql.DB) Store {
5757
}
5858
}
5959

60+
// TxOptions is used to pass some execution metadata to the callers.
61+
// Ideally we could throw this into a context, but no context is used for
62+
// transactions. So instead, the return context is attached to the options
63+
// passed in.
64+
// This metadata should not be returned in the method signature, because it
65+
// is only used for metric tracking. It should never be used by business logic.
66+
type TxOptions struct {
67+
// Isolation is the transaction isolation level.
68+
// If zero, the driver or database's default level is used.
69+
Isolation sql.IsolationLevel
70+
ReadOnly bool
71+
72+
// -- Coder specific metadata --
73+
// TxIdentifier is a unique identifier for the transaction to be used
74+
// in metrics. Can be any string.
75+
TxIdentifier string
76+
77+
// Set by InTx
78+
executionCount int
79+
}
80+
81+
// IncrementExecutionCount is a helper function for external packages
82+
// to increment the unexported count.
83+
// Mainly for `dbmem`.
84+
func IncrementExecutionCount(opts *TxOptions) {
85+
opts.executionCount++
86+
}
87+
88+
func (o TxOptions) ExecutionCount() int {
89+
return o.executionCount
90+
}
91+
92+
func (o *TxOptions) WithID(id string) *TxOptions {
93+
o.TxIdentifier = id
94+
return o
95+
}
96+
6097
// queries encompasses both are sqlc generated
6198
// queries and our custom queries.
6299
type querier interface {
@@ -80,25 +117,39 @@ func (q *sqlQuerier) Ping(ctx context.Context) (time.Duration, error) {
80117
return time.Since(start), err
81118
}
82119

83-
func (q *sqlQuerier) InTx(function func(Store) error, txOpts *sql.TxOptions) error {
120+
func DefaultTXOptions() *TxOptions {
121+
return &TxOptions{
122+
Isolation: sql.LevelDefault,
123+
ReadOnly: false,
124+
}
125+
}
126+
127+
func (q *sqlQuerier) InTx(function func(Store) error, txOpts *TxOptions) error {
84128
_, inTx := q.db.(*sqlx.Tx)
85-
isolation := sql.LevelDefault
86-
if txOpts != nil {
87-
isolation = txOpts.Isolation
129+
130+
if txOpts == nil {
131+
// create a default txOpts if left to nil
132+
txOpts = DefaultTXOptions()
133+
}
134+
135+
sqlOpts := &sql.TxOptions{
136+
Isolation: txOpts.Isolation,
137+
ReadOnly: txOpts.ReadOnly,
88138
}
89139

90140
// If we are not already in a transaction, and we are running in serializable
91141
// mode, we need to run the transaction in a retry loop. The caller should be
92142
// prepared to allow retries if using serializable mode.
93143
// If we are in a transaction already, the parent InTx call will handle the retry.
94144
// We do not want to duplicate those retries.
95-
if !inTx && isolation == sql.LevelSerializable {
145+
if !inTx && sqlOpts.Isolation == sql.LevelSerializable {
96146
// This is an arbitrarily chosen number.
97147
const retryAmount = 3
98148
var err error
99149
attempts := 0
100150
for attempts = 0; attempts < retryAmount; attempts++ {
101-
err = q.runTx(function, txOpts)
151+
txOpts.executionCount++
152+
err = q.runTx(function, sqlOpts)
102153
if err == nil {
103154
// Transaction succeeded.
104155
return nil
@@ -111,7 +162,9 @@ func (q *sqlQuerier) InTx(function func(Store) error, txOpts *sql.TxOptions) err
111162
// Transaction kept failing in serializable mode.
112163
return xerrors.Errorf("transaction failed after %d attempts: %w", attempts, err)
113164
}
114-
return q.runTx(function, txOpts)
165+
166+
txOpts.executionCount++
167+
return q.runTx(function, sqlOpts)
115168
}
116169

117170
// InTx performs database operations inside a transaction.

coderd/database/db_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ func TestSerializedRetry(t *testing.T) {
2727
db := database.New(sqlDB)
2828

2929
called := 0
30-
txOpts := &sql.TxOptions{Isolation: sql.LevelSerializable}
30+
txOpts := &database.TxOptions{Isolation: sql.LevelSerializable}
3131
err := db.InTx(func(tx database.Store) error {
3232
// Test nested error
3333
return tx.InTx(func(tx database.Store) error {

coderd/database/dbauthz/dbauthz.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ func (q *querier) Ping(ctx context.Context) (time.Duration, error) {
558558
}
559559

560560
// InTx runs the given function in a transaction.
561-
func (q *querier) InTx(function func(querier database.Store) error, txOpts *sql.TxOptions) error {
561+
func (q *querier) InTx(function func(querier database.Store) error, txOpts *database.TxOptions) error {
562562
return q.db.InTx(func(tx database.Store) error {
563563
// Wrap the transaction store in a querier.
564564
wrapped := New(tx, q.auth, q.log, q.acs)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy