Skip to content

Commit f825477

Browse files
authored
fix: add timeouts to test telemetry snapshot (#17879)
This PR ensures that waits on channels will time out according to the test context, rather than waiting indefinitely. This should alleviate the panic seen in coder/internal#645 and, if the deadlock recurs, allow the test to be retried automatically in CI.
1 parent 5a3a7fc commit f825477

File tree

2 files changed

+89
-13
lines changed

2 files changed

+89
-13
lines changed

coderd/telemetry/telemetry_test.go

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package telemetry_test
22

33
import (
4+
"context"
45
"database/sql"
56
"encoding/json"
67
"net/http"
@@ -115,7 +116,7 @@ func TestTelemetry(t *testing.T) {
115116
_ = dbgen.WorkspaceAgentMemoryResourceMonitor(t, db, database.WorkspaceAgentMemoryResourceMonitor{})
116117
_ = dbgen.WorkspaceAgentVolumeResourceMonitor(t, db, database.WorkspaceAgentVolumeResourceMonitor{})
117118

118-
_, snapshot := collectSnapshot(t, db, nil)
119+
_, snapshot := collectSnapshot(ctx, t, db, nil)
119120
require.Len(t, snapshot.ProvisionerJobs, 1)
120121
require.Len(t, snapshot.Licenses, 1)
121122
require.Len(t, snapshot.Templates, 1)
@@ -168,17 +169,19 @@ func TestTelemetry(t *testing.T) {
168169
})
169170
t.Run("HashedEmail", func(t *testing.T) {
170171
t.Parallel()
172+
ctx := testutil.Context(t, testutil.WaitMedium)
171173
db := dbmem.New()
172174
_ = dbgen.User(t, db, database.User{
173175
Email: "kyle@coder.com",
174176
})
175-
_, snapshot := collectSnapshot(t, db, nil)
177+
_, snapshot := collectSnapshot(ctx, t, db, nil)
176178
require.Len(t, snapshot.Users, 1)
177179
require.Equal(t, snapshot.Users[0].EmailHashed, "bb44bf07cf9a2db0554bba63a03d822c927deae77df101874496df5a6a3e896d@coder.com")
178180
})
179181
t.Run("HashedModule", func(t *testing.T) {
180182
t.Parallel()
181183
db, _ := dbtestutil.NewDB(t)
184+
ctx := testutil.Context(t, testutil.WaitMedium)
182185
pj := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{})
183186
_ = dbgen.WorkspaceModule(t, db, database.WorkspaceModule{
184187
JobID: pj.ID,
@@ -190,7 +193,7 @@ func TestTelemetry(t *testing.T) {
190193
Source: "https://internal-url.com/some-module",
191194
Version: "1.0.0",
192195
})
193-
_, snapshot := collectSnapshot(t, db, nil)
196+
_, snapshot := collectSnapshot(ctx, t, db, nil)
194197
require.Len(t, snapshot.WorkspaceModules, 2)
195198
modules := snapshot.WorkspaceModules
196199
sort.Slice(modules, func(i, j int) bool {
@@ -286,11 +289,11 @@ func TestTelemetry(t *testing.T) {
286289
db, _ := dbtestutil.NewDB(t)
287290

288291
// 1. No org sync settings
289-
deployment, _ := collectSnapshot(t, db, nil)
292+
deployment, _ := collectSnapshot(ctx, t, db, nil)
290293
require.False(t, *deployment.IDPOrgSync)
291294

292295
// 2. Org sync settings set in server flags
293-
deployment, _ = collectSnapshot(t, db, func(opts telemetry.Options) telemetry.Options {
296+
deployment, _ = collectSnapshot(ctx, t, db, func(opts telemetry.Options) telemetry.Options {
294297
opts.DeploymentConfig = &codersdk.DeploymentValues{
295298
OIDC: codersdk.OIDCConfig{
296299
OrganizationField: "organizations",
@@ -312,16 +315,17 @@ func TestTelemetry(t *testing.T) {
312315
AssignDefault: true,
313316
})
314317
require.NoError(t, err)
315-
deployment, _ = collectSnapshot(t, db, nil)
318+
deployment, _ = collectSnapshot(ctx, t, db, nil)
316319
require.True(t, *deployment.IDPOrgSync)
317320
})
318321
}
319322

320323
// nolint:paralleltest
321324
func TestTelemetryInstallSource(t *testing.T) {
322325
t.Setenv("CODER_TELEMETRY_INSTALL_SOURCE", "aws_marketplace")
326+
ctx := testutil.Context(t, testutil.WaitMedium)
323327
db := dbmem.New()
324-
deployment, _ := collectSnapshot(t, db, nil)
328+
deployment, _ := collectSnapshot(ctx, t, db, nil)
325329
require.Equal(t, "aws_marketplace", deployment.InstallSource)
326330
}
327331

@@ -436,7 +440,7 @@ func TestRecordTelemetryStatus(t *testing.T) {
436440
}
437441
}
438442

439-
func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, chan *telemetry.Snapshot) {
443+
func mockTelemetryServer(ctx context.Context, t *testing.T) (*url.URL, chan *telemetry.Deployment, chan *telemetry.Snapshot) {
440444
t.Helper()
441445
deployment := make(chan *telemetry.Deployment, 64)
442446
snapshot := make(chan *telemetry.Snapshot, 64)
@@ -446,7 +450,11 @@ func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, ch
446450
dd := &telemetry.Deployment{}
447451
err := json.NewDecoder(r.Body).Decode(dd)
448452
require.NoError(t, err)
449-
deployment <- dd
453+
ok := testutil.AssertSend(ctx, t, deployment, dd)
454+
if !ok {
455+
w.WriteHeader(http.StatusInternalServerError)
456+
return
457+
}
450458
// Ensure the header is sent only after deployment is sent
451459
w.WriteHeader(http.StatusAccepted)
452460
})
@@ -455,7 +463,11 @@ func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, ch
455463
ss := &telemetry.Snapshot{}
456464
err := json.NewDecoder(r.Body).Decode(ss)
457465
require.NoError(t, err)
458-
snapshot <- ss
466+
ok := testutil.AssertSend(ctx, t, snapshot, ss)
467+
if !ok {
468+
w.WriteHeader(http.StatusInternalServerError)
469+
return
470+
}
459471
// Ensure the header is sent only after snapshot is sent
460472
w.WriteHeader(http.StatusAccepted)
461473
})
@@ -467,10 +479,15 @@ func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, ch
467479
return serverURL, deployment, snapshot
468480
}
469481

470-
func collectSnapshot(t *testing.T, db database.Store, addOptionsFn func(opts telemetry.Options) telemetry.Options) (*telemetry.Deployment, *telemetry.Snapshot) {
482+
func collectSnapshot(
483+
ctx context.Context,
484+
t *testing.T,
485+
db database.Store,
486+
addOptionsFn func(opts telemetry.Options) telemetry.Options,
487+
) (*telemetry.Deployment, *telemetry.Snapshot) {
471488
t.Helper()
472489

473-
serverURL, deployment, snapshot := mockTelemetryServer(t)
490+
serverURL, deployment, snapshot := mockTelemetryServer(ctx, t)
474491

475492
options := telemetry.Options{
476493
Database: db,
@@ -485,5 +502,6 @@ func collectSnapshot(t *testing.T, db database.Store, addOptionsFn func(opts tel
485502
reporter, err := telemetry.New(options)
486503
require.NoError(t, err)
487504
t.Cleanup(reporter.Close)
488-
return <-deployment, <-snapshot
505+
506+
return testutil.RequireReceive(ctx, t, deployment), testutil.RequireReceive(ctx, t, snapshot)
489507
}

testutil/chan.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,61 @@ func RequireSend[A any](ctx context.Context, t testing.TB, c chan<- A, a A) {
5555
// OK!
5656
}
5757
}
58+
59+
// SoftTryReceive will attempt to receive a value from the chan and return it. If
60+
// the context expires before a value can be received, it will mark the test as
61+
// failed but continue execution. If the channel is closed, the zero value of the
62+
// channel type will be returned.
63+
// The second return value indicates whether the receive was successful. In
64+
// particular, if the channel is closed, the second return value will be true.
65+
//
66+
// Safety: can be called from any goroutine.
67+
func SoftTryReceive[A any](ctx context.Context, t testing.TB, c <-chan A) (A, bool) {
68+
t.Helper()
69+
select {
70+
case <-ctx.Done():
71+
t.Error("timeout")
72+
var a A
73+
return a, false
74+
case a := <-c:
75+
return a, true
76+
}
77+
}
78+
79+
// AssertReceive will receive a value from the chan and return it. If the
80+
// context expires or the channel is closed before a value can be received,
81+
// it will mark the test as failed but continue execution.
82+
// The second return value indicates whether the receive was successful.
83+
//
84+
// Safety: can be called from any goroutine.
85+
func AssertReceive[A any](ctx context.Context, t testing.TB, c <-chan A) (A, bool) {
86+
t.Helper()
87+
select {
88+
case <-ctx.Done():
89+
t.Error("timeout")
90+
var a A
91+
return a, false
92+
case a, ok := <-c:
93+
if !ok {
94+
t.Error("channel closed")
95+
}
96+
return a, ok
97+
}
98+
}
99+
100+
// AssertSend will send the given value over the chan and then return. If
101+
// the context expires before the send succeeds, it will mark the test as failed
102+
// but continue execution.
103+
// The second return value indicates whether the send was successful.
104+
//
105+
// Safety: can be called from any goroutine.
106+
func AssertSend[A any](ctx context.Context, t testing.TB, c chan<- A, a A) bool {
107+
t.Helper()
108+
select {
109+
case <-ctx.Done():
110+
t.Error("timeout")
111+
return false
112+
case c <- a:
113+
return true
114+
}
115+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy