Skip to content

Commit c01d6fd

Browse files
authored
chore: refactor apphealth and tests to use clock testing library (#13576)
Refactors the apphealth subsystem and unit tests to use `clock.Clock`. Also slightly simplifies the implementation, which wrapped a function that never returned an error in a `retry.Retry`. The retry is entirely superfluous in that case, so removed. UTs used to take a few seconds to run, and now run in milliseconds or better. No sleeps, `Eventually`, or polling. Dropped the "no spamming" test since we can directly assert the number of handler calls on the mainline test case.
1 parent fe240ad commit c01d6fd

File tree

2 files changed

+203
-169
lines changed

2 files changed

+203
-169
lines changed

agent/apphealth.go

Lines changed: 54 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,11 @@ import (
1010
"golang.org/x/xerrors"
1111

1212
"cdr.dev/slog"
13+
"github.com/coder/coder/v2/clock"
1314
"github.com/coder/coder/v2/codersdk"
1415
"github.com/coder/coder/v2/codersdk/agentsdk"
15-
"github.com/coder/retry"
1616
)
1717

18-
// WorkspaceAgentApps fetches the workspace apps.
19-
type WorkspaceAgentApps func(context.Context) ([]codersdk.WorkspaceApp, error)
20-
2118
// PostWorkspaceAgentAppHealth updates the workspace app health.
2219
type PostWorkspaceAgentAppHealth func(context.Context, agentsdk.PostAppHealthsRequest) error
2320

@@ -26,15 +23,26 @@ type WorkspaceAppHealthReporter func(ctx context.Context)
2623

2724
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
2825
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
26+
return NewAppHealthReporterWithClock(logger, apps, postWorkspaceAgentAppHealth, clock.NewReal())
27+
}
28+
29+
// NewAppHealthReporterWithClock is only called directly by test code. Product code should call
30+
// NewAppHealthReporter.
31+
func NewAppHealthReporterWithClock(
32+
logger slog.Logger,
33+
apps []codersdk.WorkspaceApp,
34+
postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth,
35+
clk clock.Clock,
36+
) WorkspaceAppHealthReporter {
2937
logger = logger.Named("apphealth")
3038

31-
runHealthcheckLoop := func(ctx context.Context) error {
39+
return func(ctx context.Context) {
3240
ctx, cancel := context.WithCancel(ctx)
3341
defer cancel()
3442

3543
// no need to run this loop if no apps for this workspace.
3644
if len(apps) == 0 {
37-
return nil
45+
return
3846
}
3947

4048
hasHealthchecksEnabled := false
@@ -49,7 +57,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
4957

5058
// no need to run this loop if no health checks are configured.
5159
if !hasHealthchecksEnabled {
52-
return nil
60+
return
5361
}
5462

5563
// run a ticker for each app health check.
@@ -61,25 +69,29 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
6169
}
6270
app := nextApp
6371
go func() {
64-
t := time.NewTicker(time.Duration(app.Healthcheck.Interval) * time.Second)
65-
defer t.Stop()
66-
67-
for {
68-
select {
69-
case <-ctx.Done():
70-
return
71-
case <-t.C:
72-
}
73-
// we set the http timeout to the healthcheck interval to prevent getting too backed up.
74-
client := &http.Client{
75-
Timeout: time.Duration(app.Healthcheck.Interval) * time.Second,
76-
}
72+
_ = clk.TickerFunc(ctx, time.Duration(app.Healthcheck.Interval)*time.Second, func() error {
73+
// We time out at the healthcheck interval to prevent getting too backed up, but
74+
// set it 1ms early so that it's not simultaneous with the next tick in testing,
75+
// which makes the test easier to understand.
76+
//
77+
// It would be idiomatic to use the http.Client.Timeout or a context.WithTimeout,
78+
// but we are passing this off to the native http library, which is not aware
79+
// of the clock library we are using. That means in testing, with a mock clock
80+
// it will compare mocked times with real times, and we will get strange results.
81+
// So, we just implement the timeout as a context we cancel with an AfterFunc
82+
reqCtx, reqCancel := context.WithCancel(ctx)
83+
timeout := clk.AfterFunc(
84+
time.Duration(app.Healthcheck.Interval)*time.Second-time.Millisecond,
85+
reqCancel,
86+
"timeout", app.Slug)
87+
defer timeout.Stop()
88+
7789
err := func() error {
78-
req, err := http.NewRequestWithContext(ctx, http.MethodGet, app.Healthcheck.URL, nil)
90+
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, app.Healthcheck.URL, nil)
7991
if err != nil {
8092
return err
8193
}
82-
res, err := client.Do(req)
94+
res, err := http.DefaultClient.Do(req)
8395
if err != nil {
8496
return err
8597
}
@@ -118,54 +130,36 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
118130
mu.Unlock()
119131
logger.Debug(ctx, "workspace app healthy", slog.F("id", app.ID.String()), slog.F("slug", app.Slug))
120132
}
121-
122-
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
123-
}
133+
return nil
134+
}, "healthcheck", app.Slug)
124135
}()
125136
}
126137

127138
mu.Lock()
128139
lastHealth := copyHealth(health)
129140
mu.Unlock()
130-
reportTicker := time.NewTicker(time.Second)
131-
defer reportTicker.Stop()
132-
// every second we check if the health values of the apps have changed
133-
// and if there is a change we will report the new values.
134-
for {
135-
select {
136-
case <-ctx.Done():
141+
reportTicker := clk.TickerFunc(ctx, time.Second, func() error {
142+
mu.RLock()
143+
changed := healthChanged(lastHealth, health)
144+
mu.RUnlock()
145+
if !changed {
137146
return nil
138-
case <-reportTicker.C:
139-
mu.RLock()
140-
changed := healthChanged(lastHealth, health)
141-
mu.RUnlock()
142-
if !changed {
143-
continue
144-
}
145-
146-
mu.Lock()
147-
lastHealth = copyHealth(health)
148-
mu.Unlock()
149-
err := postWorkspaceAgentAppHealth(ctx, agentsdk.PostAppHealthsRequest{
150-
Healths: lastHealth,
151-
})
152-
if err != nil {
153-
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
154-
} else {
155-
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
156-
}
157147
}
158-
}
159-
}
160148

161-
return func(ctx context.Context) {
162-
for r := retry.New(time.Second, 30*time.Second); r.Wait(ctx); {
163-
err := runHealthcheckLoop(ctx)
164-
if err == nil || xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
165-
return
149+
mu.Lock()
150+
lastHealth = copyHealth(health)
151+
mu.Unlock()
152+
err := postWorkspaceAgentAppHealth(ctx, agentsdk.PostAppHealthsRequest{
153+
Healths: lastHealth,
154+
})
155+
if err != nil {
156+
logger.Error(ctx, "failed to report workspace app health", slog.Error(err))
157+
} else {
158+
logger.Debug(ctx, "sent workspace app health", slog.F("health", lastHealth))
166159
}
167-
logger.Error(ctx, "failed running workspace app reporter", slog.Error(err))
168-
}
160+
return nil
161+
}, "report")
162+
_ = reportTicker.Wait() // only possible error is context done
169163
}
170164
}
171165

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy