Skip to content

Commit f016901

Browse files
coadler authored and pull[bot] committed
fix: routinely ping agent websocket to ensure liveness (#5824)
1 parent e41a389 commit f016901

File tree

4 files changed

+42
-4
lines changed

4 files changed

+42
-4
lines changed

agent/agent.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -430,6 +430,9 @@ func (a *agent) createTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) (_
430430
// runCoordinator runs a coordinator and returns whether a reconnect
431431
// should occur.
432432
func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error {
433+
ctx, cancel := context.WithCancel(ctx)
434+
defer cancel()
435+
433436
coordinator, err := a.client.ListenWorkspaceAgent(ctx)
434437
if err != nil {
435438
return err

cli/agent.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ func workspaceAgent() *cobra.Command {
8383
slog.F("version", version),
8484
)
8585
client := codersdk.New(coderURL)
86+
client.Logger = logger
8687
// Set a reasonable timeout so requests can't hang forever!
8788
client.HTTPClient.Timeout = 10 * time.Second
8889

codersdk/workspaceagents.go

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,42 @@ func (c *Client) ListenWorkspaceAgent(ctx context.Context) (net.Conn, error) {
340340
return nil, readBodyAsError(res)
341341
}
342342

343+
// Ping once every 30 seconds to ensure that the websocket is alive. If we
344+
// don't get a response within 30s we kill the websocket and reconnect.
345+
// See: https://github.com/coder/coder/pull/5824
346+
go func() {
347+
tick := 30 * time.Second
348+
ticker := time.NewTicker(tick)
349+
defer ticker.Stop()
350+
defer func() {
351+
c.Logger.Debug(ctx, "coordinate pinger exited")
352+
}()
353+
for {
354+
select {
355+
case <-ctx.Done():
356+
return
357+
case start := <-ticker.C:
358+
ctx, cancel := context.WithTimeout(ctx, tick)
359+
360+
err := conn.Ping(ctx)
361+
if err != nil {
362+
c.Logger.Error(ctx, "workspace agent coordinate ping", slog.Error(err))
363+
364+
err := conn.Close(websocket.StatusGoingAway, "Ping failed")
365+
if err != nil {
366+
c.Logger.Error(ctx, "close workspace agent coordinate websocket", slog.Error(err))
367+
}
368+
369+
cancel()
370+
return
371+
}
372+
373+
c.Logger.Debug(ctx, "got coordinate pong", slog.F("took", time.Since(start)))
374+
cancel()
375+
}
376+
}
377+
}()
378+
343379
return websocket.NetConn(ctx, conn, websocket.MessageBinary), nil
344380
}
345381

provisionerd/provisionerd_test.go

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,6 @@ import (
1212
"testing"
1313
"time"
1414

15-
"github.com/coder/coder/provisionerd/runner"
16-
"github.com/coder/coder/testutil"
17-
1815
"github.com/hashicorp/yamux"
1916
"github.com/stretchr/testify/assert"
2017
"github.com/stretchr/testify/require"
@@ -26,11 +23,12 @@ import (
2623

2724
"cdr.dev/slog"
2825
"cdr.dev/slog/sloggers/slogtest"
29-
3026
"github.com/coder/coder/provisionerd"
3127
"github.com/coder/coder/provisionerd/proto"
28+
"github.com/coder/coder/provisionerd/runner"
3229
"github.com/coder/coder/provisionersdk"
3330
sdkproto "github.com/coder/coder/provisionersdk/proto"
31+
"github.com/coder/coder/testutil"
3432
)
3533

3634
func TestMain(m *testing.M) {

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy