Skip to content

Commit 4c8be34

Browse files
authored
feat: add health check monitoring to workspace apps (#4114)
1 parent f160830, commit 4c8be34

File tree

64 files changed

+1592
-509
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1592
-509
lines changed

agent/agent.go

Lines changed: 42 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ import (
3333

3434
"cdr.dev/slog"
3535
"github.com/coder/coder/agent/usershell"
36+
"github.com/coder/coder/codersdk"
3637
"github.com/coder/coder/pty"
3738
"github.com/coder/coder/tailnet"
3839
"github.com/coder/retry"
@@ -49,55 +50,41 @@ const (
4950
MagicSessionErrorCode = 229
5051
)
5152

52-
var (
53-
// tailnetIP is a static IPv6 address with the Tailscale prefix that is used to route
54-
// connections from clients to this node. A dynamic address is not required because a Tailnet
55-
// client only dials a single agent at a time.
56-
tailnetIP = netip.MustParseAddr("fd7a:115c:a1e0:49d6:b259:b7ac:b1b2:48f4")
57-
tailnetSSHPort = 1
58-
tailnetReconnectingPTYPort = 2
59-
tailnetSpeedtestPort = 3
60-
)
61-
6253
type Options struct {
63-
CoordinatorDialer CoordinatorDialer
64-
FetchMetadata FetchMetadata
65-
66-
StatsReporter StatsReporter
67-
ReconnectingPTYTimeout time.Duration
68-
EnvironmentVariables map[string]string
69-
Logger slog.Logger
70-
}
71-
72-
type Metadata struct {
73-
DERPMap *tailcfg.DERPMap `json:"derpmap"`
74-
EnvironmentVariables map[string]string `json:"environment_variables"`
75-
StartupScript string `json:"startup_script"`
76-
Directory string `json:"directory"`
54+
CoordinatorDialer CoordinatorDialer
55+
FetchMetadata FetchMetadata
56+
StatsReporter StatsReporter
57+
WorkspaceAgentApps WorkspaceAgentApps
58+
PostWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth
59+
ReconnectingPTYTimeout time.Duration
60+
EnvironmentVariables map[string]string
61+
Logger slog.Logger
7762
}
7863

7964
// CoordinatorDialer is a function that constructs a new broker.
8065
// A dialer must be passed in to allow for reconnects.
81-
type CoordinatorDialer func(ctx context.Context) (net.Conn, error)
66+
type CoordinatorDialer func(context.Context) (net.Conn, error)
8267

8368
// FetchMetadata is a function to obtain metadata for the agent.
84-
type FetchMetadata func(ctx context.Context) (Metadata, error)
69+
type FetchMetadata func(context.Context) (codersdk.WorkspaceAgentMetadata, error)
8570

8671
func New(options Options) io.Closer {
8772
if options.ReconnectingPTYTimeout == 0 {
8873
options.ReconnectingPTYTimeout = 5 * time.Minute
8974
}
9075
ctx, cancelFunc := context.WithCancel(context.Background())
9176
server := &agent{
92-
reconnectingPTYTimeout: options.ReconnectingPTYTimeout,
93-
logger: options.Logger,
94-
closeCancel: cancelFunc,
95-
closed: make(chan struct{}),
96-
envVars: options.EnvironmentVariables,
97-
coordinatorDialer: options.CoordinatorDialer,
98-
fetchMetadata: options.FetchMetadata,
99-
stats: &Stats{},
100-
statsReporter: options.StatsReporter,
77+
reconnectingPTYTimeout: options.ReconnectingPTYTimeout,
78+
logger: options.Logger,
79+
closeCancel: cancelFunc,
80+
closed: make(chan struct{}),
81+
envVars: options.EnvironmentVariables,
82+
coordinatorDialer: options.CoordinatorDialer,
83+
fetchMetadata: options.FetchMetadata,
84+
stats: &Stats{},
85+
statsReporter: options.StatsReporter,
86+
workspaceAgentApps: options.WorkspaceAgentApps,
87+
postWorkspaceAgentAppHealth: options.PostWorkspaceAgentAppHealth,
10188
}
10289
server.init(ctx)
10390
return server
@@ -120,14 +107,16 @@ type agent struct {
120107
fetchMetadata FetchMetadata
121108
sshServer *ssh.Server
122109

123-
network *tailnet.Conn
124-
coordinatorDialer CoordinatorDialer
125-
stats *Stats
126-
statsReporter StatsReporter
110+
network *tailnet.Conn
111+
coordinatorDialer CoordinatorDialer
112+
stats *Stats
113+
statsReporter StatsReporter
114+
workspaceAgentApps WorkspaceAgentApps
115+
postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth
127116
}
128117

129118
func (a *agent) run(ctx context.Context) {
130-
var metadata Metadata
119+
var metadata codersdk.WorkspaceAgentMetadata
131120
var err error
132121
// An exponential back-off occurs when the connection is failing to dial.
133122
// This is to prevent server spam in case of a coderd outage.
@@ -168,6 +157,10 @@ func (a *agent) run(ctx context.Context) {
168157
if metadata.DERPMap != nil {
169158
go a.runTailnet(ctx, metadata.DERPMap)
170159
}
160+
161+
if a.workspaceAgentApps != nil && a.postWorkspaceAgentAppHealth != nil {
162+
go NewWorkspaceAppHealthReporter(a.logger, a.workspaceAgentApps, a.postWorkspaceAgentAppHealth)(ctx)
163+
}
171164
}
172165

173166
func (a *agent) runTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) {
@@ -182,7 +175,7 @@ func (a *agent) runTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) {
182175
}
183176
var err error
184177
a.network, err = tailnet.NewConn(&tailnet.Options{
185-
Addresses: []netip.Prefix{netip.PrefixFrom(tailnetIP, 128)},
178+
Addresses: []netip.Prefix{netip.PrefixFrom(codersdk.TailnetIP, 128)},
186179
DERPMap: derpMap,
187180
Logger: a.logger.Named("tailnet"),
188181
})
@@ -199,7 +192,7 @@ func (a *agent) runTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) {
199192
})
200193
go a.runCoordinator(ctx)
201194

202-
sshListener, err := a.network.Listen("tcp", ":"+strconv.Itoa(tailnetSSHPort))
195+
sshListener, err := a.network.Listen("tcp", ":"+strconv.Itoa(codersdk.TailnetSSHPort))
203196
if err != nil {
204197
a.logger.Critical(ctx, "listen for ssh", slog.Error(err))
205198
return
@@ -213,7 +206,7 @@ func (a *agent) runTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) {
213206
go a.sshServer.HandleConn(a.stats.wrapConn(conn))
214207
}
215208
}()
216-
reconnectingPTYListener, err := a.network.Listen("tcp", ":"+strconv.Itoa(tailnetReconnectingPTYPort))
209+
reconnectingPTYListener, err := a.network.Listen("tcp", ":"+strconv.Itoa(codersdk.TailnetReconnectingPTYPort))
217210
if err != nil {
218211
a.logger.Critical(ctx, "listen for reconnecting pty", slog.Error(err))
219212
return
@@ -239,15 +232,15 @@ func (a *agent) runTailnet(ctx context.Context, derpMap *tailcfg.DERPMap) {
239232
if err != nil {
240233
continue
241234
}
242-
var msg reconnectingPTYInit
235+
var msg codersdk.ReconnectingPTYInit
243236
err = json.Unmarshal(data, &msg)
244237
if err != nil {
245238
continue
246239
}
247240
go a.handleReconnectingPTY(ctx, msg, conn)
248241
}
249242
}()
250-
speedtestListener, err := a.network.Listen("tcp", ":"+strconv.Itoa(tailnetSpeedtestPort))
243+
speedtestListener, err := a.network.Listen("tcp", ":"+strconv.Itoa(codersdk.TailnetSpeedtestPort))
251244
if err != nil {
252245
a.logger.Critical(ctx, "listen for speedtest", slog.Error(err))
253246
return
@@ -443,7 +436,7 @@ func (a *agent) init(ctx context.Context) {
443436

444437
go a.run(ctx)
445438
if a.statsReporter != nil {
446-
cl, err := a.statsReporter(ctx, a.logger, func() *Stats {
439+
cl, err := a.statsReporter(ctx, a.logger, func() *codersdk.AgentStats {
447440
return a.stats.Copy()
448441
})
449442
if err != nil {
@@ -478,7 +471,7 @@ func (a *agent) createCommand(ctx context.Context, rawCommand string, env []stri
478471
if rawMetadata == nil {
479472
return nil, xerrors.Errorf("no metadata was provided: %w", err)
480473
}
481-
metadata, valid := rawMetadata.(Metadata)
474+
metadata, valid := rawMetadata.(codersdk.WorkspaceAgentMetadata)
482475
if !valid {
483476
return nil, xerrors.Errorf("metadata is the wrong type: %T", metadata)
484477
}
@@ -634,7 +627,7 @@ func (a *agent) handleSSHSession(session ssh.Session) (retErr error) {
634627
return cmd.Wait()
635628
}
636629

637-
func (a *agent) handleReconnectingPTY(ctx context.Context, msg reconnectingPTYInit, conn net.Conn) {
630+
func (a *agent) handleReconnectingPTY(ctx context.Context, msg codersdk.ReconnectingPTYInit, conn net.Conn) {
638631
defer conn.Close()
639632

640633
var rpty *reconnectingPTY
@@ -775,7 +768,7 @@ func (a *agent) handleReconnectingPTY(ctx context.Context, msg reconnectingPTYIn
775768
rpty.activeConnsMutex.Unlock()
776769
}()
777770
decoder := json.NewDecoder(conn)
778-
var req ReconnectingPTYRequest
771+
var req codersdk.ReconnectingPTYRequest
779772
for {
780773
err = decoder.Decode(&req)
781774
if xerrors.Is(err, io.EOF) {

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy