Skip to content

Commit ffbd488

Browse files
committed
feat: changes codersdk to use tailnet v2 for DERPMap updates
1 parent 05d21b1 commit ffbd488

File tree

1 file changed

+188
-136
lines changed

1 file changed

+188
-136
lines changed

codersdk/workspaceagents.go

Lines changed: 188 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"strings"
1515
"time"
1616

17+
"golang.org/x/sync/errgroup"
18+
1719
"github.com/google/uuid"
1820
"golang.org/x/xerrors"
1921
"nhooyr.io/websocket"
@@ -317,142 +319,28 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID,
317319
q := coordinateURL.Query()
318320
q.Add("version", proto.CurrentVersion.String())
319321
coordinateURL.RawQuery = q.Encode()
320-
closedCoordinator := make(chan struct{})
321-
// Must only ever be used once, send error OR close to avoid
322-
// reassignment race. Buffered so we don't hang in goroutine.
323-
firstCoordinator := make(chan error, 1)
324-
go func() {
325-
defer close(closedCoordinator)
326-
isFirst := true
327-
for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
328-
options.Logger.Debug(ctx, "connecting")
329-
// nolint:bodyclose
330-
ws, res, err := websocket.Dial(ctx, coordinateURL.String(), &websocket.DialOptions{
331-
HTTPClient: c.HTTPClient,
332-
HTTPHeader: headers,
333-
// Need to disable compression to avoid a data-race.
334-
CompressionMode: websocket.CompressionDisabled,
335-
})
336-
if isFirst {
337-
if res != nil && res.StatusCode == http.StatusConflict {
338-
firstCoordinator <- ReadBodyAsError(res)
339-
return
340-
}
341-
isFirst = false
342-
close(firstCoordinator)
343-
}
344-
if err != nil {
345-
if errors.Is(err, context.Canceled) {
346-
return
347-
}
348-
options.Logger.Debug(ctx, "failed to dial", slog.Error(err))
349-
continue
350-
}
351-
client, err := tailnet.NewDRPCClient(websocket.NetConn(ctx, ws, websocket.MessageBinary))
352-
if err != nil {
353-
options.Logger.Debug(ctx, "failed to create DRPCClient", slog.Error(err))
354-
_ = ws.Close(websocket.StatusInternalError, "")
355-
continue
356-
}
357-
coordinate, err := client.Coordinate(ctx)
358-
if err != nil {
359-
options.Logger.Debug(ctx, "failed to reach the Coordinate endpoint", slog.Error(err))
360-
_ = ws.Close(websocket.StatusInternalError, "")
361-
continue
362-
}
363-
364-
coordination := tailnet.NewRemoteCoordination(options.Logger, coordinate, conn, agentID)
365-
options.Logger.Debug(ctx, "serving coordinator")
366-
err = <-coordination.Error()
367-
if errors.Is(err, context.Canceled) {
368-
_ = ws.Close(websocket.StatusGoingAway, "")
369-
return
370-
}
371-
if err != nil {
372-
options.Logger.Debug(ctx, "error serving coordinator", slog.Error(err))
373-
_ = ws.Close(websocket.StatusGoingAway, "")
374-
continue
375-
}
376-
_ = ws.Close(websocket.StatusGoingAway, "")
377-
}
378-
}()
379322

380-
derpMapURL, err := c.URL.Parse("/api/v2/derp-map")
381-
if err != nil {
382-
return nil, xerrors.Errorf("parse url: %w", err)
383-
}
384-
closedDerpMap := make(chan struct{})
385-
// Must only ever be used once, send error OR close to avoid
386-
// reassignment race. Buffered so we don't hang in goroutine.
387-
firstDerpMap := make(chan error, 1)
388-
go func() {
389-
defer close(closedDerpMap)
390-
isFirst := true
391-
for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(ctx); {
392-
options.Logger.Debug(ctx, "connecting to server for derp map updates")
393-
// nolint:bodyclose
394-
ws, res, err := websocket.Dial(ctx, derpMapURL.String(), &websocket.DialOptions{
395-
HTTPClient: c.HTTPClient,
396-
HTTPHeader: headers,
397-
// Need to disable compression to avoid a data-race.
398-
CompressionMode: websocket.CompressionDisabled,
399-
})
400-
if isFirst {
401-
if res != nil && res.StatusCode == http.StatusConflict {
402-
firstDerpMap <- ReadBodyAsError(res)
403-
return
404-
}
405-
isFirst = false
406-
close(firstDerpMap)
407-
}
408-
if err != nil {
409-
if errors.Is(err, context.Canceled) {
410-
return
411-
}
412-
options.Logger.Debug(ctx, "failed to dial", slog.Error(err))
413-
continue
414-
}
415-
416-
var (
417-
nconn = websocket.NetConn(ctx, ws, websocket.MessageBinary)
418-
dec = json.NewDecoder(nconn)
419-
)
420-
for {
421-
var derpMap tailcfg.DERPMap
422-
err := dec.Decode(&derpMap)
423-
if xerrors.Is(err, context.Canceled) {
424-
_ = ws.Close(websocket.StatusGoingAway, "")
425-
return
426-
}
427-
if err != nil {
428-
options.Logger.Debug(ctx, "failed to decode derp map", slog.Error(err))
429-
_ = ws.Close(websocket.StatusGoingAway, "")
430-
return
431-
}
432-
433-
if !tailnet.CompareDERPMaps(conn.DERPMap(), &derpMap) {
434-
options.Logger.Debug(ctx, "updating derp map due to detected changes")
435-
conn.SetDERPMap(&derpMap)
436-
}
437-
}
438-
}
439-
}()
440-
441-
for firstCoordinator != nil || firstDerpMap != nil {
442-
select {
443-
case <-dialCtx.Done():
444-
return nil, xerrors.Errorf("timed out waiting for coordinator and derp map: %w", dialCtx.Err())
445-
case err = <-firstCoordinator:
446-
if err != nil {
447-
return nil, xerrors.Errorf("start coordinator: %w", err)
448-
}
449-
firstCoordinator = nil
450-
case err = <-firstDerpMap:
451-
if err != nil {
452-
return nil, xerrors.Errorf("receive derp map: %w", err)
453-
}
454-
firstDerpMap = nil
323+
connector := runTailnetAPIConnector(ctx, options.Logger,
324+
agentID, coordinateURL.String(),
325+
&websocket.DialOptions{
326+
HTTPClient: c.HTTPClient,
327+
HTTPHeader: headers,
328+
// Need to disable compression to avoid a data-race.
329+
CompressionMode: websocket.CompressionDisabled,
330+
},
331+
conn,
332+
)
333+
options.Logger.Debug(ctx, "running tailnet API v2+ connector")
334+
335+
select {
336+
case <-dialCtx.Done():
337+
return nil, xerrors.Errorf("timed out waiting for coordinator and derp map: %w", dialCtx.Err())
338+
case err = <-connector.connected:
339+
if err != nil {
340+
options.Logger.Error(ctx, "failed to connect to tailnet v2+ API", slog.Error(err))
341+
return nil, xerrors.Errorf("start connector: %w", err)
455342
}
343+
options.Logger.Debug(ctx, "connected to tailnet v2+ API")
456344
}
457345

458346
agentConn = NewWorkspaceAgentConn(conn, WorkspaceAgentConnOptions{
@@ -464,8 +352,7 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID,
464352
AgentIP: WorkspaceAgentIP,
465353
CloseFunc: func() error {
466354
cancel()
467-
<-closedCoordinator
468-
<-closedDerpMap
355+
<-connector.closed
469356
return conn.Close()
470357
},
471358
})
@@ -478,6 +365,171 @@ func (c *Client) DialWorkspaceAgent(dialCtx context.Context, agentID uuid.UUID,
478365
return agentConn, nil
479366
}
480367

368+
// tailnetAPIConnector dials the tailnet API (v2+) and then uses the API with a tailnet.Conn to
369+
//
370+
// 1) run the Coordinate API and pass node information back and forth
371+
// 2) stream DERPMap updates and program the Conn
372+
//
373+
// These functions share the same websocket, and so are combined here so that if we hit a problem
374+
// we tear the whole thing down and start over with a new websocket.
375+
//
376+
// @typescript-ignore tailnetAPIConnector
377+
type tailnetAPIConnector struct {
378+
ctx context.Context
379+
logger slog.Logger
380+
381+
agentID uuid.UUID
382+
coordinateURL string
383+
dialOptions *websocket.DialOptions
384+
conn *tailnet.Conn
385+
386+
connected chan error
387+
isFirst bool
388+
closed chan struct{}
389+
}
390+
391+
// runTailnetAPIConnector creates and runs a tailnetAPIConnector
392+
func runTailnetAPIConnector(
393+
ctx context.Context, logger slog.Logger,
394+
agentID uuid.UUID, coordinateURL string, dialOptions *websocket.DialOptions,
395+
conn *tailnet.Conn,
396+
) *tailnetAPIConnector {
397+
tac := &tailnetAPIConnector{
398+
ctx: ctx,
399+
logger: logger,
400+
agentID: agentID,
401+
coordinateURL: coordinateURL,
402+
dialOptions: dialOptions,
403+
conn: conn,
404+
connected: make(chan error, 1),
405+
closed: make(chan struct{}),
406+
}
407+
go tac.run()
408+
return tac
409+
}
410+
411+
func (tac *tailnetAPIConnector) run() {
412+
tac.isFirst = true
413+
defer close(tac.closed)
414+
for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(tac.ctx); {
415+
tailnetClient, err := tac.dial()
416+
if err != nil {
417+
continue
418+
}
419+
tac.logger.Debug(tac.ctx, "obtained tailnet API v2+ client")
420+
tac.coordinateAndDERPMap(tailnetClient)
421+
tac.logger.Debug(tac.ctx, "tailnet API v2+ connection lost")
422+
}
423+
}
424+
425+
func (tac *tailnetAPIConnector) dial() (proto.DRPCTailnetClient, error) {
426+
tac.logger.Debug(tac.ctx, "dialing Coder tailnet v2+ API")
427+
// nolint:bodyclose
428+
ws, res, err := websocket.Dial(tac.ctx, tac.coordinateURL, tac.dialOptions)
429+
if tac.isFirst {
430+
if res != nil && res.StatusCode == http.StatusConflict {
431+
err = ReadBodyAsError(res)
432+
tac.connected <- err
433+
return nil, err
434+
}
435+
tac.isFirst = false
436+
close(tac.connected)
437+
}
438+
if err != nil {
439+
if !errors.Is(err, context.Canceled) {
440+
tac.logger.Error(tac.ctx, "failed to dial tailnet v2+ API", slog.Error(err))
441+
}
442+
return nil, err
443+
}
444+
client, err := tailnet.NewDRPCClient(websocket.NetConn(tac.ctx, ws, websocket.MessageBinary))
445+
if err != nil {
446+
tac.logger.Debug(tac.ctx, "failed to create DRPCClient", slog.Error(err))
447+
_ = ws.Close(websocket.StatusInternalError, "")
448+
return nil, err
449+
}
450+
return client, err
451+
}
452+
453+
// coordinateAndDERPMap uses the provided client to coordinate and stream DERP Maps. It is combined
454+
// into one function so that a problem with one tears down the other and triggers a retry (if
455+
// appropriate). We multiplex both RPCs over the same websocket, so we want them to share the same
456+
// fate.
457+
func (tac *tailnetAPIConnector) coordinateAndDERPMap(client proto.DRPCTailnetClient) {
458+
defer func() {
459+
conn := client.DRPCConn()
460+
closeErr := conn.Close()
461+
if closeErr != nil &&
462+
!xerrors.Is(closeErr, io.EOF) &&
463+
!xerrors.Is(closeErr, context.Canceled) &&
464+
!xerrors.Is(closeErr, context.DeadlineExceeded) {
465+
tac.logger.Error(tac.ctx, "error closing DRPC connection", slog.Error(closeErr))
466+
<-conn.Closed()
467+
}
468+
}()
469+
eg, egCtx := errgroup.WithContext(tac.ctx)
470+
eg.Go(func() error {
471+
return tac.coordinate(egCtx, client)
472+
})
473+
eg.Go(func() error {
474+
return tac.derpMap(egCtx, client)
475+
})
476+
err := eg.Wait()
477+
if err != nil &&
478+
!xerrors.Is(err, io.EOF) &&
479+
!xerrors.Is(err, context.Canceled) &&
480+
!xerrors.Is(err, context.DeadlineExceeded) {
481+
tac.logger.Error(tac.ctx, "error while connected to tailnet v2+ API")
482+
}
483+
}
484+
485+
func (tac *tailnetAPIConnector) coordinate(ctx context.Context, client proto.DRPCTailnetClient) error {
486+
coord, err := client.Coordinate(ctx)
487+
if err != nil {
488+
return xerrors.Errorf("failed to connect to Coordinate RPC: %w", err)
489+
}
490+
defer func() {
491+
cErr := coord.Close()
492+
if cErr != nil {
493+
tac.logger.Debug(ctx, "error closing Coordinate RPC", slog.Error(cErr))
494+
}
495+
}()
496+
coordination := tailnet.NewRemoteCoordination(tac.logger, coord, tac.conn, tac.agentID)
497+
tac.logger.Debug(ctx, "serving coordinator")
498+
err = <-coordination.Error()
499+
if err != nil &&
500+
!xerrors.Is(err, io.EOF) &&
501+
!xerrors.Is(err, context.Canceled) &&
502+
!xerrors.Is(err, context.DeadlineExceeded) {
503+
return xerrors.Errorf("remote coordination error: %w", err)
504+
}
505+
return nil
506+
}
507+
508+
func (tac *tailnetAPIConnector) derpMap(ctx context.Context, client proto.DRPCTailnetClient) error {
509+
s, err := client.StreamDERPMaps(ctx, &proto.StreamDERPMapsRequest{})
510+
if err != nil {
511+
return xerrors.Errorf("failed to connect to StreamDERPMaps RPC: %w", err)
512+
}
513+
defer func() {
514+
cErr := s.Close()
515+
if cErr != nil {
516+
tac.logger.Debug(ctx, "error closing StreamDERPMaps RPC", slog.Error(cErr))
517+
}
518+
}()
519+
for {
520+
dmp, err := s.Recv()
521+
if err != nil {
522+
if xerrors.Is(err, io.EOF) || xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
523+
return nil
524+
}
525+
return xerrors.Errorf("error receiving DERP Map: %w", err)
526+
}
527+
tac.logger.Debug(ctx, "got new DERP Map", slog.F("derp_map", dmp))
528+
dm := tailnet.DERPMapFromProto(dmp)
529+
tac.conn.SetDERPMap(dm)
530+
}
531+
}
532+
481533
// WatchWorkspaceAgentMetadata watches the metadata of a workspace agent.
482534
// The returned channel will be closed when the context is canceled. Exactly
483535
// one error will be sent on the error channel. The metadata channel is never closed.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy