Skip to content

Commit 2daadfc

Browse files
committed
feat: add setAllPeersLost to configMaps
1 parent 6ef3db5 commit 2daadfc

File tree

2 files changed

+108
-0
lines changed

2 files changed

+108
-0
lines changed

tailnet/configmaps.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,29 @@ func (c *configMaps) updatePeerLocked(update *proto.CoordinateResponse_PeerUpdat
430430
}
431431
}
432432

433+
// setAllPeersLost marks all peers as lost. Typically, this is called when we lose connection to
434+
// the Coordinator. (When we reconnect, we will get NODE updates for all peers that are still connected
435+
// and mark them as not lost.)
436+
func (c *configMaps) setAllPeersLost() {
437+
c.L.Lock()
438+
defer c.L.Unlock()
439+
for _, lc := range c.peers {
440+
if lc.lost {
441+
// skip processing already lost nodes, as this just results in timer churn
442+
continue
443+
}
444+
lc.lost = true
445+
lc.setLostTimer(c)
446+
// it's important to drop a log here so that we see it get marked lost if grepping thru
447+
// the logs for a specific peer
448+
c.logger.Debug(context.Background(),
449+
"setAllPeersLost marked peer lost",
450+
slog.F("peer_id", lc.peerID),
451+
slog.F("key_id", lc.node.Key.ShortString()),
452+
)
453+
}
454+
}
455+
433456
// peerLostTimeout is the callback that peerLifecycle uses when a peer is lost the timeout to
434457
// receive a handshake fires.
435458
func (c *configMaps) peerLostTimeout(id uuid.UUID) {

tailnet/configmaps_internal_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,91 @@ func TestConfigMaps_updatePeers_lost_and_found(t *testing.T) {
491491
_ = testutil.RequireRecvCtx(ctx, t, done)
492492
}
493493

494+
func TestConfigMaps_setAllPeersLost(t *testing.T) {
495+
t.Parallel()
496+
ctx := testutil.Context(t, testutil.WaitShort)
497+
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
498+
fEng := newFakeEngineConfigurable()
499+
nodePrivateKey := key.NewNode()
500+
nodeID := tailcfg.NodeID(5)
501+
discoKey := key.NewDisco()
502+
uut := newConfigMaps(logger, fEng, nodeID, nodePrivateKey, discoKey.Public())
503+
defer uut.close()
504+
start := time.Date(2024, time.January, 1, 8, 0, 0, 0, time.UTC)
505+
mClock := clock.NewMock()
506+
mClock.Set(start)
507+
uut.clock = mClock
508+
509+
p1ID := uuid.UUID{1}
510+
p1Node := newTestNode(1)
511+
p1n, err := NodeToProto(p1Node)
512+
require.NoError(t, err)
513+
p2ID := uuid.UUID{2}
514+
p2Node := newTestNode(2)
515+
p2n, err := NodeToProto(p2Node)
516+
require.NoError(t, err)
517+
518+
s1 := expectStatusWithHandshake(ctx, t, fEng, p1Node.Key, start)
519+
520+
updates := []*proto.CoordinateResponse_PeerUpdate{
521+
{
522+
Id: p1ID[:],
523+
Kind: proto.CoordinateResponse_PeerUpdate_NODE,
524+
Node: p1n,
525+
},
526+
{
527+
Id: p2ID[:],
528+
Kind: proto.CoordinateResponse_PeerUpdate_NODE,
529+
Node: p2n,
530+
},
531+
}
532+
uut.updatePeers(updates)
533+
nm := testutil.RequireRecvCtx(ctx, t, fEng.setNetworkMap)
534+
r := testutil.RequireRecvCtx(ctx, t, fEng.reconfig)
535+
require.Len(t, nm.Peers, 2)
536+
require.Len(t, r.wg.Peers, 2)
537+
_ = testutil.RequireRecvCtx(ctx, t, s1)
538+
539+
mClock.Add(5 * time.Second)
540+
uut.setAllPeersLost()
541+
542+
// No reprogramming yet, since we keep the peer around.
543+
select {
544+
case <-fEng.setNetworkMap:
545+
t.Fatal("should not reprogram")
546+
default:
547+
// OK!
548+
}
549+
550+
// When we advance the clock, even by a few ms, the timeout for peer 2 pops
551+
// because our status only includes a handshake for peer 1
552+
s2 := expectStatusWithHandshake(ctx, t, fEng, p1Node.Key, start)
553+
mClock.Add(time.Millisecond * 10)
554+
_ = testutil.RequireRecvCtx(ctx, t, s2)
555+
556+
nm = testutil.RequireRecvCtx(ctx, t, fEng.setNetworkMap)
557+
r = testutil.RequireRecvCtx(ctx, t, fEng.reconfig)
558+
require.Len(t, nm.Peers, 1)
559+
require.Len(t, r.wg.Peers, 1)
560+
561+
// Finally, advance the clock until after the timeout
562+
s3 := expectStatusWithHandshake(ctx, t, fEng, p1Node.Key, start)
563+
mClock.Add(lostTimeout)
564+
_ = testutil.RequireRecvCtx(ctx, t, s3)
565+
566+
nm = testutil.RequireRecvCtx(ctx, t, fEng.setNetworkMap)
567+
r = testutil.RequireRecvCtx(ctx, t, fEng.reconfig)
568+
require.Len(t, nm.Peers, 0)
569+
require.Len(t, r.wg.Peers, 0)
570+
571+
done := make(chan struct{})
572+
go func() {
573+
defer close(done)
574+
uut.close()
575+
}()
576+
_ = testutil.RequireRecvCtx(ctx, t, done)
577+
}
578+
494579
func TestConfigMaps_setBlockEndpoints_different(t *testing.T) {
495580
t.Parallel()
496581
ctx := testutil.Context(t, testutil.WaitShort)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy