@@ -52,6 +52,9 @@ type Stream struct {
52
52
53
53
// Shutdown signal
54
54
shutdownChan chan struct {}
55
+
56
+ // Context cancellation for BackedPipe
57
+ cancel context.CancelFunc
55
58
}
56
59
57
60
// reconnectRequest represents a pending reconnection request
@@ -67,8 +70,73 @@ type reconnectResponse struct {
67
70
err error
68
71
}
69
72
73
+ // streamReconnector implements backedpipe.Reconnector interface for Stream
74
+ type streamReconnector struct {
75
+ s * Stream
76
+ }
77
+
78
+ // Reconnect implements the backedpipe.Reconnector interface
79
+ func (r * streamReconnector ) Reconnect (ctx context.Context , writerSeqNum uint64 ) (io.ReadWriteCloser , uint64 , error ) {
80
+ r .s .mu .Lock ()
81
+
82
+ // If there's already a pending reconnect, this is a concurrent call.
83
+ // We should return an error to let the BackedPipe retry later.
84
+ if r .s .pendingReconnect != nil {
85
+ r .s .mu .Unlock ()
86
+ return nil , 0 , xerrors .New ("reconnection already in progress" )
87
+ }
88
+
89
+ // Fast path: if the stream is already shutting down, abort immediately
90
+ if r .s .closed {
91
+ r .s .mu .Unlock ()
92
+ return nil , 0 , xerrors .New ("stream is shutting down" )
93
+ }
94
+
95
+ // Wait for HandleReconnect to be called with a new connection
96
+ responseChan := make (chan reconnectResponse , 1 )
97
+ r .s .pendingReconnect = & reconnectRequest {
98
+ writerSeqNum : writerSeqNum ,
99
+ response : responseChan ,
100
+ }
101
+ r .s .handshakePending = true
102
+ // Mark disconnected if we previously had a client connection
103
+ if r .s .connected {
104
+ r .s .connected = false
105
+ r .s .lastDisconnectionAt = time .Now ()
106
+ }
107
+ r .s .logger .Info (context .Background (), "pending reconnect set" ,
108
+ slog .F ("writer_seq" , writerSeqNum ))
109
+ // Signal waiters a reconnect request is pending
110
+ r .s .reconnectCond .Broadcast ()
111
+ r .s .mu .Unlock ()
112
+
113
+ // Wait for response from HandleReconnect or context cancellation
114
+ r .s .logger .Info (context .Background (), "reconnect function waiting for response" )
115
+ select {
116
+ case resp := <- responseChan :
117
+ r .s .logger .Info (context .Background (), "reconnect function got response" ,
118
+ slog .F ("has_conn" , resp .conn != nil ),
119
+ slog .F ("read_seq" , resp .readSeq ),
120
+ slog .Error (resp .err ))
121
+ return resp .conn , resp .readSeq , resp .err
122
+ case <- ctx .Done ():
123
+ // Context was canceled, return error immediately
124
+ // The stream's Close() method will handle cleanup
125
+ r .s .logger .Info (context .Background (), "reconnect function context canceled" , slog .Error (ctx .Err ()))
126
+ return nil , 0 , ctx .Err ()
127
+ case <- r .s .shutdownChan :
128
+ // Stream is being shut down, return error immediately
129
+ // The stream's Close() method will handle cleanup
130
+ r .s .logger .Info (context .Background (), "reconnect function shutdown signal received" )
131
+ return nil , 0 , xerrors .New ("stream is shutting down" )
132
+ }
133
+ }
134
+
70
135
// NewStream creates a new immortal stream
71
136
func NewStream (id uuid.UUID , name string , port int , logger slog.Logger ) * Stream {
137
+ // Create a context that will be canceled when the stream is closed
138
+ ctx , cancel := context .WithCancel (context .Background ())
139
+
72
140
stream := & Stream {
73
141
id : id ,
74
142
name : name ,
@@ -78,72 +146,13 @@ func NewStream(id uuid.UUID, name string, port int, logger slog.Logger) *Stream
78
146
disconnectChan : make (chan struct {}, 1 ),
79
147
shutdownChan : make (chan struct {}),
80
148
reconnectReq : make (chan struct {}, 1 ),
149
+ cancel : cancel , // Store cancel function for cleanup
81
150
}
82
151
stream .reconnectCond = sync .NewCond (& stream .mu )
83
152
84
- // Create a reconnect function that waits for a client connection
85
- reconnectFn := func (ctx context.Context , writerSeqNum uint64 ) (io.ReadWriteCloser , uint64 , error ) {
86
- // Wait for HandleReconnect to be called with a new connection
87
- responseChan := make (chan reconnectResponse , 1 )
88
-
89
- stream .mu .Lock ()
90
- stream .pendingReconnect = & reconnectRequest {
91
- writerSeqNum : writerSeqNum ,
92
- response : responseChan ,
93
- }
94
- stream .handshakePending = true
95
- // Mark disconnected if we previously had a client connection
96
- if stream .connected {
97
- stream .connected = false
98
- stream .lastDisconnectionAt = time .Now ()
99
- }
100
- stream .logger .Info (context .Background (), "pending reconnect set" ,
101
- slog .F ("writer_seq" , writerSeqNum ))
102
- // Signal waiters a reconnect request is pending
103
- stream .reconnectCond .Broadcast ()
104
- stream .mu .Unlock ()
105
-
106
- // Fast path: if the stream is already shutting down, abort immediately
107
- select {
108
- case <- stream .shutdownChan :
109
- stream .mu .Lock ()
110
- // Clear the pending request since we're aborting
111
- if stream .pendingReconnect != nil {
112
- stream .pendingReconnect = nil
113
- }
114
- stream .mu .Unlock ()
115
- return nil , 0 , xerrors .New ("stream is shutting down" )
116
- default :
117
- }
118
-
119
- // Wait for response from HandleReconnect or context cancellation
120
- stream .logger .Info (context .Background (), "reconnect function waiting for response" )
121
- select {
122
- case resp := <- responseChan :
123
- stream .logger .Info (context .Background (), "reconnect function got response" ,
124
- slog .F ("has_conn" , resp .conn != nil ),
125
- slog .F ("read_seq" , resp .readSeq ),
126
- slog .Error (resp .err ))
127
- return resp .conn , resp .readSeq , resp .err
128
- case <- ctx .Done ():
129
- // Context was canceled, clear pending request and return error
130
- stream .mu .Lock ()
131
- stream .pendingReconnect = nil
132
- stream .handshakePending = false
133
- stream .mu .Unlock ()
134
- return nil , 0 , ctx .Err ()
135
- case <- stream .shutdownChan :
136
- // Stream is being shut down, clear pending request and return error
137
- stream .mu .Lock ()
138
- stream .pendingReconnect = nil
139
- stream .handshakePending = false
140
- stream .mu .Unlock ()
141
- return nil , 0 , xerrors .New ("stream is shutting down" )
142
- }
143
- }
144
-
145
- // Create BackedPipe with background context
146
- stream .pipe = backedpipe .NewBackedPipe (context .Background (), reconnectFn )
153
+ // Create BackedPipe with streamReconnector
154
+ reconnector := & streamReconnector {s : stream }
155
+ stream .pipe = backedpipe .NewBackedPipe (ctx , reconnector )
147
156
148
157
// Start reconnect worker: dedupe pokes and call ForceReconnect when safe.
149
158
go func () {
@@ -152,7 +161,7 @@ func NewStream(id uuid.UUID, name string, port int, logger slog.Logger) *Stream
152
161
case <- stream .shutdownChan :
153
162
return
154
163
case <- stream .reconnectReq :
155
- // Drain extra pokes to coalesce
164
+ // Drain extra pokes
156
165
for {
157
166
select {
158
167
case <- stream .reconnectReq :
@@ -240,22 +249,10 @@ func (s *Stream) HandleReconnect(clientConn io.ReadWriteCloser, readSeqNum uint6
240
249
s .mu .Unlock ()
241
250
respCh <- reconnectResponse {conn : clientConn , readSeq : readSeqNum , err : nil }
242
251
243
- // Wait until the pipe reports a connected state so the handshake fully completes.
244
- // Use a bounded timeout to avoid hanging forever in pathological cases.
245
- ctx , cancel := context .WithTimeout (context .Background (), 10 * time .Second )
246
- err := s .pipe .WaitForConnection (ctx )
247
- cancel ()
248
- if err != nil {
249
- s .mu .Lock ()
250
- s .connected = false
251
- if s .reconnectCond != nil {
252
- s .reconnectCond .Broadcast ()
253
- }
254
- s .mu .Unlock ()
255
- s .logger .Warn (context .Background (), "failed to connect backed pipe" , slog .Error (err ))
256
- return xerrors .Errorf ("failed to establish connection: %w" , err )
257
- }
258
-
252
+ // The connection has been provided to the BackedPipe via the response channel.
253
+ // The BackedPipe will establish the connection, and since we control the
254
+ // reconnection process, we know it will succeed (or the Reconnect method
255
+ // would have returned an error).
259
256
s .mu .Lock ()
260
257
s .lastConnectionAt = time .Now ()
261
258
s .connected = true
@@ -307,6 +304,11 @@ func (s *Stream) Close() error {
307
304
s .closed = true
308
305
s .connected = false
309
306
307
+ // Cancel the context to interrupt any pending BackedPipe operations
308
+ if s .cancel != nil {
309
+ s .cancel ()
310
+ }
311
+
310
312
// Signal shutdown to any pending reconnect attempts and listeners
311
313
// Closing the channel wakes all waiters exactly once
312
314
select {
@@ -424,15 +426,44 @@ func (s *Stream) startCopyingLocked() {
424
426
// The BackedPipe will block when no client is connected
425
427
buf := make ([]byte , 32 * 1024 )
426
428
for {
429
+ // Check if we should shut down before attempting to read
430
+ select {
431
+ case <- s .shutdownChan :
432
+ s .logger .Debug (context .Background (), "shutdown signal received, exiting copy goroutine" )
433
+ return
434
+ default :
435
+ }
436
+
427
437
// Use a buffer for copying
428
438
n , err := s .pipe .Read (buf )
429
- // Log significant events
430
- if errors .Is (err , io .EOF ) {
431
- s .logger .Debug (context .Background (), "got EOF from pipe" )
432
- s .SignalDisconnect ()
433
- } else if err != nil && ! errors .Is (err , io .ErrClosedPipe ) {
434
- s .logger .Debug (context .Background (), "error reading from pipe" , slog .Error (err ))
435
- s .SignalDisconnect ()
439
+
440
+ if err != nil {
441
+ // Check for fatal errors that should terminate the goroutine
442
+ if xerrors .Is (err , io .ErrClosedPipe ) {
443
+ // The pipe itself is closed, we're done
444
+ s .logger .Debug (context .Background (), "pipe closed, exiting copy goroutine" )
445
+ s .SignalDisconnect ()
446
+ return
447
+ }
448
+
449
+ // Check for BackedPipe specific errors
450
+ if xerrors .Is (err , backedpipe .ErrPipeClosed ) {
451
+ s .logger .Debug (context .Background (), "backed pipe closed, exiting copy goroutine" )
452
+ s .SignalDisconnect ()
453
+ return
454
+ }
455
+
456
+ // Log other errors but continue
457
+ if errors .Is (err , io .EOF ) {
458
+ s .logger .Debug (context .Background (), "got EOF from pipe" )
459
+ s .SignalDisconnect ()
460
+ } else {
461
+ s .logger .Debug (context .Background (), "error reading from pipe" , slog .Error (err ))
462
+ s .SignalDisconnect ()
463
+ }
464
+
465
+ // For non-fatal errors, continue the loop
466
+ continue
436
467
}
437
468
438
469
if n > 0 {
@@ -445,17 +476,6 @@ func (s *Stream) startCopyingLocked() {
445
476
return
446
477
}
447
478
}
448
-
449
- if err != nil {
450
- // Check if this is a fatal error
451
- if xerrors .Is (err , io .ErrClosedPipe ) {
452
- // The pipe itself is closed, we're done
453
- s .logger .Debug (context .Background (), "pipe closed, exiting copy goroutine" )
454
- s .SignalDisconnect ()
455
- return
456
- }
457
- // Any other error (including EOF) is handled by BackedPipe; continue
458
- }
459
479
}
460
480
}()
461
481
0 commit comments