From 7556402ea44048393bda99d257f60853521e077e Mon Sep 17 00:00:00 2001 From: Eugene Siegel Date: Thu, 16 Nov 2023 11:02:23 -0500 Subject: [PATCH] peer: send reestablish, shutdown messages before starting writeHandler This is to avoid a potential race on WriteMessage and Flush internals. Because there is no locking on WriteMessage and Flush, if we allow writeMessage calls in Start after the writeHandler has started, the writeMessage calls may call WriteMessage/Flush at the same time that writeMessage calls from the writeHandler does. Since there is no locking, internals like b.nextHeaderSend can race and cause panics. --- peer/brontide.go | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/peer/brontide.go b/peer/brontide.go index 4c7694085..d7d93f4fa 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -703,6 +703,23 @@ func (p *Brontide) Start() error { p.startTime = time.Now() + // Before launching the writeHandler goroutine, we send any channel + // sync messages that must be resent for borked channels. We do this to + // avoid data races with WriteMessage & Flush calls. + if len(msgs) > 0 { + p.log.Infof("Sending %d channel sync messages to peer after "+ + "loading active channels", len(msgs)) + + // Send the messages directly via writeMessage and bypass the + // writeHandler goroutine. + for _, msg := range msgs { + if err := p.writeMessage(msg); err != nil { + return fmt.Errorf("unable to send "+ + "reestablish msg: %v", err) + } + } + } + err = p.pingManager.Start() if err != nil { return fmt.Errorf("could not start ping manager %w", err) @@ -717,23 +734,6 @@ func (p *Brontide) Start() error { // Signal to any external processes that the peer is now active. close(p.activeSignal) - // Now that the peer has started up, we send any channel sync messages - // that must be resent for borked channels. - if len(msgs) > 0 { - p.log.Infof("Sending %d channel sync messages to peer after "+ - "loading active channels", len(msgs)) - - // Send the messages directly via writeMessage and bypass the - // writeHandler goroutine to avoid cases where writeHandler - // may exit and cause a deadlock. - for _, msg := range msgs { - if err := p.writeMessage(msg); err != nil { - return fmt.Errorf("unable to send reestablish"+ - "msg: %v", err) - } - } - } - // Node announcements don't propagate very well throughout the network // as there isn't a way to efficiently query for them through their // timestamp, mostly affecting nodes that were offline during the time @@ -2093,6 +2093,12 @@ func (p *Brontide) logWireMessage(msg lnwire.Message, read bool) { // message buffered on the connection. It is safe to call this method again // with a nil message iff a timeout error is returned. This will continue to // flush the pending message to the wire. +// +// NOTE: +// Besides its usage in Start, this function should not be used elsewhere +// except in writeHandler. If multiple goroutines call writeMessage at the same +// time, panics can occur because WriteMessage and Flush don't use any locking +// internally. func (p *Brontide) writeMessage(msg lnwire.Message) error { // Only log the message on the first attempt. if msg != nil {