From 325e844a96a4adcb8f97c85b6b412345f5d5ae96 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Tue, 25 Jul 2023 11:14:41 -0700 Subject: [PATCH] peer: ensure the peer has been started before Disconnect can be called In this commit, we make sure that all the `wg.Add(1)` calls succeed before we attempt to wait on the shutdown of all the goroutines. Under rare scheduling scenarios, if both `Start` and `Disconnect` are called concurrently, then this internal race error can be hit, causing the panic to occur. Fixes https://github.com/lightningnetwork/lnd/issues/7853 --- docs/release-notes/release-notes-0.17.0.md | 5 ++++ peer/brontide.go | 27 ++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/docs/release-notes/release-notes-0.17.0.md b/docs/release-notes/release-notes-0.17.0.md index 272585947..4878f1424 100644 --- a/docs/release-notes/release-notes-0.17.0.md +++ b/docs/release-notes/release-notes-0.17.0.md @@ -194,6 +194,10 @@ unlock or create. Tor](https://github.com/lightningnetwork/lnd/pull/7783), even when `tor.skip-proxy-for-clearnet-targets=true` was set. +* Fix a [concurrency issue related to rapid peer teardown and + creation](https://github.com/lightningnetwork/lnd/pull/7856) that can arise + under rare scenarios. + ### Tooling and documentation * Add support for [custom `RPCHOST` and @@ -226,6 +230,7 @@ unlock or create. * Maxwell Sayles * Michael Street * MG-ng +* Olaoluwa Osuntokun * Oliver Gugger * Pierre Beugnet * Satarupa Deb diff --git a/peer/brontide.go b/peer/brontide.go index 0f1fc9bc8..591f6601a 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -469,8 +469,9 @@ type Brontide struct { // potentially holding lots of un-consumed events. channelEventClient *subscribe.Client - quit chan struct{} - wg sync.WaitGroup + startReady chan struct{} + quit chan struct{} + wg sync.WaitGroup // log is a peer-specific logging instance. log btclog.Logger @@ -500,6 +501,7 @@ func NewBrontide(cfg Config) *Brontide { linkFailures: make(chan linkFailureReport), chanCloseMsgs: make(chan *closeMsg), resentChanSyncMsg: make(map[lnwire.ChannelID]struct{}), + startReady: make(chan struct{}), quit: make(chan struct{}), log: build.NewPrefixLog(logPrefix, peerLog), } @@ -514,6 +516,11 @@ func (p *Brontide) Start() error { return nil } + // Once we've finished starting up the peer, we'll signal to other + // goroutines that the they can move forward to tear down the peer, or + // carry out other relevant changes. + defer close(p.startReady) + p.log.Tracef("starting with conn[%v->%v]", p.cfg.Conn.LocalAddr(), p.cfg.Conn.RemoteAddr()) @@ -1050,6 +1057,14 @@ func (p *Brontide) maybeSendNodeAnn(channels []*channeldb.OpenChannel) { // calling Disconnect will signal the quit channel and the method will not // block, since no goroutines were spawned. func (p *Brontide) WaitForDisconnect(ready chan struct{}) { + // Before we try to call the `Wait` goroutine, we'll make sure the main + // set of goroutines are already active. + select { + case <-p.startReady: + case <-p.quit: + return + } + select { case <-ready: case <-p.quit: @@ -1066,6 +1081,14 @@ func (p *Brontide) Disconnect(reason error) { return } + // Make sure initialization has completed before we try to tear things + // down. + select { + case <-p.startReady: + case <-p.quit: + return + } + err := fmt.Errorf("disconnecting %s, reason: %v", p, reason) p.storeError(err)