From dedb75aea48256d0b9b45f5c3fffc345fe9df027 Mon Sep 17 00:00:00 2001 From: ziggie Date: Wed, 2 Jul 2025 15:19:28 +0200 Subject: [PATCH 1/3] discovery: add comments --- discovery/gossiper.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/discovery/gossiper.go b/discovery/gossiper.go index 3b98b4caa..77fdb37c0 100644 --- a/discovery/gossiper.go +++ b/discovery/gossiper.go @@ -411,6 +411,10 @@ type processedNetworkMsg struct { // cachedNetworkMsg is a wrapper around a network message that can be used with // *lru.Cache. +// +// NOTE: This struct is not thread safe which means you need to assure no +// concurrent read write access to it and all its contents which are pointers +// as well. type cachedNetworkMsg struct { msgs []*processedNetworkMsg } @@ -3136,7 +3140,9 @@ func (d *AuthenticatedGossiper) handleChanUpdate(ctx context.Context, // NOTE: We don't return anything on the error channel for this // message, as we expect that will be done when this - // ChannelUpdate is later reprocessed. + // ChannelUpdate is later reprocessed. This might never happen + // if the corresponding ChannelAnnouncement is never received + // or the LRU cache is filled up and the entry is evicted. return nil, false default: From ed8ad3d110e35aecc26590923030cc4d72ca1b49 Mon Sep 17 00:00:00 2001 From: ziggie Date: Wed, 2 Jul 2025 16:34:23 +0200 Subject: [PATCH 2/3] brontide: remove async goroutine to process gossip process result We cannot rely on a response currently so we avoid spawning goroutines. This is just a temporary fix to avoid the goroutine leak. --- peer/brontide.go | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/peer/brontide.go b/peer/brontide.go index 213f28c76..514256420 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -1990,32 +1990,22 @@ func newDiscMsgStream(p *Brontide) *msgStream { // so that a parent context can be passed in here. ctx := context.TODO() + // Processing here means we send it to the gossiper which then + // decides whether this message is processed immediately or + // waits for dependent messages to be processed. It can also + // happen that the message is not processed at all if it is + // premature and the LRU cache fills up and the message is + // deleted. p.log.Debugf("Processing remote msg %T", msg) - errChan := p.cfg.AuthGossiper.ProcessRemoteAnnouncement( - ctx, msg, p, - ) - - // Start a goroutine to process the error channel for logging - // purposes. - // - // TODO(ziggie): Maybe use the error to potentially punish the - // peer depending on the error ? - go func() { - select { - case <-p.cg.Done(): - return - - case err := <-errChan: - if err != nil { - p.log.Warnf("Error processing remote "+ - "msg %T: %v", msg, - err) - } - } - - p.log.Debugf("Processed remote msg %T", msg) - }() + // TODO(ziggie): ProcessRemoteAnnouncement returns an error + // channel, but we cannot rely on it being written to. + // Because some messages might never be processed (e.g. + // premature channel updates). We should change the design here + // and use the actor model pattern as soon as it is available. + // So for now we should NOT use the error channel. + // See https://github.com/lightningnetwork/lnd/pull/9820. + p.cfg.AuthGossiper.ProcessRemoteAnnouncement(ctx, msg, p) } return newMsgStream( From e6aff211df58eebb713df6807b1f33e483b868e4 Mon Sep 17 00:00:00 2001 From: ziggie Date: Tue, 1 Jul 2025 21:25:02 +0200 Subject: [PATCH 3/3] docs: add release-notes --- docs/release-notes/release-notes-0.19.2.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/release-notes/release-notes-0.19.2.md b/docs/release-notes/release-notes-0.19.2.md index 440a46488..a6774661a 100644 --- a/docs/release-notes/release-notes-0.19.2.md +++ b/docs/release-notes/release-notes-0.19.2.md @@ -32,6 +32,10 @@ - [Fixed](https://github.com/lightningnetwork/lnd/pull/9978) a deadlock which can happen when the peer start-up has not yet completed but a another p2p connection attempt tries to disconnect the peer. + +- [Fixed](https://github.com/lightningnetwork/lnd/pull/10012) a case which + could lead to a memory issues due to a goroutine leak in the peer/gossiper + code. # New Features