From 3df216f6c31f160efa41a8b3e57de208a465cddb Mon Sep 17 00:00:00 2001 From: Andras Banki-Horvath Date: Sat, 6 Nov 2021 21:22:45 +0100 Subject: [PATCH 1/2] channeldb: avoid locking the graph cache while iterating channels It may happen that we do pathfinding while also attempt to change the graph. In this case changing the database, channel state, graph while reading from the same sources may create deadlocks. To resolve this we change locking policy in the graph cache when pathfinding. --- channeldb/graph_cache.go | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/channeldb/graph_cache.go b/channeldb/graph_cache.go index 3618b6d43..0af8fb591 100644 --- a/channeldb/graph_cache.go +++ b/channeldb/graph_cache.go @@ -401,10 +401,9 @@ func (c *GraphCache) UpdateChannel(info *ChannelEdgeInfo) { } } -// ForEachChannel invokes the given callback for each channel of the given node. -func (c *GraphCache) ForEachChannel(node route.Vertex, - cb func(channel *DirectedChannel) error) error { - +// getChannels returns a copy of the passed node's channels or nil if there +// isn't any. +func (c *GraphCache) getChannels(node route.Vertex) []*DirectedChannel { c.mtx.RLock() defer c.mtx.RUnlock() @@ -428,6 +427,8 @@ func (c *GraphCache) ForEachChannel(node route.Vertex, return node } + i := 0 + channelsCopy := make([]*DirectedChannel, len(channels)) for _, channel := range channels { // We need to copy the channel and policy to avoid it being // updated in the cache if the path finding algorithm sets @@ -439,9 +440,30 @@ func (c *GraphCache) ForEachChannel(node route.Vertex, channelCopy.InPolicy.ToNodeFeatures = features } - if err := cb(channelCopy); err != nil { + channelsCopy[i] = channelCopy + i++ + } + + return channelsCopy +} + +// ForEachChannel invokes the given callback for each channel of the given node. +func (c *GraphCache) ForEachChannel(node route.Vertex, + cb func(channel *DirectedChannel) error) error { + + // Obtain a copy of the node's channels. We need do this in order to + // avoid deadlocks caused by interaction with the graph cache, channel + // state and the graph database from multiple goroutines. This snapshot + // is only used for path finding where being stale is acceptable since + // the real world graph and our representation may always become + // slightly out of sync for a short time and the actual channel state + // is stored separately. + channels := c.getChannels(node) + for _, channel := range channels { + if err := cb(channel); err != nil { return err } + } return nil From 0beaee0336a3e7a3f1b25961c191869ae51976fd Mon Sep 17 00:00:00 2001 From: Andras Banki-Horvath Date: Sat, 6 Nov 2021 21:32:39 +0100 Subject: [PATCH 2/2] docs: update release notes --- docs/release-notes/release-notes-0.14.0.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/release-notes/release-notes-0.14.0.md b/docs/release-notes/release-notes-0.14.0.md index c1fa4b253..c40cbd2c5 100644 --- a/docs/release-notes/release-notes-0.14.0.md +++ b/docs/release-notes/release-notes-0.14.0.md @@ -618,6 +618,9 @@ messages directly. There is no routing/path finding involved. * [Fixed an issue with external listeners and the `--noseedbackup` development flag](https://github.com/lightningnetwork/lnd/pull/5930). +* [Fix deadlock when using the graph cache]( + https://github.com/lightningnetwork/lnd/pull/5941) + ## Documentation The [code contribution guidelines have been updated to mention the new