From ea7ea33e24a6672a0aec1c3ff62b91666951a65f Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Wed, 14 Jul 2021 18:34:06 -0700 Subject: [PATCH 1/2] lntest/itest: log node name in addition to ID if waitForChannels fails --- lntest/itest/lnd_mpp_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lntest/itest/lnd_mpp_test.go b/lntest/itest/lnd_mpp_test.go index 15a95d316..8fd1f752d 100644 --- a/lntest/itest/lnd_mpp_test.go +++ b/lntest/itest/lnd_mpp_test.go @@ -346,9 +346,9 @@ func (c *mppTestContext) waitForChannels() { ctxt, _ := context.WithTimeout(ctxb, defaultTimeout) err = node.WaitForNetworkChannelOpen(ctxt, chanPoint) if err != nil { - c.t.Fatalf("(%d): timeout waiting for "+ + c.t.Fatalf("(%v:%d): timeout waiting for "+ "channel(%s) open: %v", - node.NodeID, point, err) + node.Cfg.Name, node.NodeID, point, err) } } } From 6cd981420a060ca735bc4cf5e5e728b796fdee49 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Wed, 14 Jul 2021 18:35:48 -0700 Subject: [PATCH 2/2] lntest: fix possible race condition re asserting channel propagation In this commit, we attempt to fix a race condition that may occur in the current AMP and MPP tests. It appears the following scenario is possible: * The `mppTestContext` [is used to create 6 channels back to back](https://github.com/lightningnetwork/lnd/blob/master/lntest/itest/lnd_amp_test.go#L43) * The method used to create the channel ends up calling [`openChannelAndAssert`](https://github.com/lightningnetwork/lnd/blob/edd4152682dc8f70e11da541cd3be4692db4b011/lntest/itest/lnd_mpp_test.go#L300) which'll open the channel, mine 6 blocks, [then ensure that the channel gets advertised](https://github.com/lightningnetwork/lnd/blob/edd4152682dc8f70e11da541cd3be4692db4b011/lntest/itest/assertions.go#L78) * Later on, [we wait for all nodes to hear about all channels on the network level](https://github.com/lightningnetwork/lnd/blob/master/lntest/itest/lnd_amp_test.go#L62) I think the issue here is that we'll potentially already have mined 30 or so blocks before getting to the final nodes, and those nodes may have already heard about the channel already. This may then cause their [`lightningNetworkWatcher`](https://github.com/lightningnetwork/lnd/blob/edd4152682dc8f70e11da541cd3be4692db4b011/lntest/node.go#L1213) goroutine to not properly dispatch this, since it's assumed that the channel hasn't been announced (or notified) when the method is called. One solution here is to just check if the channel is already in the node's graph or not, when we go to register the notification. If we do this in the same state machine as the watcher, then we ensure if the channel is already known, the client is immediately notified. One thing that can help us debug this more in the future is adding additional logging in some of these helper goroutines so we can more easily track the control flow. This commit implements this solution by checking to ensure that the channel isn't already known in our channel graph before attempting to wait for its notification, as we may already have missed the notification before this registration request came in. --- lntest/node.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/lntest/node.go b/lntest/node.go index 27ccb15c4..a7a44df5d 100644 --- a/lntest/node.go +++ b/lntest/node.go @@ -1210,6 +1210,27 @@ func getChanPointFundingTxid(chanPoint *lnrpc.ChannelPoint) ([]byte, error) { return txid, nil } +func checkChanPointInGraph(ctx context.Context, + node *HarnessNode, chanPoint wire.OutPoint) bool { + + ctxt, cancel := context.WithTimeout(ctx, DefaultTimeout) + defer cancel() + chanGraph, err := node.DescribeGraph(ctxt, &lnrpc.ChannelGraphRequest{}) + if err != nil { + return false + } + + targetChanPoint := chanPoint.String() + for _, chanEdge := range chanGraph.Edges { + candidateChanPoint := chanEdge.ChanPoint + if targetChanPoint == candidateChanPoint { + return true + } + } + + return false +} + // lightningNetworkWatcher is a goroutine which is able to dispatch // notifications once it has been observed that a target channel has been // closed or opened within the network. In order to dispatch these @@ -1322,6 +1343,21 @@ func (hn *HarnessNode) lightningNetworkWatcher(subscribed chan error) { continue } + // Before we add the channel to our set of open + // clients, we'll check to see if the channel + // is already in the channel graph of the + // target node. This lets us handle the case + // where a node has already seen a channel + // before a notification has been requested, + // causing us to miss it. + chanFound := checkChanPointInGraph( + context.Background(), hn, targetChan, + ) + if chanFound { + close(watchRequest.eventChan) + continue + } + // Otherwise, we'll add this to the list of // watch open clients for this out point. hn.openClients[targetChan] = append(