From a067aa45ef5df64afb149b7adf02811f084ba757 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Sun, 9 Mar 2025 17:12:44 +0800 Subject: [PATCH 1/5] itest: add `flakeInconsistentHTLCView` --- itest/flakes.go | 38 ++++++++++++++++++++++++++++++++ itest/lnd_route_blinding_test.go | 1 + itest/lnd_sweep_test.go | 2 ++ 3 files changed, 41 insertions(+) diff --git a/itest/flakes.go b/itest/flakes.go index 465ef4bec..ff5db12a1 100644 --- a/itest/flakes.go +++ b/itest/flakes.go @@ -104,3 +104,41 @@ func flakeSkipPendingSweepsCheckDarwin(ht *lntest.HarnessTest, func isDarwin() bool { return runtime.GOOS == "darwin" } + +// flakeInconsistentHTLCView documents a flake found that the `ListChannels` RPC +// can give inaccurate HTLC states, which is found when we call +// `AssertHTLCNotActive` after a commitment dance is finished. Suppose Carol is +// settling an invoice with Bob, from Bob's PoV, a typical healthy settlement +// flow goes like this: +// +// [DBG] PEER brontide.go:2412: Peer([Carol]): Received UpdateFulfillHTLC +// [DBG] HSWC switch.go:1315: Closed completed SETTLE circuit for ... +// [INF] HSWC switch.go:3044: Forwarded HTLC... +// [DBG] PEER brontide.go:2412: Peer([Carol]): Received CommitSig +// [DBG] PEER brontide.go:2412: Peer([Carol]): Sending RevokeAndAck +// [DBG] PEER brontide.go:2412: Peer([Carol]): Sending CommitSig +// [DBG] PEER brontide.go:2412: Peer([Carol]): Received RevokeAndAck +// [DBG] HSWC link.go:3617: ChannelLink([ChanPoint: Bob=>Carol]): settle-fail-filter: count=1, filter=[0] +// [DBG] HSWC switch.go:3001: Circuit is closing for packet... +// +// Bob receives the preimage, closes the circuit, and exchanges commit sig and +// revoke msgs with Carol. Once Bob receives the `CommitSig` from Carol, the +// HTLC should be removed from his `LocalCommitment` via +// `RevokeCurrentCommitment`. +// +// However, in the test where `AssertHTLCNotActive` is called, although the +// above process is finished, the `ListChannels“ still finds the HTLC. Also note +// that the RPC makes direct call to the channeldb without any locks, which +// should be fine as the struct `OpenChannel.LocalCommitment` is passed by +// value, although we need to double check. +// +// TODO(yy): In order to fix it, we should make the RPC share the same view of +// our channel state machine. Instead of making DB queries, it should instead +// use `lnwallet.LightningChannel` instead to stay consistent. +// +//nolint:ll +func flakeInconsistentHTLCView() { + // Perform a sleep so the commiment dance can be finished before we call + // the ListChannels. + time.Sleep(2 * time.Second) +} diff --git a/itest/lnd_route_blinding_test.go b/itest/lnd_route_blinding_test.go index 072788dec..9792b0ca2 100644 --- a/itest/lnd_route_blinding_test.go +++ b/itest/lnd_route_blinding_test.go @@ -815,6 +815,7 @@ func testErrorHandlingOnChainFailure(ht *lntest.HarnessTest) { ht.MineBlocksAndAssertNumTxes(1, 1) // Assert that the HTLC has cleared. + flakeInconsistentHTLCView() ht.AssertHTLCNotActive(bob, testCase.channels[0], hash[:]) ht.AssertHTLCNotActive(alice, testCase.channels[0], hash[:]) diff --git a/itest/lnd_sweep_test.go b/itest/lnd_sweep_test.go index db3e2171e..4b74f4b9b 100644 --- a/itest/lnd_sweep_test.go +++ b/itest/lnd_sweep_test.go @@ -499,6 +499,7 @@ func testSweepCPFPAnchorIncomingTimeout(ht *lntest.HarnessTest) { carol.RPC.SettleInvoice(preimage[:]) // Bob should have settled his outgoing HTLC with Carol. + flakeInconsistentHTLCView() ht.AssertHTLCNotActive(bob, bcChanPoint, payHash[:]) // We'll now mine enough blocks to trigger Bob to force close channel @@ -850,6 +851,7 @@ func testSweepHTLCs(ht *lntest.HarnessTest) { carol.RPC.SettleInvoice(preimageSettled[:]) // Bob should have settled his outgoing HTLC with Carol. + flakeInconsistentHTLCView() ht.AssertHTLCNotActive(bob, bcChanPoint, payHashSettled[:]) // Let Carol go offline so we can focus on testing Bob's sweeping From e0296083ca4a10781d5350e0e8dcdc9462cb0184 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Sun, 9 Mar 2025 18:34:32 +0800 Subject: [PATCH 2/5] itest+lntest: fix flake in `coopCloseWithHTLCsWithRestart` Previously we'd restart Alice and then restart Bob, which means once Alice is shut down and started again before we shut down Bob, Bob will attempt to connect Alice since the connection is permanent, which could put the node in a weird state. We now make sure both nodes are shut down first, then bring them back online to avoid the above case. We may, however, create another test in the future to check the above case if needed. --- itest/lnd_coop_close_with_htlcs_test.go | 11 +++++++---- lntest/harness_assertion.go | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/itest/lnd_coop_close_with_htlcs_test.go b/itest/lnd_coop_close_with_htlcs_test.go index 8707fada7..27fcc4907 100644 --- a/itest/lnd_coop_close_with_htlcs_test.go +++ b/itest/lnd_coop_close_with_htlcs_test.go @@ -196,11 +196,14 @@ func coopCloseWithHTLCsWithRestart(ht *lntest.HarnessTest) { ht.AssertChannelInactive(bob, chanPoint) ht.AssertChannelInactive(alice, chanPoint) - // Now restart Alice and Bob. - ht.RestartNode(alice) - ht.RestartNode(bob) + // Shutdown both Alice and Bob. + restartAlice := ht.SuspendNode(alice) + restartBob := ht.SuspendNode(bob) - ht.AssertConnected(alice, bob) + // Once shutdown, restart and connect them. + require.NoError(ht, restartAlice()) + require.NoError(ht, restartBob()) + ht.EnsureConnected(alice, bob) // Show that both nodes still see the channel as waiting for close after // the restart. diff --git a/lntest/harness_assertion.go b/lntest/harness_assertion.go index fb5b03990..0523562b6 100644 --- a/lntest/harness_assertion.go +++ b/lntest/harness_assertion.go @@ -401,7 +401,7 @@ func (h *HarnessTest) assertChannelStatus(hn *node.HarnessNode, }, DefaultTimeout) require.NoErrorf(h, err, "%s: timeout checking for channel point: %v", - hn.Name(), cp) + hn.Name(), h.OutPointFromChannelPoint(cp)) return channel } From d122a2ecc60869f82d7a1ecd378d78ab14d7fb53 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Mon, 10 Mar 2025 01:42:58 +0800 Subject: [PATCH 3/5] itest+lntest: add `flakePaymentStreamReturnEarly` Fix the flake found in the `testRelayingBlindedError` and documents the flake found in other tests in one place. --- itest/flakes.go | 19 +++++++++++++++++++ itest/lnd_channel_backup_test.go | 8 +------- itest/lnd_route_blinding_test.go | 4 ++++ itest/lnd_wipe_fwdpkgs_test.go | 10 +--------- lntest/harness_assertion.go | 3 ++- 5 files changed, 27 insertions(+), 17 deletions(-) diff --git a/itest/flakes.go b/itest/flakes.go index ff5db12a1..23666cb6b 100644 --- a/itest/flakes.go +++ b/itest/flakes.go @@ -142,3 +142,22 @@ func flakeInconsistentHTLCView() { // the ListChannels. time.Sleep(2 * time.Second) } + +// flakePaymentStreamReturnEarly documents a flake found in the test which +// relies on a given payment to be settled before testing other state changes. +// The issue comes from the payment stream created from the RPC `SendPaymentV2` +// gives premature settled event for a given payment, which is found in, +// - if we force close the channel immediately, we may get an error because +// the commitment dance is not finished. +// - if we subscribe HTLC events immediately, we may get extra events, which +// is also related to the above unfinished commitment dance. +// +// TODO(yy): Make sure we only mark the payment being settled once the +// commitment dance is finished. In addition, we should also fix the exit hop +// logic in the invoice settlement flow to make sure the invoice is only marked +// as settled after the commitment dance is finished. +func flakePaymentStreamReturnEarly() { + // Sleep 2 seconds so the pending HTLCs will be removed from the + // commitment. + time.Sleep(2 * time.Second) +} diff --git a/itest/lnd_channel_backup_test.go b/itest/lnd_channel_backup_test.go index 8b12f4fdc..f84b65ce3 100644 --- a/itest/lnd_channel_backup_test.go +++ b/itest/lnd_channel_backup_test.go @@ -1224,13 +1224,7 @@ func testDataLossProtection(ht *lntest.HarnessTest) { // payment hashes. ht.CompletePaymentRequests(carol, payReqs[numInvoices/2:]) - // TODO(yy): remove the sleep once the following bug is fixed. - // - // While the payment is reported as settled, the commitment - // dance may not be finished, which leaves several HTLCs in the - // commitment. Later on, when Carol force closes this channel, - // she would have HTLCs there and the test won't pass. - time.Sleep(2 * time.Second) + flakePaymentStreamReturnEarly() // Now we shutdown Dave, copying over the its temporary // database state which has the *prior* channel state over his diff --git a/itest/lnd_route_blinding_test.go b/itest/lnd_route_blinding_test.go index 9792b0ca2..d46a84c16 100644 --- a/itest/lnd_route_blinding_test.go +++ b/itest/lnd_route_blinding_test.go @@ -667,6 +667,10 @@ func testRelayingBlindedError(ht *lntest.HarnessTest) { // so that she can't forward the payment to Dave. testCase.drainCarolLiquidity(false) + // NOTE: The above draining requires Carol to send a payment, which may + // create extra events, causing the `AssertHtlcEvents` to fail below. + flakePaymentStreamReturnEarly() + // Subscribe to Carol's HTLC events so that we can observe the payment // coming in. carolEvents := testCase.carol.RPC.SubscribeHtlcEvents() diff --git a/itest/lnd_wipe_fwdpkgs_test.go b/itest/lnd_wipe_fwdpkgs_test.go index 9aafd960a..337331fd5 100644 --- a/itest/lnd_wipe_fwdpkgs_test.go +++ b/itest/lnd_wipe_fwdpkgs_test.go @@ -1,8 +1,6 @@ package itest import ( - "time" - "github.com/lightningnetwork/lnd/chainreg" "github.com/lightningnetwork/lnd/lnrpc" "github.com/lightningnetwork/lnd/lntest" @@ -49,13 +47,7 @@ func testWipeForwardingPackages(ht *lntest.HarnessTest) { ht.CompletePaymentRequests(alice, []string{resp.PaymentRequest}) } - // TODO(yy): remove the sleep once the following bug is fixed. - // When the invoice is reported settled, the commitment dance is not - // yet finished, which can cause an error when closing the channel, - // saying there's active HTLCs. We need to investigate this issue and - // reverse the order to, first finish the commitment dance, then report - // the invoice as settled. - time.Sleep(2 * time.Second) + flakePaymentStreamReturnEarly() // Firstly, Bob force closes the channel. ht.CloseChannelAssertPending(bob, chanPointAB, true) diff --git a/lntest/harness_assertion.go b/lntest/harness_assertion.go index 0523562b6..ddbc4a325 100644 --- a/lntest/harness_assertion.go +++ b/lntest/harness_assertion.go @@ -2329,7 +2329,8 @@ func (h *HarnessTest) AssertHtlcEvents(client rpc.HtlcEventsClient, event := h.ReceiveHtlcEvent(client) require.Containsf(h, eventTypes, event.EventType, - "wrong event type, got %v", userType, event.EventType) + "wrong event type, want %v, got %v", userType, + event.EventType) events = append(events, event) From 1db110f4cb4dafafe748b933a83273c21d3ef72d Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Mon, 10 Mar 2025 15:42:23 +0800 Subject: [PATCH 4/5] itest: fix flake in `testFundingExpiryBlocksOnPending` We need to make sure to wait for the RPC to show the updated results. --- itest/lnd_open_channel_test.go | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/itest/lnd_open_channel_test.go b/itest/lnd_open_channel_test.go index e46acd682..9377fd4d8 100644 --- a/itest/lnd_open_channel_test.go +++ b/itest/lnd_open_channel_test.go @@ -837,6 +837,24 @@ func testFundingExpiryBlocksOnPending(ht *lntest.HarnessTest) { param := lntest.OpenChannelParams{Amt: 100000} ht.OpenChannelAssertPending(alice, bob, param) + // assertExpiry is a helper closure to assert the FundingExpiryBlocks + // has been updated to the expected value. + assertExpiry := func(hn *node.HarnessNode, expected int32) { + err := wait.NoError(func() error { + pending := ht.AssertNumPendingOpenChannels(hn, 1) + expiry := pending[0].FundingExpiryBlocks + + // Exit early if matched. + if expected == expiry { + return nil + } + + return fmt.Errorf("want %v, got %v", expected, expiry) + }, wait.DefaultTimeout) + require.NoErrorf(ht, err, "%s: assert FundingExpiryBlocks "+ + "timeout", hn.Name()) + } + // At this point, the channel's funding transaction will have been // broadcast, but not confirmed. Alice and Bob's nodes should reflect // this when queried via RPC. FundingExpiryBlock should decrease @@ -846,15 +864,16 @@ func testFundingExpiryBlocksOnPending(ht *lntest.HarnessTest) { const numEmptyBlocks = 3 for i := int32(0); i < numEmptyBlocks; i++ { expectedVal := lncfg.DefaultMaxWaitNumBlocksFundingConf - i - pending := ht.AssertNumPendingOpenChannels(alice, 1) - require.Equal(ht, expectedVal, pending[0].FundingExpiryBlocks) - pending = ht.AssertNumPendingOpenChannels(bob, 1) - require.Equal(ht, expectedVal, pending[0].FundingExpiryBlocks) + + // Assert Alice and Bob have updated the FundingExpiryBlocks. + assertExpiry(alice, expectedVal) + assertExpiry(bob, expectedVal) + ht.MineEmptyBlocks(1) } - // Mine 1 block to confirm the funding transaction, and then close the - // channel. + // Mine 1 block to confirm the funding transaction so clean up the + // mempool. ht.MineBlocksAndAssertNumTxes(1, 1) } From 49382a8f67b7de82aa2bbaa76661865d9deab7cb Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Wed, 12 Mar 2025 01:03:18 +0800 Subject: [PATCH 5/5] gomod: update `tor` to `v1.1.6` --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index d913b30cf..1953a0a6c 100644 --- a/go.mod +++ b/go.mod @@ -43,7 +43,7 @@ require ( github.com/lightningnetwork/lnd/sqldb v1.0.7 github.com/lightningnetwork/lnd/ticker v1.1.1 github.com/lightningnetwork/lnd/tlv v1.3.0 - github.com/lightningnetwork/lnd/tor v1.1.5 + github.com/lightningnetwork/lnd/tor v1.1.6 github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796 github.com/miekg/dns v1.1.43 github.com/pkg/errors v0.9.1 diff --git a/go.sum b/go.sum index 656c408ab..d7b4a4553 100644 --- a/go.sum +++ b/go.sum @@ -470,8 +470,8 @@ github.com/lightningnetwork/lnd/ticker v1.1.1 h1:J/b6N2hibFtC7JLV77ULQp++QLtCwT6 github.com/lightningnetwork/lnd/ticker v1.1.1/go.mod h1:waPTRAAcwtu7Ji3+3k+u/xH5GHovTsCoSVpho0KDvdA= github.com/lightningnetwork/lnd/tlv v1.3.0 h1:exS/KCPEgpOgviIttfiXAPaUqw2rHQrnUOpP7HPBPiY= github.com/lightningnetwork/lnd/tlv v1.3.0/go.mod h1:pJuiBj1ecr1WWLOtcZ+2+hu9Ey25aJWFIsjmAoPPnmc= -github.com/lightningnetwork/lnd/tor v1.1.5 h1:kZ1Ab4EcPFCny3a179rZGKeDUlgYilEcfKx3tD5+L+k= -github.com/lightningnetwork/lnd/tor v1.1.5/go.mod h1:qSRB8llhAK+a6kaTPWOLLXSZc6Hg8ZC0mq1sUQ/8JfI= +github.com/lightningnetwork/lnd/tor v1.1.6 h1:WHUumk7WgU6BUFsqHuqszI9P6nfhMeIG+rjJBlVE6OE= +github.com/lightningnetwork/lnd/tor v1.1.6/go.mod h1:qSRB8llhAK+a6kaTPWOLLXSZc6Hg8ZC0mq1sUQ/8JfI= github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796 h1:sjOGyegMIhvgfq5oaue6Td+hxZuf3tDC8lAPrFldqFw= github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796/go.mod h1:3p7ZTf9V1sNPI5H8P3NkTFF4LuwMdPl2DodF60qAKqY= github.com/ltcsuite/ltcutil v0.0.0-20181217130922-17f3b04680b6/go.mod h1:8Vg/LTOO0KYa/vlHWJ6XZAevPQThGH5sufO0Hrou/lA=