Merge pull request #9600 from lightningnetwork/yy-more-flakes

lntest+itest: document and fix more flakes
This commit is contained in:
Yong 2025-03-21 03:23:02 +08:00 committed by GitHub
commit 09b6745086
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 104 additions and 31 deletions

2
go.mod
View File

@ -43,7 +43,7 @@ require (
github.com/lightningnetwork/lnd/sqldb v1.0.7
github.com/lightningnetwork/lnd/ticker v1.1.1
github.com/lightningnetwork/lnd/tlv v1.3.0
github.com/lightningnetwork/lnd/tor v1.1.5
github.com/lightningnetwork/lnd/tor v1.1.6
github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796
github.com/miekg/dns v1.1.43
github.com/pkg/errors v0.9.1

4
go.sum
View File

@ -470,8 +470,8 @@ github.com/lightningnetwork/lnd/ticker v1.1.1 h1:J/b6N2hibFtC7JLV77ULQp++QLtCwT6
github.com/lightningnetwork/lnd/ticker v1.1.1/go.mod h1:waPTRAAcwtu7Ji3+3k+u/xH5GHovTsCoSVpho0KDvdA=
github.com/lightningnetwork/lnd/tlv v1.3.0 h1:exS/KCPEgpOgviIttfiXAPaUqw2rHQrnUOpP7HPBPiY=
github.com/lightningnetwork/lnd/tlv v1.3.0/go.mod h1:pJuiBj1ecr1WWLOtcZ+2+hu9Ey25aJWFIsjmAoPPnmc=
github.com/lightningnetwork/lnd/tor v1.1.5 h1:kZ1Ab4EcPFCny3a179rZGKeDUlgYilEcfKx3tD5+L+k=
github.com/lightningnetwork/lnd/tor v1.1.5/go.mod h1:qSRB8llhAK+a6kaTPWOLLXSZc6Hg8ZC0mq1sUQ/8JfI=
github.com/lightningnetwork/lnd/tor v1.1.6 h1:WHUumk7WgU6BUFsqHuqszI9P6nfhMeIG+rjJBlVE6OE=
github.com/lightningnetwork/lnd/tor v1.1.6/go.mod h1:qSRB8llhAK+a6kaTPWOLLXSZc6Hg8ZC0mq1sUQ/8JfI=
github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796 h1:sjOGyegMIhvgfq5oaue6Td+hxZuf3tDC8lAPrFldqFw=
github.com/ltcsuite/ltcd v0.0.0-20190101042124-f37f8bf35796/go.mod h1:3p7ZTf9V1sNPI5H8P3NkTFF4LuwMdPl2DodF60qAKqY=
github.com/ltcsuite/ltcutil v0.0.0-20181217130922-17f3b04680b6/go.mod h1:8Vg/LTOO0KYa/vlHWJ6XZAevPQThGH5sufO0Hrou/lA=

View File

@ -104,3 +104,60 @@ func flakeSkipPendingSweepsCheckDarwin(ht *lntest.HarnessTest,
// isDarwin reports whether the test suite is currently running on macOS.
func isDarwin() bool {
	const darwinOS = "darwin"
	return runtime.GOOS == darwinOS
}
// flakeInconsistentHTLCView documents a flake found that the `ListChannels` RPC
// can give inaccurate HTLC states, which is found when we call
// `AssertHTLCNotActive` after a commitment dance is finished. Suppose Carol is
// settling an invoice with Bob, from Bob's PoV, a typical healthy settlement
// flow goes like this:
//
// [DBG] PEER brontide.go:2412: Peer([Carol]): Received UpdateFulfillHTLC
// [DBG] HSWC switch.go:1315: Closed completed SETTLE circuit for ...
// [INF] HSWC switch.go:3044: Forwarded HTLC...
// [DBG] PEER brontide.go:2412: Peer([Carol]): Received CommitSig
// [DBG] PEER brontide.go:2412: Peer([Carol]): Sending RevokeAndAck
// [DBG] PEER brontide.go:2412: Peer([Carol]): Sending CommitSig
// [DBG] PEER brontide.go:2412: Peer([Carol]): Received RevokeAndAck
// [DBG] HSWC link.go:3617: ChannelLink([ChanPoint: Bob=>Carol]): settle-fail-filter: count=1, filter=[0]
// [DBG] HSWC switch.go:3001: Circuit is closing for packet...
//
// Bob receives the preimage, closes the circuit, and exchanges commit sig and
// revoke msgs with Carol. Once Bob receives the `CommitSig` from Carol, the
// HTLC should be removed from his `LocalCommitment` via
// `RevokeCurrentCommitment`.
//
// However, in the test where `AssertHTLCNotActive` is called, although the
// above process is finished, the `ListChannels` still finds the HTLC. Also
// note that the RPC makes a direct call to the channeldb without any locks,
// which should be fine as the struct `OpenChannel.LocalCommitment` is passed
// by value, although we need to double check.
//
// TODO(yy): In order to fix it, we should make the RPC share the same view of
// our channel state machine. Instead of making DB queries, it should use
// `lnwallet.LightningChannel` to stay consistent.
//
//nolint:ll
func flakeInconsistentHTLCView() {
	// Perform a sleep so the commitment dance can be finished before we
	// call the ListChannels.
	time.Sleep(2 * time.Second)
}
// flakePaymentStreamReturnEarly documents a flake found in tests that rely on
// a given payment being fully settled before asserting other state changes.
// The payment stream created from the `SendPaymentV2` RPC may report a
// premature settled event for the payment, which shows up as,
//   - an error when force closing the channel immediately, because the
//     commitment dance is not yet finished.
//   - extra events when subscribing to HTLC events immediately, again caused
//     by the above unfinished commitment dance.
//
// TODO(yy): Make sure we only mark the payment being settled once the
// commitment dance is finished. In addition, we should also fix the exit hop
// logic in the invoice settlement flow to make sure the invoice is only marked
// as settled after the commitment dance is finished.
func flakePaymentStreamReturnEarly() {
	// Give the commitment dance time to finish so the pending HTLCs are
	// removed from the commitment before the caller proceeds.
	const commitDanceDelay = 2 * time.Second
	time.Sleep(commitDanceDelay)
}

View File

@ -1224,13 +1224,7 @@ func testDataLossProtection(ht *lntest.HarnessTest) {
// payment hashes.
ht.CompletePaymentRequests(carol, payReqs[numInvoices/2:])
// TODO(yy): remove the sleep once the following bug is fixed.
//
// While the payment is reported as settled, the commitment
// dance may not be finished, which leaves several HTLCs in the
// commitment. Later on, when Carol force closes this channel,
// she would have HTLCs there and the test won't pass.
time.Sleep(2 * time.Second)
flakePaymentStreamReturnEarly()
// Now we shutdown Dave, copying over its temporary
// database state which has the *prior* channel state over his

View File

@ -222,11 +222,14 @@ func coopCloseWithHTLCsWithRestart(ht *lntest.HarnessTest, alice,
ht.AssertChannelInactive(bob, chanPoint)
ht.AssertChannelInactive(alice, chanPoint)
// Now restart Alice and Bob.
ht.RestartNode(alice)
ht.RestartNode(bob)
// Shutdown both Alice and Bob.
restartAlice := ht.SuspendNode(alice)
restartBob := ht.SuspendNode(bob)
ht.AssertConnected(alice, bob)
// Once shutdown, restart and connect them.
require.NoError(ht, restartAlice())
require.NoError(ht, restartBob())
ht.EnsureConnected(alice, bob)
// Show that both nodes still see the channel as waiting for close after
// the restart.

View File

@ -837,6 +837,24 @@ func testFundingExpiryBlocksOnPending(ht *lntest.HarnessTest) {
param := lntest.OpenChannelParams{Amt: 100000}
ht.OpenChannelAssertPending(alice, bob, param)
// assertExpiry is a helper closure to assert the FundingExpiryBlocks
// has been updated to the expected value.
assertExpiry := func(hn *node.HarnessNode, expected int32) {
err := wait.NoError(func() error {
pending := ht.AssertNumPendingOpenChannels(hn, 1)
expiry := pending[0].FundingExpiryBlocks
// Exit early if matched.
if expected == expiry {
return nil
}
return fmt.Errorf("want %v, got %v", expected, expiry)
}, wait.DefaultTimeout)
require.NoErrorf(ht, err, "%s: assert FundingExpiryBlocks "+
"timeout", hn.Name())
}
// At this point, the channel's funding transaction will have been
// broadcast, but not confirmed. Alice and Bob's nodes should reflect
// this when queried via RPC. FundingExpiryBlock should decrease
@ -846,15 +864,16 @@ func testFundingExpiryBlocksOnPending(ht *lntest.HarnessTest) {
const numEmptyBlocks = 3
for i := int32(0); i < numEmptyBlocks; i++ {
expectedVal := lncfg.DefaultMaxWaitNumBlocksFundingConf - i
pending := ht.AssertNumPendingOpenChannels(alice, 1)
require.Equal(ht, expectedVal, pending[0].FundingExpiryBlocks)
pending = ht.AssertNumPendingOpenChannels(bob, 1)
require.Equal(ht, expectedVal, pending[0].FundingExpiryBlocks)
// Assert Alice and Bob have updated the FundingExpiryBlocks.
assertExpiry(alice, expectedVal)
assertExpiry(bob, expectedVal)
ht.MineEmptyBlocks(1)
}
// Mine 1 block to confirm the funding transaction, and then close the
// channel.
// Mine 1 block to confirm the funding transaction and clean up the
// mempool.
ht.MineBlocksAndAssertNumTxes(1, 1)
}

View File

@ -667,6 +667,10 @@ func testRelayingBlindedError(ht *lntest.HarnessTest) {
// so that she can't forward the payment to Dave.
testCase.drainCarolLiquidity(false)
// NOTE: The above draining requires Carol to send a payment, which may
// create extra events, causing the `AssertHtlcEvents` to fail below.
flakePaymentStreamReturnEarly()
// Subscribe to Carol's HTLC events so that we can observe the payment
// coming in.
carolEvents := testCase.carol.RPC.SubscribeHtlcEvents()
@ -815,6 +819,7 @@ func testErrorHandlingOnChainFailure(ht *lntest.HarnessTest) {
ht.MineBlocksAndAssertNumTxes(1, 1)
// Assert that the HTLC has cleared.
flakeInconsistentHTLCView()
ht.AssertHTLCNotActive(bob, testCase.channels[0], hash[:])
ht.AssertHTLCNotActive(alice, testCase.channels[0], hash[:])

View File

@ -500,6 +500,7 @@ func testSweepCPFPAnchorIncomingTimeout(ht *lntest.HarnessTest) {
carol.RPC.SettleInvoice(preimage[:])
// Bob should have settled his outgoing HTLC with Carol.
flakeInconsistentHTLCView()
ht.AssertHTLCNotActive(bob, bcChanPoint, payHash[:])
// We'll now mine enough blocks to trigger Bob to force close channel
@ -851,6 +852,7 @@ func testSweepHTLCs(ht *lntest.HarnessTest) {
carol.RPC.SettleInvoice(preimageSettled[:])
// Bob should have settled his outgoing HTLC with Carol.
flakeInconsistentHTLCView()
ht.AssertHTLCNotActive(bob, bcChanPoint, payHashSettled[:])
// Let Carol go offline so we can focus on testing Bob's sweeping

View File

@ -1,8 +1,6 @@
package itest
import (
"time"
"github.com/lightningnetwork/lnd/chainreg"
"github.com/lightningnetwork/lnd/lnrpc"
"github.com/lightningnetwork/lnd/lntest"
@ -49,13 +47,7 @@ func testWipeForwardingPackages(ht *lntest.HarnessTest) {
ht.CompletePaymentRequests(alice, []string{resp.PaymentRequest})
}
// TODO(yy): remove the sleep once the following bug is fixed.
// When the invoice is reported settled, the commitment dance is not
// yet finished, which can cause an error when closing the channel,
// saying there's active HTLCs. We need to investigate this issue and
// reverse the order to, first finish the commitment dance, then report
// the invoice as settled.
time.Sleep(2 * time.Second)
flakePaymentStreamReturnEarly()
// Firstly, Bob force closes the channel.
ht.CloseChannelAssertPending(bob, chanPointAB, true)

View File

@ -401,7 +401,7 @@ func (h *HarnessTest) assertChannelStatus(hn *node.HarnessNode,
}, DefaultTimeout)
require.NoErrorf(h, err, "%s: timeout checking for channel point: %v",
hn.Name(), cp)
hn.Name(), h.OutPointFromChannelPoint(cp))
return channel
}
@ -2329,7 +2329,8 @@ func (h *HarnessTest) AssertHtlcEvents(client rpc.HtlcEventsClient,
event := h.ReceiveHtlcEvent(client)
require.Containsf(h, eventTypes, event.EventType,
"wrong event type, got %v", userType, event.EventType)
"wrong event type, want %v, got %v", userType,
event.EventType)
events = append(events, event)