Merge pull request #5509 from guggero/itest-flake-hunt

itest: neutrino anchor output flake fix
Oliver Gugger 2021-08-05 17:44:53 +02:00 committed by GitHub
commit 31063c269e
9 changed files with 147 additions and 97 deletions

View File

@@ -52,6 +52,9 @@ you.
* [The itest error whitelist check was removed to reduce the number of failed
Travis builds](https://github.com/lightningnetwork/lnd/pull/5588).
* [A flake in the Neutrino integration tests with anchor sweeps was
addressed](https://github.com/lightningnetwork/lnd/pull/5509).
# Documentation
* [Outdated warning about unsupported pruning was replaced with clarification that LND **does**

View File

@@ -772,7 +772,15 @@ func (n *NetworkHarness) RestartNode(node *HarnessNode, callback func() error,
unlockReq.RecoveryWindow = 1000
}
return node.Unlock(context.Background(), unlockReq)
if err := node.Unlock(context.Background(), unlockReq); err != nil {
return err
}
// Give the node some time to catch up with the chain before we continue
// with the tests.
ctxc, done := context.WithTimeout(context.Background(), DefaultTimeout)
defer done()
return node.WaitForBlockchainSync(ctxc)
}
// RestartNodeNoUnlock attempts to restart a lightning node by shutting it down
@@ -840,7 +848,7 @@ func (n *NetworkHarness) StopNode(node *HarnessNode) error {
// SaveProfilesPages hits profiles pages of all active nodes and writes it to
// disk using a similar naming scheme as to the regular set of logs.
func (n *NetworkHarness) SaveProfilesPages() {
func (n *NetworkHarness) SaveProfilesPages(t *testing.T) {
// Only write goroutine dumps if flag is active.
if !(*goroutineDump) {
return
@@ -848,7 +856,8 @@ func (n *NetworkHarness) SaveProfilesPages() {
for _, node := range n.activeNodes {
if err := saveProfilesPage(node); err != nil {
fmt.Printf("Error: %v\n", err)
t.Logf("Logging follow-up error only, see rest of "+
"the log for actual cause: %v\n", err)
}
}
}
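
Taken together, the harness changes above implement a simple stabilization pattern: after a node is restarted and unlocked, block until it reports being synced to chain, then retry any follow-up assertion instead of failing on the first attempt. The sketch below is not part of the diff; it only assumes the helpers that do appear in it (HarnessNode.WaitForBlockchainSync, wait.NoError and the exported DefaultTimeout), plus a hypothetical checkState callback standing in for whatever condition a test needs to hold.

package itest

import (
	"context"
	"fmt"

	"github.com/lightningnetwork/lnd/lntest"
	"github.com/lightningnetwork/lnd/lntest/wait"
)

// waitForNodeSettled sketches the anti-flake pattern used in this PR: wait
// for the restarted node to catch up with the chain, then poll a
// test-specific condition until it holds or the timeout expires.
func waitForNodeSettled(node *lntest.HarnessNode, checkState func() error) error {
	ctxt, cancel := context.WithTimeout(
		context.Background(), lntest.DefaultTimeout,
	)
	defer cancel()

	// Give the node some time to catch up with the chain, exactly as
	// RestartNode now does after unlocking the wallet.
	if err := node.WaitForBlockchainSync(ctxt); err != nil {
		return fmt.Errorf("node not synced to chain: %v", err)
	}

	// Retry the condition instead of asserting it once, so a slow
	// backend such as neutrino cannot cause a spurious failure.
	return wait.NoError(checkState, lntest.DefaultTimeout)
}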

View File

@@ -5,6 +5,7 @@ import (
"encoding/hex"
"fmt"
"io"
"math"
"sync/atomic"
"testing"
"time"
@@ -1304,7 +1305,6 @@ func assertDLPExecuted(net *lntest.NetworkHarness, t *harnessTest,
// Dave should sweep his funds immediately, as they are not timelocked.
// We also expect Dave to sweep his anchor, if present.
_, err = waitForNTxsInMempool(
net.Miner.Client, expectedTxes, minerMempoolTimeout,
)
@@ -1340,7 +1340,7 @@ func assertDLPExecuted(net *lntest.NetworkHarness, t *harnessTest,
// After the Carol's output matures, she should also reclaim her funds.
//
// The commit sweep resolver publishes the sweep tx at defaultCSV-1 and
// we already mined one block after the commitmment was published, so
// we already mined one block after the commitment was published, so
// take that into account.
mineBlocks(t, net, defaultCSV-1-1, 0)
carolSweep, err := waitForTxInMempool(
@@ -1362,6 +1362,16 @@ func assertDLPExecuted(net *lntest.NetworkHarness, t *harnessTest,
}
carolBalance := carolBalResp.ConfirmedBalance
// With Neutrino we don't get a backend error when trying to
// publish an orphan TX (which is what the sweep for the remote
// anchor is since the remote commitment TX was not broadcast).
// That's why the wallet still sees that as unconfirmed and we
// need to count the total balance instead of the confirmed.
if net.BackendCfg.Name() == lntest.NeutrinoBackendName {
carolBalance = carolBalResp.TotalBalance
}
if carolBalance <= carolStartingBalance {
return fmt.Errorf("expected carol to have balance "+
"above %d, instead had %v", carolStartingBalance,
@@ -1714,3 +1724,30 @@ func getPaymentResult(stream routerrpc.Router_SendPaymentV2Client) (
}
}
}
// assertNumUTXOs waits for the given number of UTXOs to be available or fails
// if that isn't the case before the default timeout.
func assertNumUTXOs(t *testing.T, node *lntest.HarnessNode, expectedUtxos int) {
ctxb := context.Background()
ctxt, cancel := context.WithTimeout(ctxb, defaultTimeout)
defer cancel()
err := wait.NoError(func() error {
resp, err := node.ListUnspent( // nolint:staticcheck
ctxt, &lnrpc.ListUnspentRequest{
MinConfs: 1,
MaxConfs: math.MaxInt32,
},
)
if err != nil {
return fmt.Errorf("error listing unspent: %v", err)
}
if len(resp.Utxos) != expectedUtxos {
return fmt.Errorf("not enough UTXOs, got %d wanted %d",
len(resp.Utxos), expectedUtxos)
}
return nil
}, defaultTimeout)
require.NoError(t, err, "wait for listunspent")
}
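
As a usage note that is not part of the diff itself: the hunks further down call this helper right after a node restart, so the sweep-related assertions only run once the backend, neutrino in particular, has actually seen the expected confirmed outputs. The call sites appearing later in this diff look roughly like this, with two UTXOs expected when anchor commitments are in play (one for the local and one for the remote anchor sweep) and one otherwise:

	// Channel backup test: Carol needs a second UTXO only for the
	// anchor commitment case.
	numUTXOs := 1
	if testCase.anchorCommit {
		numUTXOs = 2
	}
	assertNumUTXOs(t.t, carol, numUTXOs)

	// Force close test: Alice sweeps both the local and the remote
	// anchor, so two confirmed UTXOs are required.
	const expectedUtxos = 2
	assertNumUTXOs(t.t, alice, expectedUtxos)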

View File

@@ -847,6 +847,12 @@ func testChanRestoreScenario(t *harnessTest, net *lntest.NetworkHarness,
ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
net.SendCoins(ctxt, t.t, btcutil.SatoshiPerBitcoin, carol)
// For the anchor output case we need two UTXOs for Carol so she can
// sweep both the local and remote anchor.
if testCase.anchorCommit {
net.SendCoins(ctxt, t.t, btcutil.SatoshiPerBitcoin, carol)
}
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
net.SendCoins(ctxt, t.t, btcutil.SatoshiPerBitcoin, dave)
@@ -1077,6 +1083,12 @@ func testChanRestoreScenario(t *harnessTest, net *lntest.NetworkHarness,
err = restartCarol()
require.NoError(t.t, err)
numUTXOs := 1
if testCase.anchorCommit {
numUTXOs = 2
}
assertNumUTXOs(t.t, carol, numUTXOs)
// Now that we have our new node up, we expect that it'll re-connect to
// Carol automatically based on the restored backup.
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)

View File

@@ -314,6 +314,9 @@ func channelForceClosureTest(net *lntest.NetworkHarness, t *harnessTest,
ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
net.ConnectNodes(ctxt, t.t, alice, carol)
// We need one additional UTXO for sweeping the remote anchor.
net.SendCoins(ctxt, t.t, btcutil.SatoshiPerBitcoin, alice)
// Before we start, obtain Carol's current wallet balance, we'll check
// to ensure that at the end of the force closure by Alice, Carol
// recognizes his new on-chain output.
@@ -484,6 +487,12 @@ func channelForceClosureTest(net *lntest.NetworkHarness, t *harnessTest,
t.Fatalf("Node restart failed: %v", err)
}
// To give the neutrino backend some time to catch up with the chain, we
// wait here until we have enough UTXOs to actually sweep the local and
// remote anchor.
const expectedUtxos = 2
assertNumUTXOs(t.t, alice, expectedUtxos)
// Mine a block which should confirm the commitment transaction
// broadcast as a result of the force closure. If there are anchors, we
// also expect the anchor sweep tx to be in the mempool.
@@ -497,9 +506,7 @@ func channelForceClosureTest(net *lntest.NetworkHarness, t *harnessTest,
sweepTxns, err := getNTxsFromMempool(
net.Miner.Client, expectedTxes, minerMempoolTimeout,
)
if err != nil {
t.Fatalf("failed to find commitment in miner mempool: %v", err)
}
require.NoError(t.t, err, "sweep txns in miner mempool")
// Verify fee rate of the commitment tx plus anchor if present.
var totalWeight, totalFee int64
@@ -520,9 +527,7 @@ func channelForceClosureTest(net *lntest.NetworkHarness, t *harnessTest,
// Find alice's commit sweep and anchor sweep (if present) in the
// mempool.
aliceCloseTx := waitingClose.Commitments.LocalTxid
_, aliceAnchor := findCommitAndAnchor(
t, net, sweepTxns, aliceCloseTx,
)
_, aliceAnchor := findCommitAndAnchor(t, net, sweepTxns, aliceCloseTx)
// If we expect anchors, add alice's anchor to our expected set of
// reports.

View File

@@ -610,7 +610,7 @@ func testMaxPendingChannels(net *lntest.NetworkHarness, t *harnessTest) {
}
}
// testGarbageCollectLinkNodes tests that we properly garbase collect link nodes
// testGarbageCollectLinkNodes tests that we properly garbage collect link nodes
// from the database and the set of persistent connections within the server.
func testGarbageCollectLinkNodes(net *lntest.NetworkHarness, t *harnessTest) {
ctxb := context.Background()
@@ -623,8 +623,7 @@ func testGarbageCollectLinkNodes(net *lntest.NetworkHarness, t *harnessTest) {
// cooperatively closed.
ctxt, _ := context.WithTimeout(ctxb, channelOpenTimeout)
coopChanPoint := openChannelAndAssert(
ctxt, t, net, net.Alice, net.Bob,
lntest.OpenChannelParams{
ctxt, t, net, net.Alice, net.Bob, lntest.OpenChannelParams{
Amt: chanAmt,
},
)
@@ -639,8 +638,7 @@ func testGarbageCollectLinkNodes(net *lntest.NetworkHarness, t *harnessTest) {
// closed.
ctxt, _ = context.WithTimeout(ctxb, channelOpenTimeout)
forceCloseChanPoint := openChannelAndAssert(
ctxt, t, net, net.Alice, carol,
lntest.OpenChannelParams{
ctxt, t, net, net.Alice, carol, lntest.OpenChannelParams{
Amt: chanAmt,
},
)
@@ -654,8 +652,7 @@ func testGarbageCollectLinkNodes(net *lntest.NetworkHarness, t *harnessTest) {
net.ConnectNodes(ctxt, t.t, net.Alice, dave)
ctxt, _ = context.WithTimeout(ctxb, channelOpenTimeout)
persistentChanPoint := openChannelAndAssert(
ctxt, t, net, net.Alice, dave,
lntest.OpenChannelParams{
ctxt, t, net, net.Alice, dave, lntest.OpenChannelParams{
Amt: chanAmt,
},
)
@@ -1620,30 +1617,22 @@ func testAbandonChannel(net *lntest.NetworkHarness, t *harnessTest) {
ctxt, t, net, net.Alice, net.Bob, channelParam,
)
txid, err := lnrpc.GetChanPointFundingTxid(chanPoint)
if err != nil {
t.Fatalf("unable to get txid: %v", err)
}
require.NoError(t.t, err, "alice bob get channel funding txid")
chanPointStr := fmt.Sprintf("%v:%v", txid, chanPoint.OutputIndex)
// Wait for channel to be confirmed open.
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
err = net.Alice.WaitForNetworkChannelOpen(ctxt, chanPoint)
if err != nil {
t.Fatalf("alice didn't report channel: %v", err)
}
require.NoError(t.t, err, "alice wait for network channel open")
err = net.Bob.WaitForNetworkChannelOpen(ctxt, chanPoint)
if err != nil {
t.Fatalf("bob didn't report channel: %v", err)
}
require.NoError(t.t, err, "bob wait for network channel open")
// Now that the channel is open, we'll obtain its channel ID real quick
// so we can use it to query the graph below.
listReq := &lnrpc.ListChannelsRequest{}
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
aliceChannelList, err := net.Alice.ListChannels(ctxt, listReq)
if err != nil {
t.Fatalf("unable to fetch alice's channels: %v", err)
}
require.NoError(t.t, err)
var chanID uint64
for _, channel := range aliceChannelList.Channels {
if channel.ChannelPoint == chanPointStr {
@@ -1651,18 +1640,13 @@ func testAbandonChannel(net *lntest.NetworkHarness, t *harnessTest) {
}
}
if chanID == 0 {
t.Fatalf("unable to find channel")
}
require.NotZero(t.t, chanID, "unable to find channel")
// To make sure the channel is removed from the backup file as well when
// being abandoned, grab a backup snapshot so we can compare it with the
// later state.
bkupBefore, err := ioutil.ReadFile(net.Alice.ChanBackupPath())
if err != nil {
t.Fatalf("could not get channel backup before abandoning "+
"channel: %v", err)
}
require.NoError(t.t, err, "channel backup before abandoning channel")
// Send request to abandon channel.
abandonChannelRequest := &lnrpc.AbandonChannelRequest{
@@ -1671,44 +1655,31 @@ func testAbandonChannel(net *lntest.NetworkHarness, t *harnessTest) {
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
_, err = net.Alice.AbandonChannel(ctxt, abandonChannelRequest)
if err != nil {
t.Fatalf("unable to abandon channel: %v", err)
}
require.NoError(t.t, err, "abandon channel")
// Assert that channel in no longer open.
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
aliceChannelList, err = net.Alice.ListChannels(ctxt, listReq)
if err != nil {
t.Fatalf("unable to list channels: %v", err)
}
if len(aliceChannelList.Channels) != 0 {
t.Fatalf("alice should only have no channels open, "+
"instead she has %v",
len(aliceChannelList.Channels))
}
require.NoError(t.t, err, "list channels")
require.Zero(t.t, len(aliceChannelList.Channels), "alice open channels")
// Assert that channel is not pending closure.
pendingReq := &lnrpc.PendingChannelsRequest{}
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
alicePendingList, err := net.Alice.PendingChannels(ctxt, pendingReq)
if err != nil {
t.Fatalf("unable to list pending channels: %v", err)
}
if len(alicePendingList.PendingClosingChannels) != 0 { //nolint:staticcheck
t.Fatalf("alice should only have no pending closing channels, "+
"instead she has %v",
len(alicePendingList.PendingClosingChannels)) //nolint:staticcheck
}
if len(alicePendingList.PendingForceClosingChannels) != 0 {
t.Fatalf("alice should only have no pending force closing "+
"channels instead she has %v",
len(alicePendingList.PendingForceClosingChannels))
}
if len(alicePendingList.WaitingCloseChannels) != 0 {
t.Fatalf("alice should only have no waiting close "+
"channels instead she has %v",
len(alicePendingList.WaitingCloseChannels))
}
require.NoError(t.t, err, "alice list pending channels")
require.Zero(
t.t, len(alicePendingList.PendingClosingChannels), //nolint:staticcheck
"alice pending channels",
)
require.Zero(
t.t, len(alicePendingList.PendingForceClosingChannels),
"alice pending force close channels",
)
require.Zero(
t.t, len(alicePendingList.WaitingCloseChannels),
"alice waiting close channels",
)
// Assert that channel is listed as abandoned.
closedReq := &lnrpc.ClosedChannelsRequest{
@@ -1716,45 +1687,49 @@ func testAbandonChannel(net *lntest.NetworkHarness, t *harnessTest) {
}
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
aliceClosedList, err := net.Alice.ClosedChannels(ctxt, closedReq)
if err != nil {
t.Fatalf("unable to list closed channels: %v", err)
}
if len(aliceClosedList.Channels) != 1 {
t.Fatalf("alice should only have a single abandoned channel, "+
"instead she has %v",
len(aliceClosedList.Channels))
}
require.NoError(t.t, err, "alice list closed channels")
require.Len(t.t, aliceClosedList.Channels, 1, "alice closed channels")
// Ensure that the channel can no longer be found in the channel graph.
_, err = net.Alice.GetChanInfo(ctxb, &lnrpc.ChanInfoRequest{
ChanId: chanID,
})
if !strings.Contains(err.Error(), "marked as zombie") {
t.Fatalf("channel shouldn't be found in the channel " +
"graph!")
}
// Make sure the channel is no longer in the channel backup list.
err = wait.Predicate(func() bool {
bkupAfter, err := ioutil.ReadFile(net.Alice.ChanBackupPath())
if err != nil {
t.Fatalf("could not get channel backup before "+
"abandoning channel: %v", err)
err = wait.NoError(func() error {
_, err := net.Alice.GetChanInfo(ctxb, &lnrpc.ChanInfoRequest{
ChanId: chanID,
})
if err == nil {
return fmt.Errorf("expected error but got nil")
}
return len(bkupAfter) < len(bkupBefore)
if !strings.Contains(err.Error(), "marked as zombie") {
return fmt.Errorf("expected error to contain '%s' but "+
"was '%v'", "marked as zombie", err)
}
return nil
}, defaultTimeout)
if err != nil {
t.Fatalf("channel wasn't removed from channel backup file")
}
require.NoError(t.t, err, "marked as zombie")
// Make sure the channel is no longer in the channel backup list.
err = wait.NoError(func() error {
bkupAfter, err := ioutil.ReadFile(net.Alice.ChanBackupPath())
if err != nil {
return fmt.Errorf("could not get channel backup "+
"before abandoning channel: %v", err)
}
if len(bkupAfter) >= len(bkupBefore) {
return fmt.Errorf("expected backups after to be less "+
"than %d but was %d", bkupBefore, bkupAfter)
}
return nil
}, defaultTimeout)
require.NoError(t.t, err, "channel removed from backup file")
// Calling AbandonChannel again, should result in no new errors, as the
// channel has already been removed.
ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
_, err = net.Alice.AbandonChannel(ctxt, abandonChannelRequest)
if err != nil {
t.Fatalf("unable to abandon channel a second time: %v", err)
}
require.NoError(t.t, err, "abandon channel second time")
// Now that we're done with the test, the channel can be closed. This
// is necessary to avoid unexpected outcomes of other tests that use

View File

@@ -18,7 +18,9 @@
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to advance state: channel not found
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to broadcast close tx: Transaction rejected: output already spent
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to force close: channel not found
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to mark commitment broadcasted: channel not found
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to progress *contractcourt.commitSweepResolver: remote party swept utxo
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to progress *contractcourt.commitSweepResolver: chainntnfs: system interrupt while attempting to register for block epoch notification.
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to progress *contractcourt.htlcOutgoingContestResolver: chain notifier shutting down
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to progress *contractcourt.htlcOutgoingContestResolver: chainntnfs: system interrupt while attempting to register for block epoch notification.
<time> [ERR] CNCT: ChannelArbitrator(<chan_point>): unable to progress *contractcourt.htlcOutgoingContestResolver: resolver canceled
@@ -83,8 +85,10 @@
<time> [ERR] FNDG: Unable to advance state(<chan_point>): error sending channel announcement: unable to register for confirmation of ChannelPoint(<chan_point>): TxNotifier is exiting
<time> [ERR] FNDG: Unable to advance state(<chan_point>): failed adding to router graph: error sending channel announcement: gossiper is shutting down
<time> [ERR] FNDG: Unable to advance state(<chan_point>): failed adding to router graph: error sending channel announcement: router shutting down
<time> [ERR] FNDG: Unable to advance state(<chan_point>): failed adding to router graph: error sending channel update: router shutting down
<time> [ERR] FNDG: Unable to advance state(<chan_point>): failed adding to router graph: funding manager shutting down
<time> [ERR] FNDG: Unable to advance state(<chan_point>): failed sending fundingLocked: funding manager shutting down
<time> [ERR] FNDG: Unable to advance state(<chan_point>): funding manager shutting down
<time> [ERR] FNDG: unable to cancel reservation: no active reservations for peer(<hex>)
<time> [ERR] FNDG: unable to report short chan id: link <hex> not found
<time> [ERR] FNDG: Unable to send channel proof: channel announcement proof for short_chan_id=<cid> isn't valid: can't verify first bitcoin signature
@@ -191,6 +195,8 @@
<time> [ERR] RPCS: [/lnrpc.Lightning/CloseChannel]: cannot co-op close frozen channel as initiator until height=<height>, (current_height=<height>)
<time> [ERR] RPCS: [/lnrpc.Lightning/CloseChannel]: chain notifier shutting down
<time> [ERR] RPCS: [/lnrpc.Lightning/CloseChannel]: must specify channel point in close channel
<time> [ERR] RPCS: [/lnrpc.Lightning/CloseChannel]: rpc error: code = DeadlineExceeded desc = context deadline exceeded
<time> [ERR] RPCS: [/lnrpc.Lightning/CloseChannel]: server is still in the process of starting
<time> [ERR] RPCS: [/lnrpc.Lightning/ConnectPeer]: already connected to peer: <hex>@<ip>
<time> [ERR] RPCS: [/lnrpc.Lightning/ConnectPeer]: dial tcp <ip>: i/o timeout
<time> [ERR] RPCS: [/lnrpc.Lightning/ConnectPeer]: dial tcp <ip>: i/o timeout
@@ -199,6 +205,7 @@
<time> [ERR] RPCS: [/lnrpc.Lightning/DeleteMacaroonID]: the specified ID cannot be deleted
<time> [ERR] RPCS: [/lnrpc.Lightning/FundingStateStep]: pendingChanID(<hex>) already has intent registered
<time> [ERR] RPCS: [/lnrpc.Lightning/GetChanInfo]: edge marked as zombie
<time> [ERR] RPCS: [/lnrpc.Lightning/GetChanInfo]: edge not found
<time> [ERR] RPCS: [/lnrpc.Lightning/OpenChannel]: channels cannot be created before the wallet is fully synced
<time> [ERR] RPCS: [/lnrpc.Lightning/OpenChannel]: received funding error from <hex>: chan_id=<hex>, err=channel too large
<time> [ERR] RPCS: [/lnrpc.Lightning/OpenChannel]: received funding error from <hex>: chan_id=<hex>, err=chan size of 0.16777216 BTC exceeds maximum chan size of 0.16777215 BTC
@@ -231,6 +238,7 @@
<time> [ERR] RPCS: [/routerrpc.Router/SendPaymentV2]: routerrpc server shutting down
<time> [ERR] RPCS: [/routerrpc.Router/SubscribeHtlcEvents]: context canceled
<time> [ERR] RPCS: [/routerrpc.Router/SubscribeHtlcEvents]: htlc event subscription terminated
<time> [ERR] RPCS: [/routerrpc.Router/SubscribeHtlcEvents]: context deadline exceeded
<time> [ERR] RPCS: [/routerrpc.Route<time> [INF] LTND: Listening on the p2p interface is disabled!
<time> [ERR] RPCS: [/signrpc.Signer/DeriveSharedKey]: must provide ephemeral pubkey
<time> [ERR] RPCS: [/signrpc.Signer/DeriveSharedKey]: use either key_desc or key_loc

View File

@@ -82,7 +82,7 @@ func (h *harnessTest) Skipf(format string, args ...interface{}) {
// the error stack traces it produces.
func (h *harnessTest) Fatalf(format string, a ...interface{}) {
if h.lndHarness != nil {
h.lndHarness.SaveProfilesPages()
h.lndHarness.SaveProfilesPages(h.t)
}
stacktrace := errors.Wrap(fmt.Sprintf(format, a...), 1).ErrorStack()

View File

@@ -1167,7 +1167,8 @@ func (hn *HarnessNode) stop() error {
if hn.conn != nil {
err := hn.conn.Close()
if err != nil {
return fmt.Errorf("error attempting to stop grpc client: %v", err)
return fmt.Errorf("error attempting to stop grpc "+
"client: %v", err)
}
}