From 19873f721f4ac1c544c2f9eda0075101b6e502e6 Mon Sep 17 00:00:00 2001 From: Matheus Degiovani Date: Mon, 9 Mar 2020 11:10:14 -0300 Subject: [PATCH 1/7] lntest: more descriptive errors for CloseChannel This improves the error reporting for the harness' CloseChannel so that the exact step where closure fails can be better indicated. This is to help debug some flaky failures in the CI. --- lntest/harness.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lntest/harness.go b/lntest/harness.go index eb75d0fe3..89a4141ba 100644 --- a/lntest/harness.go +++ b/lntest/harness.go @@ -1128,7 +1128,8 @@ func (n *NetworkHarness) CloseChannel(ctx context.Context, // within the network. closeResp, err := closeRespStream.Recv() if err != nil { - errChan <- err + errChan <- fmt.Errorf("unable to recv() from close "+ + "stream: %v", err) return } pendingClose, ok := closeResp.Update.(*lnrpc.CloseStatusUpdate_ClosePending) @@ -1140,11 +1141,13 @@ func (n *NetworkHarness) CloseChannel(ctx context.Context, closeTxid, err := chainhash.NewHash(pendingClose.ClosePending.Txid) if err != nil { - errChan <- err + errChan <- fmt.Errorf("unable to decode closeTxid: "+ + "%v", err) return } if err := n.WaitForTxBroadcast(ctx, *closeTxid); err != nil { - errChan <- err + errChan <- fmt.Errorf("error while waiting for "+ + "broadcast tx: %v", err) return } fin <- closeTxid @@ -1153,9 +1156,6 @@ func (n *NetworkHarness) CloseChannel(ctx context.Context, // Wait until either the deadline for the context expires, an error // occurs, or the channel close update is received. select { - case <-ctx.Done(): - return nil, nil, fmt.Errorf("timeout reached before channel close " + - "initiated") case err := <-errChan: return nil, nil, err case closeTxid := <-fin: From f52ee770f427313b6d8dbc20eb7284c8e6fa18d0 Mon Sep 17 00:00:00 2001 From: Matheus Degiovani Date: Thu, 9 Apr 2020 08:29:48 -0300 Subject: [PATCH 2/7] lntest: use ctx when not using macaroons This forces the Dial attempt to succeed or fail before proceeding with node setup. We also log on the node a failure to establish the graph subscription before panicking so that we can more easily find issues. --- lntest/node.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lntest/node.go b/lntest/node.go index 0f864f2a8..00ac34a61 100644 --- a/lntest/node.go +++ b/lntest/node.go @@ -737,14 +737,15 @@ func (hn *HarnessNode) ConnectRPCWithMacaroon(mac *macaroon.Macaroon) ( } opts = append(opts, grpc.WithTransportCredentials(tlsCreds)) + ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) + defer cancel() + if mac == nil { - return grpc.Dial(hn.Cfg.RPCAddr(), opts...) + return grpc.DialContext(ctx, hn.Cfg.RPCAddr(), opts...) } macCred := macaroons.NewMacaroonCredential(mac) opts = append(opts, grpc.WithPerRPCCredentials(macCred)) - ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) - defer cancel() return grpc.DialContext(ctx, hn.Cfg.RPCAddr(), opts...) } From 9f036b431097cabe39184ad09e19d99234d2eb98 Mon Sep 17 00:00:00 2001 From: Matheus Degiovani Date: Mon, 13 Apr 2020 13:33:48 -0300 Subject: [PATCH 3/7] lntest: wait for graph subscription on node init During the channel_backup_restore/restore_during_unlock itest, the node is restored from seed and immediately restarted. Depending on specific timing of the machine, the test harness might not have had the graph subscription processed before the node shuts down, causing the harness to trigger a panic. Reducing this to a synchronous subscription attempt means node initialization necessarily waits until the subscription is done before attempting to restart, reducing flakiness and ensuring correct behavior. --- lntest/node.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lntest/node.go b/lntest/node.go index 00ac34a61..d8eb08ac2 100644 --- a/lntest/node.go +++ b/lntest/node.go @@ -625,9 +625,10 @@ func (hn *HarnessNode) initLightningClient(conn *grpc.ClientConn) error { // Launch the watcher that will hook into graph related topology change // from the PoV of this node. hn.wg.Add(1) - go hn.lightningNetworkWatcher() + subscribed := make(chan error) + go hn.lightningNetworkWatcher(subscribed) - return nil + return <-subscribed } // FetchNodeInfo queries an unlocked node to retrieve its public key. @@ -874,7 +875,7 @@ func getChanPointFundingTxid(chanPoint *lnrpc.ChannelPoint) ([]byte, error) { // closed or opened within the network. In order to dispatch these // notifications, the GraphTopologySubscription client exposed as part of the // gRPC interface is used. -func (hn *HarnessNode) lightningNetworkWatcher() { +func (hn *HarnessNode) lightningNetworkWatcher(subscribed chan error) { defer hn.wg.Done() graphUpdates := make(chan *lnrpc.GraphTopologyUpdate) @@ -884,16 +885,16 @@ func (hn *HarnessNode) lightningNetworkWatcher() { req := &lnrpc.GraphTopologySubscription{} ctx, cancelFunc := context.WithCancel(context.Background()) + defer cancelFunc() topologyClient, err := hn.SubscribeChannelGraph(ctx, req) if err != nil { - // We panic here in case of an error as failure to - // create the topology client will cause all subsequent - // tests to fail. - panic(fmt.Errorf("unable to create topology "+ - "client: %v", err)) + msg := fmt.Sprintf("%s(%d): unable to create topology "+ + "client: %v (%s)", hn.Name(), hn.NodeID, err, + time.Now().String()) + subscribed <- fmt.Errorf(msg) + return } - - defer cancelFunc() + close(subscribed) for { update, err := topologyClient.Recv() From 91538884daa7b94505c8a8721bfd82de47f6a434 Mon Sep 17 00:00:00 2001 From: Matheus Degiovani Date: Mon, 20 Apr 2020 12:09:05 -0300 Subject: [PATCH 4/7] lntest: wait for valid tls cert and macaroon files This changes the wait during node connection to check both for the existance as well as for the validity of the tls cert and macaroon files. This ensures that nodes in the process of starting up don't inadvertedly cause a connection error due to not yet having written the entire file. --- lntest/node.go | 80 +++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 47 deletions(-) diff --git a/lntest/node.go b/lntest/node.go index d8eb08ac2..83d23b4ca 100644 --- a/lntest/node.go +++ b/lntest/node.go @@ -689,28 +689,26 @@ func (hn *HarnessNode) writePidFile() error { func (hn *HarnessNode) ReadMacaroon(macPath string, timeout time.Duration) ( *macaroon.Macaroon, error) { - // Wait until macaroon file is created before using it. - macTimeout := time.After(timeout) - for !fileExists(macPath) { - select { - case <-macTimeout: - return nil, fmt.Errorf("timeout waiting for macaroon "+ - "file %s to be created after %d seconds", - macPath, timeout/time.Second) - case <-time.After(100 * time.Millisecond): + // Wait until macaroon file is created and has valid content before + // using it. + var mac *macaroon.Macaroon + err := wait.NoError(func() error { + macBytes, err := ioutil.ReadFile(macPath) + if err != nil { + return fmt.Errorf("error reading macaroon file: %v", err) } - } - // Now that we know the file exists, read it and return the macaroon. - macBytes, err := ioutil.ReadFile(macPath) - if err != nil { - return nil, err - } - mac := &macaroon.Macaroon{} - if err = mac.UnmarshalBinary(macBytes); err != nil { - return nil, err - } - return mac, nil + newMac := &macaroon.Macaroon{} + if err = newMac.UnmarshalBinary(macBytes); err != nil { + return fmt.Errorf("error unmarshalling macaroon "+ + "file: %v", err) + } + mac = newMac + + return nil + }, timeout) + + return mac, err } // ConnectRPCWithMacaroon uses the TLS certificate and given macaroon to @@ -718,25 +716,24 @@ func (hn *HarnessNode) ReadMacaroon(macPath string, timeout time.Duration) ( func (hn *HarnessNode) ConnectRPCWithMacaroon(mac *macaroon.Macaroon) ( *grpc.ClientConn, error) { - // Wait until TLS certificate is created before using it, up to 30 sec. - tlsTimeout := time.After(DefaultTimeout) - for !fileExists(hn.Cfg.TLSCertPath) { - select { - case <-tlsTimeout: - return nil, fmt.Errorf("timeout waiting for TLS cert " + - "file to be created") - case <-time.After(100 * time.Millisecond): - } + // Wait until TLS certificate is created and has valid content before + // using it, up to 30 sec. + var tlsCreds credentials.TransportCredentials + err := wait.NoError(func() error { + var err error + tlsCreds, err = credentials.NewClientTLSFromFile( + hn.Cfg.TLSCertPath, "", + ) + return err + }, DefaultTimeout) + if err != nil { + return nil, fmt.Errorf("error reading TLS cert: %v", err) } - opts := []grpc.DialOption{grpc.WithBlock()} - tlsCreds, err := credentials.NewClientTLSFromFile( - hn.Cfg.TLSCertPath, "", - ) - if err != nil { - return nil, err + opts := []grpc.DialOption{ + grpc.WithBlock(), + grpc.WithTransportCredentials(tlsCreds), } - opts = append(opts, grpc.WithTransportCredentials(tlsCreds)) ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) defer cancel() @@ -1159,14 +1156,3 @@ func (hn *HarnessNode) WaitForBalance(expectedBalance btcutil.Amount, confirmed return nil } - -// fileExists reports whether the named file or directory exists. -// This function is taken from https://github.com/btcsuite/btcd -func fileExists(name string) bool { - if _, err := os.Stat(name); err != nil { - if os.IsNotExist(err) { - return false - } - } - return true -} From af660d01521190c01d73e77b9354a7eb0cbf5af0 Mon Sep 17 00:00:00 2001 From: Matheus Degiovani Date: Tue, 2 Jun 2020 13:55:55 -0300 Subject: [PATCH 5/7] itest: improve backup check during wt test This replaces an outstanding sleep for a check for a specific state during the test for watchtower use: specifically, that the backup has been sent to the watchtower prior to shutting down Dave. This reduces flakiness in the test that could occur if the Dave shutdown without the backup being comitted to the watchtower, causing the rest of the test to fail. --- lntest/itest/lnd_test.go | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go index 0cf06fd1c..254c7c3af 100644 --- a/lntest/itest/lnd_test.go +++ b/lntest/itest/lnd_test.go @@ -9050,6 +9050,29 @@ func testRevokedCloseRetributionAltruistWatchtower(net *lntest.NetworkHarness, davePreSweepBalance := daveBalResp.ConfirmedBalance + // Wait until the backup has been accepted by the watchtower before + // shutting down Dave. + err = wait.NoError(func() error { + ctxt, cancel := context.WithTimeout(ctxb, defaultTimeout) + defer cancel() + bkpStats, err := dave.WatchtowerClient.Stats(ctxt, &wtclientrpc.StatsRequest{}) + if err != nil { + return err + + } + if bkpStats == nil { + return errors.New("no active backup sessions") + } + if bkpStats.NumBackups == 0 { + return errors.New("no backups accepted") + } + + return nil + }, defaultTimeout) + if err != nil { + t.Fatalf("unable to verify backup task completed: %v", err) + } + // Shutdown Dave to simulate going offline for an extended period of // time. Once he's not watching, Carol will try to breach the channel. restart, err := net.SuspendNode(dave) @@ -9078,9 +9101,6 @@ func testRevokedCloseRetributionAltruistWatchtower(net *lntest.NetworkHarness, t.Fatalf("db copy failed: %v", carolChan.NumUpdates) } - // TODO(conner): add hook for backup completion - time.Sleep(3 * time.Second) - // Now force Carol to execute a *force* channel closure by unilaterally // broadcasting his current channel state. This is actually the // commitment transaction of a prior *revoked* state, so he'll soon From 3e1b4eadbb831c1b593c8eb9356b1826d719e59e Mon Sep 17 00:00:00 2001 From: Matheus Degiovani Date: Tue, 2 Jun 2020 14:00:36 -0300 Subject: [PATCH 6/7] itest: switch to openChannelTimeout This switches a few call sites that used a different timeout when openening channels to the correct openChannelTimeout, which better deal with flakes in the CI. --- lntest/itest/lnd_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go index 254c7c3af..16fdd0b35 100644 --- a/lntest/itest/lnd_test.go +++ b/lntest/itest/lnd_test.go @@ -13382,7 +13382,7 @@ func testChannelBackupUpdates(net *lntest.NetworkHarness, t *harnessTest) { numChans := 2 chanAmt := btcutil.Amount(1000000) for i := 0; i < numChans; i++ { - ctxt, _ := context.WithTimeout(ctxb, defaultTimeout) + ctxt, _ := context.WithTimeout(ctxb, channelOpenTimeout) chanPoint := openChannelAndAssert( ctxt, t, net, net.Alice, carol, lntest.OpenChannelParams{ @@ -13516,7 +13516,7 @@ func testExportChannelBackup(net *lntest.NetworkHarness, t *harnessTest) { numChans := 2 chanAmt := btcutil.Amount(1000000) for i := 0; i < numChans; i++ { - ctxt, _ := context.WithTimeout(ctxb, defaultTimeout) + ctxt, _ := context.WithTimeout(ctxb, channelOpenTimeout) chanPoint := openChannelAndAssert( ctxt, t, net, net.Alice, carol, lntest.OpenChannelParams{ From 0e73d2d243ed7fdb95befc23062b5d35f56d4d11 Mon Sep 17 00:00:00 2001 From: Matheus Degiovani Date: Wed, 8 Apr 2020 09:44:31 -0300 Subject: [PATCH 7/7] itest: assert unspent before performing CPFP This reduces the flakiness of the CPFP test by asserting the wallet has seen the unspent output before attempting to perform the walletkit's BumpFee method. Previously the attempt to bump the fee of the target transaction could be made before the wallet had had a chance to fully process the transaction, causing a flaky error. --- lnrpc/walletrpc/walletkit_server.go | 2 ++ lntest/itest/lnd_test.go | 33 +++++++++++++++++++++++++++++ lntest/itest/onchain.go | 7 ++++-- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/lnrpc/walletrpc/walletkit_server.go b/lnrpc/walletrpc/walletkit_server.go index a52bb9204..d93dd04e7 100644 --- a/lnrpc/walletrpc/walletkit_server.go +++ b/lnrpc/walletrpc/walletkit_server.go @@ -678,6 +678,8 @@ func (w *WalletKit) BumpFee(ctx context.Context, return nil, err } + log.Debugf("Attempting to CPFP outpoint %s", op) + // Since we're unable to perform a bump through RBF, we'll assume the // user is attempting to bump an unconfirmed transaction's fee rate by // sweeping an output within it under control of the wallet with a diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go index 16fdd0b35..380596e5d 100644 --- a/lntest/itest/lnd_test.go +++ b/lntest/itest/lnd_test.go @@ -155,6 +155,39 @@ func assertTxInBlock(t *harnessTest, block *wire.MsgBlock, txid *chainhash.Hash) t.Fatalf("tx was not included in block") } +func assertWalletUnspent(t *harnessTest, node *lntest.HarnessNode, out *lnrpc.OutPoint) { + t.t.Helper() + + err := wait.NoError(func() error { + ctxt, cancel := context.WithTimeout(context.Background(), defaultTimeout) + defer cancel() + unspent, err := node.ListUnspent(ctxt, &lnrpc.ListUnspentRequest{}) + if err != nil { + return err + } + + err = errors.New("tx with wanted txhash never found") + for _, utxo := range unspent.Utxos { + if !bytes.Equal(utxo.Outpoint.TxidBytes, out.TxidBytes) { + continue + } + + err = errors.New("wanted output is not a wallet utxo") + if utxo.Outpoint.OutputIndex != out.OutputIndex { + continue + } + + return nil + } + + return err + }, defaultTimeout) + if err != nil { + t.Fatalf("outpoint %s not unspent by %s's wallet: %v", out, + node.Name(), err) + } +} + func rpcPointToWirePoint(t *harnessTest, chanPoint *lnrpc.ChannelPoint) wire.OutPoint { txid, err := lnd.GetChanPointFundingTxid(chanPoint) if err != nil { diff --git a/lntest/itest/onchain.go b/lntest/itest/onchain.go index 22b50e3c5..7cc4df11e 100644 --- a/lntest/itest/onchain.go +++ b/lntest/itest/onchain.go @@ -86,12 +86,15 @@ func testCPFP(net *lntest.NetworkHarness, t *harnessTest) { t.Fatalf("bob's output was not found within the transaction") } - // We'll attempt to bump the fee of this transaction by performing a - // CPFP from Alice's point of view. + // Wait until bob has seen the tx and considers it as owned. op := &lnrpc.OutPoint{ TxidBytes: txid[:], OutputIndex: uint32(bobOutputIdx), } + assertWalletUnspent(t, net.Bob, op) + + // We'll attempt to bump the fee of this transaction by performing a + // CPFP from Alice's point of view. bumpFeeReq := &walletrpc.BumpFeeRequest{ Outpoint: op, SatPerByte: uint32(sweep.DefaultMaxFeeRate.FeePerKVByte() / 2000),