From 19873f721f4ac1c544c2f9eda0075101b6e502e6 Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Mon, 9 Mar 2020 11:10:14 -0300
Subject: [PATCH 1/7] lntest: more descriptive errors for CloseChannel

This improves the error reporting for the harness' CloseChannel so that
the exact step where closure fails can be better indicated.

This is to help debug some flaky failures in the CI.
---
 lntest/harness.go | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lntest/harness.go b/lntest/harness.go
index eb75d0fe3..89a4141ba 100644
--- a/lntest/harness.go
+++ b/lntest/harness.go
@@ -1128,7 +1128,8 @@ func (n *NetworkHarness) CloseChannel(ctx context.Context,
 		// within the network.
 		closeResp, err := closeRespStream.Recv()
 		if err != nil {
-			errChan <- err
+			errChan <- fmt.Errorf("unable to recv() from close "+
+				"stream: %v", err)
 			return
 		}
 		pendingClose, ok := closeResp.Update.(*lnrpc.CloseStatusUpdate_ClosePending)
@@ -1140,11 +1141,13 @@ func (n *NetworkHarness) CloseChannel(ctx context.Context,
 
 		closeTxid, err := chainhash.NewHash(pendingClose.ClosePending.Txid)
 		if err != nil {
-			errChan <- err
+			errChan <- fmt.Errorf("unable to decode closeTxid: "+
+				"%v", err)
 			return
 		}
 		if err := n.WaitForTxBroadcast(ctx, *closeTxid); err != nil {
-			errChan <- err
+			errChan <- fmt.Errorf("error while waiting for "+
+				"broadcast tx: %v", err)
 			return
 		}
 		fin <- closeTxid
@@ -1153,9 +1156,6 @@ func (n *NetworkHarness) CloseChannel(ctx context.Context,
 	// Wait until either the deadline for the context expires, an error
 	// occurs, or the channel close update is received.
 	select {
-	case <-ctx.Done():
-		return nil, nil, fmt.Errorf("timeout reached before channel close " +
-			"initiated")
 	case err := <-errChan:
 		return nil, nil, err
 	case closeTxid := <-fin:

From f52ee770f427313b6d8dbc20eb7284c8e6fa18d0 Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Thu, 9 Apr 2020 08:29:48 -0300
Subject: [PATCH 2/7] lntest: use ctx when not using macaroons

This forces the Dial attempt to succeed or fail before proceeding with
node setup.

We also log on the node a failure to establish the graph subscription
before panicking so that we can more easily find issues.
---
 lntest/node.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lntest/node.go b/lntest/node.go
index 0f864f2a8..00ac34a61 100644
--- a/lntest/node.go
+++ b/lntest/node.go
@@ -737,14 +737,15 @@ func (hn *HarnessNode) ConnectRPCWithMacaroon(mac *macaroon.Macaroon) (
 	}
 	opts = append(opts, grpc.WithTransportCredentials(tlsCreds))
 
+	ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
+	defer cancel()
+
 	if mac == nil {
-		return grpc.Dial(hn.Cfg.RPCAddr(), opts...)
+		return grpc.DialContext(ctx, hn.Cfg.RPCAddr(), opts...)
 	}
 	macCred := macaroons.NewMacaroonCredential(mac)
 	opts = append(opts, grpc.WithPerRPCCredentials(macCred))
 
-	ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
-	defer cancel()
 	return grpc.DialContext(ctx, hn.Cfg.RPCAddr(), opts...)
 }
 

From 9f036b431097cabe39184ad09e19d99234d2eb98 Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Mon, 13 Apr 2020 13:33:48 -0300
Subject: [PATCH 3/7] lntest: wait for graph subscription on node init

During the channel_backup_restore/restore_during_unlock itest, the node
is restored from seed and immediately restarted. Depending on specific
timing of the machine, the test harness might not have had the graph
subscription processed before the node shuts down, causing the harness
to trigger a panic.

Reducing this to a synchronous subscription attempt means node
initialization necessarily waits until the subscription is done before
attempting to restart, reducing flakiness and ensuring correct behavior.
---
 lntest/node.go | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lntest/node.go b/lntest/node.go
index 00ac34a61..d8eb08ac2 100644
--- a/lntest/node.go
+++ b/lntest/node.go
@@ -625,9 +625,10 @@ func (hn *HarnessNode) initLightningClient(conn *grpc.ClientConn) error {
 	// Launch the watcher that will hook into graph related topology change
 	// from the PoV of this node.
 	hn.wg.Add(1)
-	go hn.lightningNetworkWatcher()
+	subscribed := make(chan error)
+	go hn.lightningNetworkWatcher(subscribed)
 
-	return nil
+	return <-subscribed
 }
 
 // FetchNodeInfo queries an unlocked node to retrieve its public key.
@@ -874,7 +875,7 @@ func getChanPointFundingTxid(chanPoint *lnrpc.ChannelPoint) ([]byte, error) {
 // closed or opened within the network. In order to dispatch these
 // notifications, the GraphTopologySubscription client exposed as part of the
 // gRPC interface is used.
-func (hn *HarnessNode) lightningNetworkWatcher() {
+func (hn *HarnessNode) lightningNetworkWatcher(subscribed chan error) {
 	defer hn.wg.Done()
 
 	graphUpdates := make(chan *lnrpc.GraphTopologyUpdate)
@@ -884,16 +885,16 @@ func (hn *HarnessNode) lightningNetworkWatcher() {
 
 		req := &lnrpc.GraphTopologySubscription{}
 		ctx, cancelFunc := context.WithCancel(context.Background())
+		defer cancelFunc()
 		topologyClient, err := hn.SubscribeChannelGraph(ctx, req)
 		if err != nil {
-			// We panic here in case of an error as failure to
-			// create the topology client will cause all subsequent
-			// tests to fail.
-			panic(fmt.Errorf("unable to create topology "+
-				"client: %v", err))
+			msg := fmt.Sprintf("%s(%d): unable to create topology "+
+				"client: %v (%s)", hn.Name(), hn.NodeID, err,
+				time.Now().String())
+			subscribed <- fmt.Errorf(msg)
+			return
 		}
-
-		defer cancelFunc()
+		close(subscribed)
 
 		for {
 			update, err := topologyClient.Recv()

From 91538884daa7b94505c8a8721bfd82de47f6a434 Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Mon, 20 Apr 2020 12:09:05 -0300
Subject: [PATCH 4/7] lntest: wait for valid tls cert and macaroon files

This changes the wait during node connection to check both for the
existance as well as for the validity of the tls cert and macaroon
files.

This ensures that nodes in the process of starting up don't inadvertedly
cause a connection error due to not yet having written the entire file.
---
 lntest/node.go | 80 +++++++++++++++++++++-----------------------------
 1 file changed, 33 insertions(+), 47 deletions(-)

diff --git a/lntest/node.go b/lntest/node.go
index d8eb08ac2..83d23b4ca 100644
--- a/lntest/node.go
+++ b/lntest/node.go
@@ -689,28 +689,26 @@ func (hn *HarnessNode) writePidFile() error {
 func (hn *HarnessNode) ReadMacaroon(macPath string, timeout time.Duration) (
 	*macaroon.Macaroon, error) {
 
-	// Wait until macaroon file is created before using it.
-	macTimeout := time.After(timeout)
-	for !fileExists(macPath) {
-		select {
-		case <-macTimeout:
-			return nil, fmt.Errorf("timeout waiting for macaroon "+
-				"file %s to be created after %d seconds",
-				macPath, timeout/time.Second)
-		case <-time.After(100 * time.Millisecond):
+	// Wait until macaroon file is created and has valid content before
+	// using it.
+	var mac *macaroon.Macaroon
+	err := wait.NoError(func() error {
+		macBytes, err := ioutil.ReadFile(macPath)
+		if err != nil {
+			return fmt.Errorf("error reading macaroon file: %v", err)
 		}
-	}
 
-	// Now that we know the file exists, read it and return the macaroon.
-	macBytes, err := ioutil.ReadFile(macPath)
-	if err != nil {
-		return nil, err
-	}
-	mac := &macaroon.Macaroon{}
-	if err = mac.UnmarshalBinary(macBytes); err != nil {
-		return nil, err
-	}
-	return mac, nil
+		newMac := &macaroon.Macaroon{}
+		if err = newMac.UnmarshalBinary(macBytes); err != nil {
+			return fmt.Errorf("error unmarshalling macaroon "+
+				"file: %v", err)
+		}
+		mac = newMac
+
+		return nil
+	}, timeout)
+
+	return mac, err
 }
 
 // ConnectRPCWithMacaroon uses the TLS certificate and given macaroon to
@@ -718,25 +716,24 @@ func (hn *HarnessNode) ReadMacaroon(macPath string, timeout time.Duration) (
 func (hn *HarnessNode) ConnectRPCWithMacaroon(mac *macaroon.Macaroon) (
 	*grpc.ClientConn, error) {
 
-	// Wait until TLS certificate is created before using it, up to 30 sec.
-	tlsTimeout := time.After(DefaultTimeout)
-	for !fileExists(hn.Cfg.TLSCertPath) {
-		select {
-		case <-tlsTimeout:
-			return nil, fmt.Errorf("timeout waiting for TLS cert " +
-				"file to be created")
-		case <-time.After(100 * time.Millisecond):
-		}
+	// Wait until TLS certificate is created and has valid content before
+	// using it, up to 30 sec.
+	var tlsCreds credentials.TransportCredentials
+	err := wait.NoError(func() error {
+		var err error
+		tlsCreds, err = credentials.NewClientTLSFromFile(
+			hn.Cfg.TLSCertPath, "",
+		)
+		return err
+	}, DefaultTimeout)
+	if err != nil {
+		return nil, fmt.Errorf("error reading TLS cert: %v", err)
 	}
 
-	opts := []grpc.DialOption{grpc.WithBlock()}
-	tlsCreds, err := credentials.NewClientTLSFromFile(
-		hn.Cfg.TLSCertPath, "",
-	)
-	if err != nil {
-		return nil, err
+	opts := []grpc.DialOption{
+		grpc.WithBlock(),
+		grpc.WithTransportCredentials(tlsCreds),
 	}
-	opts = append(opts, grpc.WithTransportCredentials(tlsCreds))
 
 	ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
 	defer cancel()
@@ -1159,14 +1156,3 @@ func (hn *HarnessNode) WaitForBalance(expectedBalance btcutil.Amount, confirmed
 
 	return nil
 }
-
-// fileExists reports whether the named file or directory exists.
-// This function is taken from https://github.com/btcsuite/btcd
-func fileExists(name string) bool {
-	if _, err := os.Stat(name); err != nil {
-		if os.IsNotExist(err) {
-			return false
-		}
-	}
-	return true
-}

From af660d01521190c01d73e77b9354a7eb0cbf5af0 Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Tue, 2 Jun 2020 13:55:55 -0300
Subject: [PATCH 5/7] itest: improve backup check during wt test

This replaces an outstanding sleep for a check for a specific state
during the test for watchtower use: specifically, that the backup has
been sent to the watchtower prior to shutting down Dave.

This reduces flakiness in the test that could occur if the Dave shutdown
without the backup being comitted to the watchtower, causing the rest of
the test to fail.
---
 lntest/itest/lnd_test.go | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go
index 0cf06fd1c..254c7c3af 100644
--- a/lntest/itest/lnd_test.go
+++ b/lntest/itest/lnd_test.go
@@ -9050,6 +9050,29 @@ func testRevokedCloseRetributionAltruistWatchtower(net *lntest.NetworkHarness,
 
 	davePreSweepBalance := daveBalResp.ConfirmedBalance
 
+	// Wait until the backup has been accepted by the watchtower before
+	// shutting down Dave.
+	err = wait.NoError(func() error {
+		ctxt, cancel := context.WithTimeout(ctxb, defaultTimeout)
+		defer cancel()
+		bkpStats, err := dave.WatchtowerClient.Stats(ctxt, &wtclientrpc.StatsRequest{})
+		if err != nil {
+			return err
+
+		}
+		if bkpStats == nil {
+			return errors.New("no active backup sessions")
+		}
+		if bkpStats.NumBackups == 0 {
+			return errors.New("no backups accepted")
+		}
+
+		return nil
+	}, defaultTimeout)
+	if err != nil {
+		t.Fatalf("unable to verify backup task completed: %v", err)
+	}
+
 	// Shutdown Dave to simulate going offline for an extended period of
 	// time. Once he's not watching, Carol will try to breach the channel.
 	restart, err := net.SuspendNode(dave)
@@ -9078,9 +9101,6 @@ func testRevokedCloseRetributionAltruistWatchtower(net *lntest.NetworkHarness,
 		t.Fatalf("db copy failed: %v", carolChan.NumUpdates)
 	}
 
-	// TODO(conner): add hook for backup completion
-	time.Sleep(3 * time.Second)
-
 	// Now force Carol to execute a *force* channel closure by unilaterally
 	// broadcasting his current channel state. This is actually the
 	// commitment transaction of a prior *revoked* state, so he'll soon

From 3e1b4eadbb831c1b593c8eb9356b1826d719e59e Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Tue, 2 Jun 2020 14:00:36 -0300
Subject: [PATCH 6/7] itest: switch to openChannelTimeout

This switches a few call sites that used a different timeout when
openening channels to the correct openChannelTimeout, which better deal
with flakes in the CI.
---
 lntest/itest/lnd_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go
index 254c7c3af..16fdd0b35 100644
--- a/lntest/itest/lnd_test.go
+++ b/lntest/itest/lnd_test.go
@@ -13382,7 +13382,7 @@ func testChannelBackupUpdates(net *lntest.NetworkHarness, t *harnessTest) {
 	numChans := 2
 	chanAmt := btcutil.Amount(1000000)
 	for i := 0; i < numChans; i++ {
-		ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
+		ctxt, _ := context.WithTimeout(ctxb, channelOpenTimeout)
 		chanPoint := openChannelAndAssert(
 			ctxt, t, net, net.Alice, carol,
 			lntest.OpenChannelParams{
@@ -13516,7 +13516,7 @@ func testExportChannelBackup(net *lntest.NetworkHarness, t *harnessTest) {
 	numChans := 2
 	chanAmt := btcutil.Amount(1000000)
 	for i := 0; i < numChans; i++ {
-		ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
+		ctxt, _ := context.WithTimeout(ctxb, channelOpenTimeout)
 		chanPoint := openChannelAndAssert(
 			ctxt, t, net, net.Alice, carol,
 			lntest.OpenChannelParams{

From 0e73d2d243ed7fdb95befc23062b5d35f56d4d11 Mon Sep 17 00:00:00 2001
From: Matheus Degiovani <opensource@matheusd.com>
Date: Wed, 8 Apr 2020 09:44:31 -0300
Subject: [PATCH 7/7] itest: assert unspent before performing CPFP

This reduces the flakiness of the CPFP test by asserting the wallet has
seen the unspent output before attempting to perform the walletkit's
BumpFee method.

Previously the attempt to bump the fee of the target transaction could
be made before the wallet had had a chance to fully process the
transaction, causing a flaky error.
---
 lnrpc/walletrpc/walletkit_server.go |  2 ++
 lntest/itest/lnd_test.go            | 33 +++++++++++++++++++++++++++++
 lntest/itest/onchain.go             |  7 ++++--
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/lnrpc/walletrpc/walletkit_server.go b/lnrpc/walletrpc/walletkit_server.go
index a52bb9204..d93dd04e7 100644
--- a/lnrpc/walletrpc/walletkit_server.go
+++ b/lnrpc/walletrpc/walletkit_server.go
@@ -678,6 +678,8 @@ func (w *WalletKit) BumpFee(ctx context.Context,
 		return nil, err
 	}
 
+	log.Debugf("Attempting to CPFP outpoint %s", op)
+
 	// Since we're unable to perform a bump through RBF, we'll assume the
 	// user is attempting to bump an unconfirmed transaction's fee rate by
 	// sweeping an output within it under control of the wallet with a
diff --git a/lntest/itest/lnd_test.go b/lntest/itest/lnd_test.go
index 16fdd0b35..380596e5d 100644
--- a/lntest/itest/lnd_test.go
+++ b/lntest/itest/lnd_test.go
@@ -155,6 +155,39 @@ func assertTxInBlock(t *harnessTest, block *wire.MsgBlock, txid *chainhash.Hash)
 	t.Fatalf("tx was not included in block")
 }
 
+func assertWalletUnspent(t *harnessTest, node *lntest.HarnessNode, out *lnrpc.OutPoint) {
+	t.t.Helper()
+
+	err := wait.NoError(func() error {
+		ctxt, cancel := context.WithTimeout(context.Background(), defaultTimeout)
+		defer cancel()
+		unspent, err := node.ListUnspent(ctxt, &lnrpc.ListUnspentRequest{})
+		if err != nil {
+			return err
+		}
+
+		err = errors.New("tx with wanted txhash never found")
+		for _, utxo := range unspent.Utxos {
+			if !bytes.Equal(utxo.Outpoint.TxidBytes, out.TxidBytes) {
+				continue
+			}
+
+			err = errors.New("wanted output is not a wallet utxo")
+			if utxo.Outpoint.OutputIndex != out.OutputIndex {
+				continue
+			}
+
+			return nil
+		}
+
+		return err
+	}, defaultTimeout)
+	if err != nil {
+		t.Fatalf("outpoint %s not unspent by %s's wallet: %v", out,
+			node.Name(), err)
+	}
+}
+
 func rpcPointToWirePoint(t *harnessTest, chanPoint *lnrpc.ChannelPoint) wire.OutPoint {
 	txid, err := lnd.GetChanPointFundingTxid(chanPoint)
 	if err != nil {
diff --git a/lntest/itest/onchain.go b/lntest/itest/onchain.go
index 22b50e3c5..7cc4df11e 100644
--- a/lntest/itest/onchain.go
+++ b/lntest/itest/onchain.go
@@ -86,12 +86,15 @@ func testCPFP(net *lntest.NetworkHarness, t *harnessTest) {
 		t.Fatalf("bob's output was not found within the transaction")
 	}
 
-	// We'll attempt to bump the fee of this transaction by performing a
-	// CPFP from Alice's point of view.
+	// Wait until bob has seen the tx and considers it as owned.
 	op := &lnrpc.OutPoint{
 		TxidBytes:   txid[:],
 		OutputIndex: uint32(bobOutputIdx),
 	}
+	assertWalletUnspent(t, net.Bob, op)
+
+	// We'll attempt to bump the fee of this transaction by performing a
+	// CPFP from Alice's point of view.
 	bumpFeeReq := &walletrpc.BumpFeeRequest{
 		Outpoint:   op,
 		SatPerByte: uint32(sweep.DefaultMaxFeeRate.FeePerKVByte() / 2000),