diff --git a/config.go b/config.go index 53f8f36e5..f20517c27 100644 --- a/config.go +++ b/config.go @@ -751,6 +751,7 @@ func DefaultConfig() Config { Sweeper: lncfg.DefaultSweeperConfig(), Htlcswitch: &lncfg.Htlcswitch{ MailboxDeliveryTimeout: htlcswitch.DefaultMailboxDeliveryTimeout, + QuiescenceTimeout: lncfg.DefaultQuiescenceTimeout, }, GRPC: &GRPCConfig{ ServerPingTime: defaultGrpcServerPingTime, diff --git a/docs/release-notes/release-notes-0.20.0.md b/docs/release-notes/release-notes-0.20.0.md index 814380a09..f61d0a4cd 100644 --- a/docs/release-notes/release-notes-0.20.0.md +++ b/docs/release-notes/release-notes-0.20.0.md @@ -58,6 +58,14 @@ circuit. The indices are only available for forwarding events saved after v0.20. `include_auth_proof`. With the flag, these APIs add AuthProof (signatures from the channel announcement) to the returned ChannelEdge. +* A [new config](https://github.com/lightningnetwork/lnd/pull/10001) value + `--htlcswitch.quiescencetimeout` is added to allow specifying the max duration + the channel can be quiescent. A minimal value of 30s is enforced, and a + default value of 60s is used. This value is used to limit the dependent + protocols like dynamic commitments by restricting that the operation must + finish under this timeout value. Consider using a larger timeout value if you + have a slow network. + ## lncli Additions @@ -180,4 +188,5 @@ reader of a payment request. * Funyug * Mohamed Awnallah * Pins +* Yong Yu * Ziggie diff --git a/htlcswitch/link.go b/htlcswitch/link.go index 25d06d83c..1abf496a1 100644 --- a/htlcswitch/link.go +++ b/htlcswitch/link.go @@ -297,6 +297,12 @@ type ChannelLinkConfig struct { // AuxTrafficShaper is an optional auxiliary traffic shaper that can be // used to manage the bandwidth of the link. AuxTrafficShaper fn.Option[AuxTrafficShaper] + + // QuiescenceTimeout is the max duration that the channel can be + // quiesced. Any dependent protocols (dynamic commitments, splicing, + // etc.) must finish their operations under this timeout value, + // otherwise the node will disconnect. + QuiescenceTimeout time.Duration } // channelLink is the service which drives a channel's commitment update @@ -493,7 +499,7 @@ func NewChannelLink(cfg ChannelLinkConfig, sendMsg: func(s lnwire.Stfu) error { return cfg.Peer.SendMessage(false, &s) }, - timeoutDuration: defaultQuiescenceTimeout, + timeoutDuration: cfg.QuiescenceTimeout, onTimeout: func() { cfg.Peer.Disconnect(ErrQuiescenceTimeout) }, diff --git a/htlcswitch/quiescer.go b/htlcswitch/quiescer.go index 468ad5e70..e16935d85 100644 --- a/htlcswitch/quiescer.go +++ b/htlcswitch/quiescer.go @@ -47,13 +47,9 @@ var ( // ErrQuiescenceTimeout indicates that the quiescer has been quiesced // beyond the allotted time. - ErrQuiescenceTimeout = fmt.Errorf( - "quiescence timeout", - ) + ErrQuiescenceTimeout = fmt.Errorf("quiescence timeout") ) -const defaultQuiescenceTimeout = 30 * time.Second - type StfuReq = fn.Req[fn.Unit, fn.Result[lntypes.ChannelParty]] // Quiescer is the public interface of the quiescence mechanism. Callers of the diff --git a/itest/lnd_quiescence_test.go b/itest/lnd_quiescence_test.go index d782228a8..c60f93842 100644 --- a/itest/lnd_quiescence_test.go +++ b/itest/lnd_quiescence_test.go @@ -13,38 +13,102 @@ import ( // testQuiescence tests whether we can come to agreement on quiescence of a // channel. We initiate quiescence via RPC and if it succeeds we verify that // the expected initiator is the resulting initiator. -// -// NOTE FOR REVIEW: this could be improved by blasting the channel with HTLC -// traffic on both sides to increase the surface area of the change under test. func testQuiescence(ht *lntest.HarnessTest) { - cfg := node.CfgAnchor + aCfg := node.CfgAnchor + bCfg := node.CfgAnchor + + // Use different minbackoff values for Alice and Bob to avoid connection + // race. See https://github.com/lightningnetwork/lnd/issues/6788. + aCfg = append(aCfg, "--minbackoff=1s") + bCfg = append(bCfg, "--minbackoff=60s") + chanPoints, nodes := ht.CreateSimpleNetwork( - [][]string{cfg, cfg}, lntest.OpenChannelParams{ + [][]string{aCfg, bCfg}, lntest.OpenChannelParams{ Amt: btcutil.Amount(1000000), }) alice, bob := nodes[0], nodes[1] chanPoint := chanPoints[0] + // Bob adds an invoice. + payAmt := btcutil.Amount(100000) + invReq := &lnrpc.Invoice{ + Value: int64(payAmt), + } + invoice1 := bob.RPC.AddInvoice(invReq) + + // Before quiescence, Alice should be able to send HTLCs. + req := &routerrpc.SendPaymentRequest{ + PaymentRequest: invoice1.PaymentRequest, + FeeLimitMsat: noFeeLimitMsat, + } + ht.SendPaymentAssertSettled(alice, req) + + // Alice now requires the channel to be quiescent. Once it's done, she + // will not be able to send payments. res := alice.RPC.Quiesce(&devrpc.QuiescenceRequest{ ChanId: chanPoint, }) - require.True(ht, res.Initiator) - req := &routerrpc.SendPaymentRequest{ - Dest: alice.PubKey[:], - Amt: 100, - PaymentHash: ht.Random32Bytes(), - FinalCltvDelta: finalCltvDelta, + // Bob adds another invoice. + invoice2 := bob.RPC.AddInvoice(invReq) + + // Alice now tries to pay the second invoice. + // + // This fails with insufficient balance because the bandwidth manager + // reports 0 bandwidth if a link is not eligible for forwarding, which + // is the case during quiescence. + req = &routerrpc.SendPaymentRequest{ + PaymentRequest: invoice2.PaymentRequest, FeeLimitMsat: noFeeLimitMsat, } - ht.SendPaymentAssertFail( - bob, req, - // This fails with insufficient balance because the bandwidth - // manager reports 0 bandwidth if a link is not eligible for - // forwarding, which is the case during quiescence. + alice, req, lnrpc.PaymentFailureReason_FAILURE_REASON_INSUFFICIENT_BALANCE, ) + + // Bob now subscribes the peer events, which will be used to assert the + // connection updates. + client := bob.RPC.SubscribePeerEvents(&lnrpc.PeerEventSubscription{}) + + // Alice now restarts with an extremely short quiescence timeout. + ht.RestartNodeWithExtraArgs( + alice, []string{"--htlcswitch.quiescencetimeout=1ms"}, + ) + + // Bob should be reconnected to Alice. + ht.AssertPeerReconnected(client) + + // Once restarted, the channel is no longer quiescent so Alice can + // finish the payment for invoice2. + ht.SendPaymentAssertSettled(alice, req) + + // Bob adds another invoice. + invoice3 := bob.RPC.AddInvoice(invReq) + + // Alice now requires the channel to be quiescent again. Since we are + // using a short timeout (1ms) for the quiescence, Alice should + // disconnect from Bob immediately. + res = alice.RPC.Quiesce(&devrpc.QuiescenceRequest{ + ChanId: chanPoint, + }) + require.True(ht, res.Initiator) + + // The above quiescence timeout will cause Alice to disconnect with Bob. + // However, since the connection has an open channel, Alice and Bob will + // be reconnected shortly. + ht.AssertPeerReconnected(client) + + // Make sure Alice has finished the connection too before attempting the + // payment below. + ht.AssertConnected(alice, bob) + + // Assert that Alice can pay invoice3. This implicitly checks that the + // above quiescence is terminated. + req = &routerrpc.SendPaymentRequest{ + PaymentRequest: invoice3.PaymentRequest, + FeeLimitMsat: noFeeLimitMsat, + } + ht.SendPaymentAssertSettled(alice, req) } diff --git a/lncfg/htlcswitch.go b/lncfg/htlcswitch.go index 613b18991..12c421ca7 100644 --- a/lncfg/htlcswitch.go +++ b/lncfg/htlcswitch.go @@ -11,11 +11,21 @@ var ( // where both side send 483 payments at the same time to stress test // lnd. MaxMailboxDeliveryTimeout = 2 * time.Minute + + // minQuiescenceTimeout specifies the minimal timeout value that can be + // used for `QuiescenceTimeout`. + minQuiescenceTimeout = 30 * time.Second + + // DefaultQuiescenceTimeout specifies the default value to be used for + // `QuiescenceTimeout`. + DefaultQuiescenceTimeout = 60 * time.Second ) //nolint:ll type Htlcswitch struct { MailboxDeliveryTimeout time.Duration `long:"mailboxdeliverytimeout" description:"The timeout value when delivering HTLCs to a channel link. Setting this value too small will result in local payment failures if large number of payments are sent over a short period."` + + QuiescenceTimeout time.Duration `long:"quiescencetimeout" description:"The max duration that the channel can be quiesced. Any dependent protocols (dynamic commitments, splicing, etc.) must finish their operations under this timeout value, otherwise the node will disconnect."` } // Validate checks the values configured for htlcswitch. @@ -30,5 +40,12 @@ func (h *Htlcswitch) Validate() error { MaxMailboxDeliveryTimeout) } + // Skip the validation for integration tests so we can use a smaller + // timeout value to check the timeout behavior. + if !IsDevBuild() && h.QuiescenceTimeout < minQuiescenceTimeout { + return fmt.Errorf("quiescencetimeout: %v below minimal: %v", + h.QuiescenceTimeout, minQuiescenceTimeout) + } + return nil } diff --git a/lncfg/protocol.go b/lncfg/protocol.go index 3c5220d72..4d348b215 100644 --- a/lncfg/protocol.go +++ b/lncfg/protocol.go @@ -147,7 +147,7 @@ func (l *ProtocolOptions) NoExperimentalEndorsement() bool { // NoQuiescence returns true if quiescence is disabled. func (l *ProtocolOptions) NoQuiescence() bool { - return true + return false } // CustomMessageOverrides returns the set of protocol messages that we override diff --git a/lntest/harness_assertion.go b/lntest/harness_assertion.go index 2540ef6b0..5a598033a 100644 --- a/lntest/harness_assertion.go +++ b/lntest/harness_assertion.go @@ -2878,3 +2878,65 @@ func (h *HarnessTest) AssertForceCloseAndAnchorTxnsInMempool() (*wire.MsgTx, return nil, nil } } + +// ReceiveSendToRouteUpdate waits until a message is received on the +// PeerEventsClient stream or the timeout is reached. +func (h *HarnessTest) ReceivePeerEvent( + stream rpc.PeerEventsClient) (*lnrpc.PeerEvent, error) { + + eventChan := make(chan *lnrpc.PeerEvent, 1) + errChan := make(chan error, 1) + go func() { + // Consume one message. This will block until the message is + // received. + resp, err := stream.Recv() + if err != nil { + errChan <- err + + return + } + eventChan <- resp + }() + + select { + case <-time.After(DefaultTimeout): + require.Fail(h, "timeout", "timeout waiting for peer event") + return nil, nil + + case err := <-errChan: + return nil, err + + case event := <-eventChan: + return event, nil + } +} + +// AssertPeerOnlineEvent reads an event from the PeerEventsClient stream and +// asserts it's an online event. +func (h HarnessTest) AssertPeerOnlineEvent(stream rpc.PeerEventsClient) { + event, err := h.ReceivePeerEvent(stream) + require.NoError(h, err) + + require.Equal(h, lnrpc.PeerEvent_PEER_ONLINE, event.Type) +} + +// AssertPeerOfflineEvent reads an event from the PeerEventsClient stream and +// asserts it's an offline event. +func (h HarnessTest) AssertPeerOfflineEvent(stream rpc.PeerEventsClient) { + event, err := h.ReceivePeerEvent(stream) + require.NoError(h, err) + + require.Equal(h, lnrpc.PeerEvent_PEER_OFFLINE, event.Type) +} + +// AssertPeerReconnected reads two events from the PeerEventsClient stream. The +// first event must be an offline event, and the second event must be an online +// event. This is a typical reconnection scenario, where the peer is +// disconnected then connected again. +// +// NOTE: It's important to make the subscription before the disconnection +// happens, otherwise the events can be missed. +func (h HarnessTest) AssertPeerReconnected(stream rpc.PeerEventsClient) { + h.AssertPeerOfflineEvent(stream) + h.AssertPeerOnlineEvent(stream) +} diff --git a/lntest/rpc/lnd.go b/lntest/rpc/lnd.go index 1a49cd18b..055b3f29b 100644 --- a/lntest/rpc/lnd.go +++ b/lntest/rpc/lnd.go @@ -755,3 +755,19 @@ func (h *HarnessRPC) Quiesce( return res } + +type PeerEventsClient lnrpc.Lightning_SubscribePeerEventsClient + +// SubscribePeerEvents makes a RPC call to the node's SubscribePeerEvents and +// returns the stream client. +func (h *HarnessRPC) SubscribePeerEvents( + req *lnrpc.PeerEventSubscription) PeerEventsClient { + + // SubscribePeerEvents needs to have the context alive for the entire + // test case as the returned client will be used for send and receive + // events stream. Thus we use runCtx here instead of a timeout context. + resp, err := h.LN.SubscribePeerEvents(h.runCtx, req) + h.NoError(err, "SubscribePeerEvents") + + return resp +} diff --git a/peer/brontide.go b/peer/brontide.go index 514256420..5b0d84f2f 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -438,6 +438,12 @@ type Config struct { // should have the quiescence feature disabled. DisallowQuiescence bool + // QuiescenceTimeout is the max duration that the channel can be + // quiesced. Any dependent protocols (dynamic commitments, splicing, + // etc.) must finish their operations under this timeout value, + // otherwise the node will disconnect. + QuiescenceTimeout time.Duration + // MaxFeeExposure limits the number of outstanding fees in a channel. // This value will be passed to created links. MaxFeeExposure lnwire.MilliSatoshi @@ -1449,7 +1455,8 @@ func (p *Brontide) addLink(chanPoint *wire.OutPoint, ShouldFwdExpEndorsement: p.cfg.ShouldFwdExpEndorsement, DisallowQuiescence: p.cfg.DisallowQuiescence || !p.remoteFeatures.HasFeature(lnwire.QuiescenceOptional), - AuxTrafficShaper: p.cfg.AuxTrafficShaper, + AuxTrafficShaper: p.cfg.AuxTrafficShaper, + QuiescenceTimeout: p.cfg.QuiescenceTimeout, } // Before adding our new link, purge the switch of any pending or live diff --git a/sample-lnd.conf b/sample-lnd.conf index 58020a8ba..7e105de27 100644 --- a/sample-lnd.conf +++ b/sample-lnd.conf @@ -1913,6 +1913,10 @@ ; are sent over a short period. ; htlcswitch.mailboxdeliverytimeout=1m +; The max duration that the channel can be quiesced. Any dependent protocols +; (dynamic commitments, splicing, etc.) must finish their operations under this +; timeout value, otherwise the node will disconnect. +; htlcswitch.quiescencetimeout=1m [grpc] diff --git a/server.go b/server.go index 3626158fd..4da9cbcea 100644 --- a/server.go +++ b/server.go @@ -4529,6 +4529,7 @@ func (s *server) peerConnected(conn net.Conn, connReq *connmgr.ConnReq, AddLocalAlias: s.aliasMgr.AddLocalAlias, DisallowRouteBlinding: s.cfg.ProtocolOptions.NoRouteBlinding(), DisallowQuiescence: s.cfg.ProtocolOptions.NoQuiescence(), + QuiescenceTimeout: s.cfg.Htlcswitch.QuiescenceTimeout, MaxFeeExposure: thresholdMSats, Quit: s.quit, AuxLeafStore: s.implCfg.AuxLeafStore, @@ -4613,8 +4614,6 @@ func (s *server) addPeer(p *peer.Brontide) { // to clients listening for peer events. var pubKey [33]byte copy(pubKey[:], pubBytes) - - s.peerNotifier.NotifyPeerOnline(pubKey) } // peerInitializer asynchronously starts a newly connected peer after it has @@ -4682,6 +4681,10 @@ func (s *server) peerInitializer(p *peer.Brontide) { } } delete(s.peerConnectedListeners, pubStr) + + // Since the peer has been fully initialized, now it's time to notify + // the RPC about the peer online event. + s.peerNotifier.NotifyPeerOnline([33]byte(pubBytes)) } // peerTerminationWatcher waits until a peer has been disconnected unexpectedly,