From 127b0e9f41e83e4839abf56768d02969f72a0d1d Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 27 Jun 2025 19:52:56 +0800 Subject: [PATCH 1/7] multi: add new config `QuiescenceTimeout` This commit removes the `defaultQuiescenceTimeout` and makes the quiescence timeout configurable, as different nodes have different network environments. In addition the default timeout has been increased from 30s to 60s. --- config.go | 1 + htlcswitch/link.go | 8 +++++++- htlcswitch/quiescer.go | 6 +----- lncfg/htlcswitch.go | 15 +++++++++++++++ peer/brontide.go | 9 ++++++++- sample-lnd.conf | 4 ++++ server.go | 1 + 7 files changed, 37 insertions(+), 7 deletions(-) diff --git a/config.go b/config.go index 53f8f36e5..f20517c27 100644 --- a/config.go +++ b/config.go @@ -751,6 +751,7 @@ func DefaultConfig() Config { Sweeper: lncfg.DefaultSweeperConfig(), Htlcswitch: &lncfg.Htlcswitch{ MailboxDeliveryTimeout: htlcswitch.DefaultMailboxDeliveryTimeout, + QuiescenceTimeout: lncfg.DefaultQuiescenceTimeout, }, GRPC: &GRPCConfig{ ServerPingTime: defaultGrpcServerPingTime, diff --git a/htlcswitch/link.go b/htlcswitch/link.go index 3aa47caf3..1a6fd7988 100644 --- a/htlcswitch/link.go +++ b/htlcswitch/link.go @@ -301,6 +301,12 @@ type ChannelLinkConfig struct { // AuxTrafficShaper is an optional auxiliary traffic shaper that can be // used to manage the bandwidth of the link. AuxTrafficShaper fn.Option[AuxTrafficShaper] + + // QuiescenceTimeout is the max duration that the channel can be + // quiesced. Any dependent protocols (dynamic commitments, splicing, + // etc.) must finish their operations under this timeout value, + // otherwise the node will disconnect.
+ QuiescenceTimeout time.Duration } // channelLink is the service which drives a channel's commitment update @@ -497,7 +503,7 @@ func NewChannelLink(cfg ChannelLinkConfig, sendMsg: func(s lnwire.Stfu) error { return cfg.Peer.SendMessage(false, &s) }, - timeoutDuration: defaultQuiescenceTimeout, + timeoutDuration: cfg.QuiescenceTimeout, onTimeout: func() { cfg.Peer.Disconnect(ErrQuiescenceTimeout) }, diff --git a/htlcswitch/quiescer.go b/htlcswitch/quiescer.go index 468ad5e70..e16935d85 100644 --- a/htlcswitch/quiescer.go +++ b/htlcswitch/quiescer.go @@ -47,13 +47,9 @@ var ( // ErrQuiescenceTimeout indicates that the quiescer has been quiesced // beyond the allotted time. - ErrQuiescenceTimeout = fmt.Errorf( - "quiescence timeout", - ) + ErrQuiescenceTimeout = fmt.Errorf("quiescence timeout") ) -const defaultQuiescenceTimeout = 30 * time.Second - type StfuReq = fn.Req[fn.Unit, fn.Result[lntypes.ChannelParty]] // Quiescer is the public interface of the quiescence mechanism. Callers of the diff --git a/lncfg/htlcswitch.go b/lncfg/htlcswitch.go index 613b18991..9942df22b 100644 --- a/lncfg/htlcswitch.go +++ b/lncfg/htlcswitch.go @@ -11,11 +11,21 @@ var ( // where both side send 483 payments at the same time to stress test // lnd. MaxMailboxDeliveryTimeout = 2 * time.Minute + + // minQuiescenceTimeout specifies the minimal timeout value that can be + // used for `QuiescenceTimeout`. + minQuiescenceTimeout = 30 * time.Second + + // DefaultQuiescenceTimeout specifies the default value to be used for + // `QuiescenceTimeout`. + DefaultQuiescenceTimeout = 60 * time.Second ) //nolint:ll type Htlcswitch struct { MailboxDeliveryTimeout time.Duration `long:"mailboxdeliverytimeout" description:"The timeout value when delivering HTLCs to a channel link. 
Setting this value too small will result in local payment failures if large number of payments are sent over a short period."` + + QuiescenceTimeout time.Duration `long:"quiescencetimeout" description:"The max duration that the channel can be quiesced. Any dependent protocols (dynamic commitments, splicing, etc.) must finish their operations under this timeout value, otherwise the node will disconnect."` } // Validate checks the values configured for htlcswitch. @@ -30,5 +40,10 @@ func (h *Htlcswitch) Validate() error { MaxMailboxDeliveryTimeout) } + if h.QuiescenceTimeout < minQuiescenceTimeout { + return fmt.Errorf("quiescencetimeout: %v below minimal: %v", + h.QuiescenceTimeout, minQuiescenceTimeout) + } + return nil } diff --git a/peer/brontide.go b/peer/brontide.go index 213f28c76..34e504a37 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -438,6 +438,12 @@ type Config struct { // should have the quiescence feature disabled. DisallowQuiescence bool + // QuiescenceTimeout is the max duration that the channel can be + // quiesced. Any dependent protocols (dynamic commitments, splicing, + // etc.) must finish their operations under this timeout value, + // otherwise the node will disconnect. + QuiescenceTimeout time.Duration + // MaxFeeExposure limits the number of outstanding fees in a channel. // This value will be passed to created links. 
MaxFeeExposure lnwire.MilliSatoshi @@ -1449,7 +1455,8 @@ func (p *Brontide) addLink(chanPoint *wire.OutPoint, ShouldFwdExpEndorsement: p.cfg.ShouldFwdExpEndorsement, DisallowQuiescence: p.cfg.DisallowQuiescence || !p.remoteFeatures.HasFeature(lnwire.QuiescenceOptional), - AuxTrafficShaper: p.cfg.AuxTrafficShaper, + AuxTrafficShaper: p.cfg.AuxTrafficShaper, + QuiescenceTimeout: p.cfg.QuiescenceTimeout, } // Before adding our new link, purge the switch of any pending or live diff --git a/sample-lnd.conf b/sample-lnd.conf index 58020a8ba..7e105de27 100644 --- a/sample-lnd.conf +++ b/sample-lnd.conf @@ -1913,6 +1913,10 @@ ; are sent over a short period. ; htlcswitch.mailboxdeliverytimeout=1m +; The max duration that the channel can be quiesced. Any dependent protocols +; (dynamic commitments, splicing, etc.) must finish their operations under this +; timeout value, otherwise the node will disconnect. +; htlcswitch.quiescencetimeout=1m [grpc] diff --git a/server.go b/server.go index 3626158fd..952877d6e 100644 --- a/server.go +++ b/server.go @@ -4529,6 +4529,7 @@ func (s *server) peerConnected(conn net.Conn, connReq *connmgr.ConnReq, AddLocalAlias: s.aliasMgr.AddLocalAlias, DisallowRouteBlinding: s.cfg.ProtocolOptions.NoRouteBlinding(), DisallowQuiescence: s.cfg.ProtocolOptions.NoQuiescence(), + QuiescenceTimeout: s.cfg.Htlcswitch.QuiescenceTimeout, MaxFeeExposure: thresholdMSats, Quit: s.quit, AuxLeafStore: s.implCfg.AuxLeafStore, From 463f51d371848de7a0bfe91823053d3024feabec Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 27 Jun 2025 19:57:53 +0800 Subject: [PATCH 2/7] lncfg: enable quiescence in non-dev environment --- lncfg/protocol.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lncfg/protocol.go b/lncfg/protocol.go index 3c5220d72..4d348b215 100644 --- a/lncfg/protocol.go +++ b/lncfg/protocol.go @@ -147,7 +147,7 @@ func (l *ProtocolOptions) NoExperimentalEndorsement() bool { // NoQuiescence returns true if quiescence is disabled. 
func (l *ProtocolOptions) NoQuiescence() bool { - return true + return false } // CustomMessageOverrides returns the set of protocol messages that we override From 2a45e8a0fa591b91a3ad827ba65d3bee3917e17d Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 27 Jun 2025 20:15:16 +0800 Subject: [PATCH 3/7] itest: fix `testQuiescence` The original test lets Bob send an HTLC to Alice, but Bob doesn't have any balance to begin with. This commit now fixes it by explicitly checking sending payment is allowed before quiescence, and forbidden after. --- itest/lnd_quiescence_test.go | 41 ++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/itest/lnd_quiescence_test.go b/itest/lnd_quiescence_test.go index d782228a8..a00c00189 100644 --- a/itest/lnd_quiescence_test.go +++ b/itest/lnd_quiescence_test.go @@ -13,9 +13,6 @@ import ( // testQuiescence tests whether we can come to agreement on quiescence of a // channel. We initiate quiescence via RPC and if it succeeds we verify that // the expected initiator is the resulting initiator. -// -// NOTE FOR REVIEW: this could be improved by blasting the channel with HTLC -// traffic on both sides to increase the surface area of the change under test. func testQuiescence(ht *lntest.HarnessTest) { cfg := node.CfgAnchor chanPoints, nodes := ht.CreateSimpleNetwork( @@ -26,25 +23,41 @@ func testQuiescence(ht *lntest.HarnessTest) { alice, bob := nodes[0], nodes[1] chanPoint := chanPoints[0] + // Bob adds an invoice. + payAmt := btcutil.Amount(100000) + invReq := &lnrpc.Invoice{ + Value: int64(payAmt), + } + invoice1 := bob.RPC.AddInvoice(invReq) + + // Before quiescence, Alice should be able to send HTLCs. + req := &routerrpc.SendPaymentRequest{ + PaymentRequest: invoice1.PaymentRequest, + FeeLimitMsat: noFeeLimitMsat, + } + ht.SendPaymentAssertSettled(alice, req) + + // Alice now requires the channel to be quiescent. Once it's done, she + // will not be able to send payments. 
res := alice.RPC.Quiesce(&devrpc.QuiescenceRequest{ ChanId: chanPoint, }) - require.True(ht, res.Initiator) - req := &routerrpc.SendPaymentRequest{ - Dest: alice.PubKey[:], - Amt: 100, - PaymentHash: ht.Random32Bytes(), - FinalCltvDelta: finalCltvDelta, + // Bob adds another invoice. + invoice2 := bob.RPC.AddInvoice(invReq) + + // Alice now tries to pay the second invoice. + // + // This fails with insufficient balance because the bandwidth manager + // reports 0 bandwidth if a link is not eligible for forwarding, which + // is the case during quiescence. + req = &routerrpc.SendPaymentRequest{ + PaymentRequest: invoice2.PaymentRequest, FeeLimitMsat: noFeeLimitMsat, } - ht.SendPaymentAssertFail( - bob, req, - // This fails with insufficient balance because the bandwidth - // manager reports 0 bandwidth if a link is not eligible for - // forwarding, which is the case during quiescence. + alice, req, lnrpc.PaymentFailureReason_FAILURE_REASON_INSUFFICIENT_BALANCE, ) } From d44b48097781ee134a00bcc55f05d65af0f37dde Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 4 Jul 2025 04:01:54 +0800 Subject: [PATCH 4/7] lnd: fix notifying peer online event too early We need to notify the peer is online only when it has been started successfully. --- server.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server.go b/server.go index 952877d6e..4da9cbcea 100644 --- a/server.go +++ b/server.go @@ -4614,8 +4614,6 @@ func (s *server) addPeer(p *peer.Brontide) { // to clients listening for peer events. var pubKey [33]byte copy(pubKey[:], pubBytes) - - s.peerNotifier.NotifyPeerOnline(pubKey) } // peerInitializer asynchronously starts a newly connected peer after it has @@ -4683,6 +4681,10 @@ func (s *server) peerInitializer(p *peer.Brontide) { } } delete(s.peerConnectedListeners, pubStr) + + // Since the peer has been fully initialized, now it's time to notify + // the RPC about the peer online event. 
+ s.peerNotifier.NotifyPeerOnline([33]byte(pubBytes)) } // peerTerminationWatcher waits until a peer has been disconnected unexpectedly, From 56fd8eeb4317eb9fe3a2e1dc0ead4b2dbc71d775 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Thu, 3 Jul 2025 03:51:41 +0800 Subject: [PATCH 5/7] lntest: add RPC `SubscribePeerEvents` And a few assertion helpers. --- lntest/harness_assertion.go | 62 +++++++++++++++++++++++++++++++++++++ lntest/rpc/lnd.go | 16 ++++++++++ 2 files changed, 78 insertions(+) diff --git a/lntest/harness_assertion.go b/lntest/harness_assertion.go index 2540ef6b0..5a598033a 100644 --- a/lntest/harness_assertion.go +++ b/lntest/harness_assertion.go @@ -2878,3 +2878,65 @@ func (h *HarnessTest) AssertForceCloseAndAnchorTxnsInMempool() (*wire.MsgTx, return nil, nil } } + +// ReceivePeerEvent waits until a message is received on the +// PeerEventsClient stream or the timeout is reached. +func (h *HarnessTest) ReceivePeerEvent( + stream rpc.PeerEventsClient) (*lnrpc.PeerEvent, error) { + + eventChan := make(chan *lnrpc.PeerEvent, 1) + errChan := make(chan error, 1) + go func() { + // Consume one message. This will block until the message is + // received. + resp, err := stream.Recv() + if err != nil { + errChan <- err + + return + } + eventChan <- resp + }() + + select { + case <-time.After(DefaultTimeout): + require.Fail(h, "timeout", "timeout waiting for peer event") + return nil, nil + + case err := <-errChan: + return nil, err + + case event := <-eventChan: + return event, nil + } +} + +// AssertPeerOnlineEvent reads an event from the PeerEventsClient stream and +// asserts it's an online event. +func (h HarnessTest) AssertPeerOnlineEvent(stream rpc.PeerEventsClient) { + event, err := h.ReceivePeerEvent(stream) + require.NoError(h, err) + + require.Equal(h, lnrpc.PeerEvent_PEER_ONLINE, event.Type) +} + +// AssertPeerOfflineEvent reads an event from the PeerEventsClient stream and +// asserts it's an offline event.
+func (h HarnessTest) AssertPeerOfflineEvent(stream rpc.PeerEventsClient) { + event, err := h.ReceivePeerEvent(stream) + require.NoError(h, err) + + require.Equal(h, lnrpc.PeerEvent_PEER_OFFLINE, event.Type) +} + +// AssertPeerReconnected reads two events from the PeerEventsClient stream. The +// first event must be an offline event, and the second event must be an online +// event. This is a typical reconnection scenario, where the peer is +// disconnected then connected again. +// +// NOTE: It's important to make the subscription before the disconnection +// happens, otherwise the events can be missed. +func (h HarnessTest) AssertPeerReconnected(stream rpc.PeerEventsClient) { + h.AssertPeerOfflineEvent(stream) + h.AssertPeerOnlineEvent(stream) +} diff --git a/lntest/rpc/lnd.go b/lntest/rpc/lnd.go index 1a49cd18b..055b3f29b 100644 --- a/lntest/rpc/lnd.go +++ b/lntest/rpc/lnd.go @@ -755,3 +755,19 @@ func (h *HarnessRPC) Quiesce( return res } + +type PeerEventsClient lnrpc.Lightning_SubscribePeerEventsClient + +// SubscribePeerEvents makes a RPC call to the node's SubscribePeerEvents and +// returns the stream client. +func (h *HarnessRPC) SubscribePeerEvents( + req *lnrpc.PeerEventSubscription) PeerEventsClient { + + // SubscribePeerEvents needs to have the context alive for the entire + // test case as the returned client will be used for send and receive + // events stream. Thus we use runCtx here instead of a timeout context. 
+ resp, err := h.LN.SubscribePeerEvents(h.runCtx, req) + h.NoError(err, "SubscribePeerEvents") + + return resp +} From c4c519a3ae219a92b12feb87c5294557170077a7 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 27 Jun 2025 21:13:09 +0800 Subject: [PATCH 6/7] itest+lncfg: test the quiescence timeout behavior --- itest/lnd_quiescence_test.go | 55 ++++++++++++++++++++++++++++++++++-- lncfg/htlcswitch.go | 4 ++- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/itest/lnd_quiescence_test.go b/itest/lnd_quiescence_test.go index a00c00189..c60f93842 100644 --- a/itest/lnd_quiescence_test.go +++ b/itest/lnd_quiescence_test.go @@ -14,9 +14,16 @@ import ( // channel. We initiate quiescence via RPC and if it succeeds we verify that // the expected initiator is the resulting initiator. func testQuiescence(ht *lntest.HarnessTest) { - cfg := node.CfgAnchor + aCfg := node.CfgAnchor + bCfg := node.CfgAnchor + + // Use different minbackoff values for Alice and Bob to avoid connection + // race. See https://github.com/lightningnetwork/lnd/issues/6788. + aCfg = append(aCfg, "--minbackoff=1s") + bCfg = append(bCfg, "--minbackoff=60s") + chanPoints, nodes := ht.CreateSimpleNetwork( - [][]string{cfg, cfg}, lntest.OpenChannelParams{ + [][]string{aCfg, bCfg}, lntest.OpenChannelParams{ Amt: btcutil.Amount(1000000), }) @@ -60,4 +67,48 @@ func testQuiescence(ht *lntest.HarnessTest) { alice, req, lnrpc.PaymentFailureReason_FAILURE_REASON_INSUFFICIENT_BALANCE, ) + + // Bob now subscribes the peer events, which will be used to assert the + // connection updates. + client := bob.RPC.SubscribePeerEvents(&lnrpc.PeerEventSubscription{}) + + // Alice now restarts with an extremely short quiescence timeout. + ht.RestartNodeWithExtraArgs( + alice, []string{"--htlcswitch.quiescencetimeout=1ms"}, + ) + + // Bob should be reconnected to Alice. + ht.AssertPeerReconnected(client) + + // Once restarted, the channel is no longer quiescent so Alice can + // finish the payment for invoice2.
+ ht.SendPaymentAssertSettled(alice, req) + + // Bob adds another invoice. + invoice3 := bob.RPC.AddInvoice(invReq) + + // Alice now requires the channel to be quiescent again. Since we are + // using a short timeout (1ms) for the quiescence, Alice should + // disconnect from Bob immediately. + res = alice.RPC.Quiesce(&devrpc.QuiescenceRequest{ + ChanId: chanPoint, + }) + require.True(ht, res.Initiator) + + // The above quiescence timeout will cause Alice to disconnect from Bob. + // However, since the connection has an open channel, Alice and Bob will + // be reconnected shortly. + ht.AssertPeerReconnected(client) + + // Make sure Alice has finished the connection too before attempting the + // payment below. + ht.AssertConnected(alice, bob) + + // Assert that Alice can pay invoice3. This implicitly checks that the + // above quiescence is terminated. + req = &routerrpc.SendPaymentRequest{ + PaymentRequest: invoice3.PaymentRequest, + FeeLimitMsat: noFeeLimitMsat, + } + ht.SendPaymentAssertSettled(alice, req) } diff --git a/lncfg/htlcswitch.go b/lncfg/htlcswitch.go index 9942df22b..12c421ca7 100644 --- a/lncfg/htlcswitch.go +++ b/lncfg/htlcswitch.go @@ -40,7 +40,9 @@ func (h *Htlcswitch) Validate() error { MaxMailboxDeliveryTimeout) } - if h.QuiescenceTimeout < minQuiescenceTimeout { + // Skip the validation for integration tests so we can use a smaller + // timeout value to check the timeout behavior.
+ if !IsDevBuild() && h.QuiescenceTimeout < minQuiescenceTimeout { return fmt.Errorf("quiescencetimeout: %v below minimal: %v", h.QuiescenceTimeout, minQuiescenceTimeout) } From bac8c51076645e434749fdb223944bea25402027 Mon Sep 17 00:00:00 2001 From: yyforyongyu Date: Fri, 27 Jun 2025 21:19:11 +0800 Subject: [PATCH 7/7] docs: update release notes --- docs/release-notes/release-notes-0.20.0.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/release-notes/release-notes-0.20.0.md b/docs/release-notes/release-notes-0.20.0.md index ee36d7c8f..0a2c240f5 100644 --- a/docs/release-notes/release-notes-0.20.0.md +++ b/docs/release-notes/release-notes-0.20.0.md @@ -55,6 +55,14 @@ circuit. The indices are only available for forwarding events saved after v0.20. `include_auth_proof`. With the flag, these APIs add AuthProof (signatures from the channel announcement) to the returned ChannelEdge. +* A [new config](https://github.com/lightningnetwork/lnd/pull/10001) value + `--htlcswitch.quiescencetimeout` is added to allow specifying the max duration + the channel can be quiescent. A minimal value of 30s is enforced, and a + default value of 60s is used. This value is used to limit the dependent + protocols like dynamic commitments by restricting that the operation must + finish under this timeout value. Consider using a larger timeout value if you + have a slow network. + ## lncli Additions @@ -171,4 +179,5 @@ reader of a payment request. * Funyug * Mohamed Awnallah * Pins +* Yong Yu * Ziggie