diff --git a/docs/release-notes/release-notes-0.16.3.md b/docs/release-notes/release-notes-0.16.3.md index b7b5ff5ba..b5ca8ab71 100644 --- a/docs/release-notes/release-notes-0.16.3.md +++ b/docs/release-notes/release-notes-0.16.3.md @@ -1,10 +1,10 @@ # Release Notes -## Mempool +## Mempool Optimizations * Optimized [mempool -management](https://github.com/lightningnetwork/lnd/pull/7681) to lower the CPU -usage. + management](https://github.com/lightningnetwork/lnd/pull/7681) to lower the + CPU usage. ## Misc @@ -12,7 +12,16 @@ usage. all macaroon DB root keys on `ChangePassword`/`GenerateNewRootKey` respectively. +## Channel Link Bug Fix + +* If we detect the remote link is inactive, [we'll now tear down the + connection](https://github.com/lightningnetwork/lnd/pull/7711) in addition to + stopping the link's state machine. If we're persistently connected with the + peer, then this'll force a reconnect, which may restart things and help avoid + certain force close scenarios. + # Contributors (Alphabetical Order) * Elle Mouton +* Olaoluwa Osuntokun * Yong Yu diff --git a/htlcswitch/link.go b/htlcswitch/link.go index feb0a7a8b..7ef6d9a00 100644 --- a/htlcswitch/link.go +++ b/htlcswitch/link.go @@ -1037,7 +1037,7 @@ func (l *channelLink) htlcManager() { l.fail( LinkFailureError{ code: ErrSyncError, - FailureAction: LinkFailureForceClose, // nolint:lll + FailureAction: LinkFailureForceClose, //nolint:lll }, "unable to synchronize channel "+ "states: %v", err, @@ -1239,8 +1239,13 @@ func (l *channelLink) htlcManager() { } case <-l.cfg.PendingCommitTicker.Ticks(): - l.fail(LinkFailureError{code: ErrRemoteUnresponsive}, - "unable to complete dance") + l.fail( + LinkFailureError{ + code: ErrRemoteUnresponsive, + FailureAction: LinkFailureDisconnect, + }, + "unable to complete dance", + ) return // A message from the switch was just received. 
This indicates diff --git a/htlcswitch/link_test.go b/htlcswitch/link_test.go index c461ab33e..e744c75fc 100644 --- a/htlcswitch/link_test.go +++ b/htlcswitch/link_test.go @@ -5457,7 +5457,8 @@ func TestChannelLinkFail(t *testing.T) { // If we expect the link to force close the channel in this // case, check that it happens. If not, make sure it does not // happen. - isForceCloseErr := (linkErr.FailureAction == LinkFailureForceClose) + isForceCloseErr := (linkErr.FailureAction == + LinkFailureForceClose) require.True( t, test.shouldForceClose == isForceCloseErr, test.name, ) @@ -6343,11 +6344,12 @@ func TestPendingCommitTicker(t *testing.T) { // Assert that we get the expected link failure from Alice. select { case linkErr := <-linkErrs: - if linkErr.code != ErrRemoteUnresponsive { - t.Fatalf("error code mismatch, "+ - "want: ErrRemoteUnresponsive, got: %v", - linkErr.code) - } + require.Equal( + t, linkErr.code, ErrRemoteUnresponsive, + fmt.Sprintf("error code mismatch, want: "+ + "ErrRemoteUnresponsive, got: %v", linkErr.code), + ) + require.Equal(t, linkErr.FailureAction, LinkFailureDisconnect) case <-time.After(time.Second): t.Fatalf("did not receive failure") diff --git a/htlcswitch/linkfailure.go b/htlcswitch/linkfailure.go index 58da01189..f04a41603 100644 --- a/htlcswitch/linkfailure.go +++ b/htlcswitch/linkfailure.go @@ -64,6 +64,11 @@ const ( // LinkFailureForceClose indicates that the channel should be force // closed. LinkFailureForceClose + + // LinkFailureDisconnect indicates that we should disconnect in an + // attempt to recycle the connection. This can be useful if we think a + // TCP connection or state machine is stalled. 
+ LinkFailureDisconnect ) // LinkFailureError encapsulates an error that will make us fail the current diff --git a/peer/brontide.go b/peer/brontide.go index 964054a6e..1b1534b2d 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -3142,6 +3142,13 @@ func (p *Brontide) handleLinkFailure(failure linkFailureReport) { "remote peer: %v", err) } } + + // If the failure action is disconnect, then we'll execute that now. If + // we had to send an error above, it was a sync call, so we expect the + // message to be flushed on the wire by now. + if failure.linkErr.FailureAction == htlcswitch.LinkFailureDisconnect { + p.Disconnect(fmt.Errorf("link requested disconnect")) + } } // tryLinkShutdown attempts to fetch a target link from the switch, calls