mirror of
https://github.com/lightningnetwork/lnd.git
synced 2025-04-02 00:52:13 +02:00
Merge pull request #7711 from Roasbeef/commit-dance-stall-disconnect
htlcswitch+peer: actually actually disconnect if we detect a stalled remote channel state machine
This commit is contained in:
commit
b9b20acd41
@ -1,10 +1,10 @@
|
||||
# Release Notes
|
||||
|
||||
## Mempool
|
||||
## Mempool Optimizations
|
||||
|
||||
* Optimized [mempool
|
||||
management](https://github.com/lightningnetwork/lnd/pull/7681) to lower the CPU
|
||||
usage.
|
||||
management](https://github.com/lightningnetwork/lnd/pull/7681) to lower the
|
||||
CPU usage.
|
||||
|
||||
## Misc
|
||||
|
||||
@ -12,7 +12,16 @@ usage.
|
||||
all macaroon DB root keys on `ChangePassword`/`GenerateNewRootKey`
|
||||
respectively.
|
||||
|
||||
## Channel Link Bug Fix
|
||||
|
||||
* If we detect the remote link is inactive, [we'll now tear down the
|
||||
connection](https://github.com/lightningnetwork/lnd/pull/7711) in addition to
|
||||
stopping the link's statemachine. If we're persistently connected with the
|
||||
peer, then this'll force a reconnect, which may restart things and help avoid
|
||||
certain force close scenarios.
|
||||
|
||||
# Contributors (Alphabetical Order)
|
||||
|
||||
* Elle Mouton
|
||||
* Olaoluwa Osuntokun
|
||||
* Yong Yu
|
||||
|
@ -1036,8 +1036,8 @@ func (l *channelLink) htlcManager() {
|
||||
// storing the transaction in the db.
|
||||
l.fail(
|
||||
LinkFailureError{
|
||||
code: ErrSyncError,
|
||||
ForceClose: true,
|
||||
code: ErrSyncError,
|
||||
FailureAction: LinkFailureForceClose, //nolint:lll
|
||||
},
|
||||
"unable to synchronize channel "+
|
||||
"states: %v", err,
|
||||
@ -1077,8 +1077,8 @@ func (l *channelLink) htlcManager() {
|
||||
|
||||
l.fail(
|
||||
LinkFailureError{
|
||||
code: ErrRecoveryError,
|
||||
ForceClose: false,
|
||||
code: ErrRecoveryError,
|
||||
FailureAction: LinkFailureForceNone,
|
||||
},
|
||||
"unable to synchronize channel "+
|
||||
"states: %v", err,
|
||||
@ -1239,8 +1239,13 @@ func (l *channelLink) htlcManager() {
|
||||
}
|
||||
|
||||
case <-l.cfg.PendingCommitTicker.Ticks():
|
||||
l.fail(LinkFailureError{code: ErrRemoteUnresponsive},
|
||||
"unable to complete dance")
|
||||
l.fail(
|
||||
LinkFailureError{
|
||||
code: ErrRemoteUnresponsive,
|
||||
FailureAction: LinkFailureDisconnect,
|
||||
},
|
||||
"unable to complete dance",
|
||||
)
|
||||
return
|
||||
|
||||
// A message from the switch was just received. This indicates
|
||||
@ -1782,8 +1787,8 @@ func (l *channelLink) handleUpstreamMsg(msg lnwire.Message) {
|
||||
if err := l.channel.ReceiveHTLCSettle(pre, idx); err != nil {
|
||||
l.fail(
|
||||
LinkFailureError{
|
||||
code: ErrInvalidUpdate,
|
||||
ForceClose: true,
|
||||
code: ErrInvalidUpdate,
|
||||
FailureAction: LinkFailureForceClose,
|
||||
},
|
||||
"unable to handle upstream settle HTLC: %v", err,
|
||||
)
|
||||
@ -1947,9 +1952,9 @@ func (l *channelLink) handleUpstreamMsg(msg lnwire.Message) {
|
||||
}
|
||||
l.fail(
|
||||
LinkFailureError{
|
||||
code: ErrInvalidCommitment,
|
||||
ForceClose: true,
|
||||
SendData: sendData,
|
||||
code: ErrInvalidCommitment,
|
||||
FailureAction: LinkFailureForceClose,
|
||||
SendData: sendData,
|
||||
},
|
||||
"ChannelPoint(%v): unable to accept new "+
|
||||
"commitment: %v",
|
||||
|
@ -5457,8 +5457,10 @@ func TestChannelLinkFail(t *testing.T) {
|
||||
// If we expect the link to force close the channel in this
|
||||
// case, check that it happens. If not, make sure it does not
|
||||
// happen.
|
||||
require.Equal(
|
||||
t, test.shouldForceClose, linkErr.ForceClose, test.name,
|
||||
isForceCloseErr := (linkErr.FailureAction ==
|
||||
LinkFailureForceClose)
|
||||
require.True(
|
||||
t, test.shouldForceClose == isForceCloseErr, test.name,
|
||||
)
|
||||
require.Equal(
|
||||
t, test.permanentFailure, linkErr.PermanentFailure,
|
||||
@ -6342,11 +6344,12 @@ func TestPendingCommitTicker(t *testing.T) {
|
||||
// Assert that we get the expected link failure from Alice.
|
||||
select {
|
||||
case linkErr := <-linkErrs:
|
||||
if linkErr.code != ErrRemoteUnresponsive {
|
||||
t.Fatalf("error code mismatch, "+
|
||||
"want: ErrRemoteUnresponsive, got: %v",
|
||||
linkErr.code)
|
||||
}
|
||||
require.Equal(
|
||||
t, linkErr.code, ErrRemoteUnresponsive,
|
||||
fmt.Sprintf("error code mismatch, want: "+
|
||||
"ErrRemoteUnresponsive, got: %v", linkErr.code),
|
||||
)
|
||||
require.Equal(t, linkErr.FailureAction, LinkFailureDisconnect)
|
||||
|
||||
case <-time.After(time.Second):
|
||||
t.Fatalf("did not receive failure")
|
||||
@ -6523,7 +6526,7 @@ func TestPipelineSettle(t *testing.T) {
|
||||
// ForceClose should be false.
|
||||
select {
|
||||
case linkErr := <-linkErrors:
|
||||
require.False(t, linkErr.ForceClose)
|
||||
require.False(t, linkErr.FailureAction == LinkFailureForceClose)
|
||||
case <-forwardChan:
|
||||
t.Fatal("packet was erroneously forwarded")
|
||||
}
|
||||
@ -6559,7 +6562,7 @@ func TestPipelineSettle(t *testing.T) {
|
||||
// ForceClose should be false.
|
||||
select {
|
||||
case linkErr := <-linkErrors:
|
||||
require.False(t, linkErr.ForceClose)
|
||||
require.False(t, linkErr.FailureAction == LinkFailureForceClose)
|
||||
case <-forwardChan:
|
||||
t.Fatal("packet was erroneously forwarded")
|
||||
}
|
||||
|
@ -53,6 +53,24 @@ const (
|
||||
ErrCircuitError
|
||||
)
|
||||
|
||||
// LinkFailureAction is an enum-like type that describes the action that should
|
||||
// be taken in response to a link failure.
|
||||
type LinkFailureAction uint8
|
||||
|
||||
const (
|
||||
// LinkFailureForceNone indicates no action is to be taken.
|
||||
LinkFailureForceNone LinkFailureAction = iota
|
||||
|
||||
// LinkFailureForceClose indicates that the channel should be force
|
||||
// closed.
|
||||
LinkFailureForceClose
|
||||
|
||||
// LinkFailureDisconnect indicates that we should disconnect in an
|
||||
// attempt to recycle the connection. This can be useful if we think a
|
||||
// TCP connection or state machine is stalled.
|
||||
LinkFailureDisconnect
|
||||
)
|
||||
|
||||
// LinkFailureError encapsulates an error that will make us fail the current
|
||||
// link. It contains the necessary information needed to determine if we should
|
||||
// force close the channel in the process, and if any error data should be sent
|
||||
@ -61,9 +79,8 @@ type LinkFailureError struct {
|
||||
// code is the type of error this LinkFailureError encapsulates.
|
||||
code errorCode
|
||||
|
||||
// ForceClose indicates whether we should force close the channel
|
||||
// because of this error.
|
||||
ForceClose bool
|
||||
// FailureAction describes what we should do to fail the channel.
|
||||
FailureAction LinkFailureAction
|
||||
|
||||
// PermanentFailure indicates whether this failure is permanent, and
|
||||
// the channel should not be attempted loaded again.
|
||||
|
@ -3094,11 +3094,10 @@ func (p *Brontide) handleLinkFailure(failure linkFailureReport) {
|
||||
// being applied.
|
||||
p.WipeChannel(&failure.chanPoint)
|
||||
|
||||
// If the error encountered was severe enough, we'll now force close the
|
||||
// channel to prevent reading it to the switch in the future.
|
||||
if failure.linkErr.ForceClose {
|
||||
p.log.Warnf("Force closing link(%v)",
|
||||
failure.shortChanID)
|
||||
// If the error encountered was severe enough, we'll now force close
|
||||
// the channel to prevent reading it to the switch in the future.
|
||||
if failure.linkErr.FailureAction == htlcswitch.LinkFailureForceClose {
|
||||
p.log.Warnf("Force closing link(%v)", failure.shortChanID)
|
||||
|
||||
closeTx, err := p.cfg.ChainArb.ForceCloseContract(
|
||||
failure.chanPoint,
|
||||
@ -3143,6 +3142,13 @@ func (p *Brontide) handleLinkFailure(failure linkFailureReport) {
|
||||
"remote peer: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// If the failure action is disconnect, then we'll execute that now. If
|
||||
// we had to send an error above, it was a sync call, so we expect the
|
||||
// message to be flushed on the wire by now.
|
||||
if failure.linkErr.FailureAction == htlcswitch.LinkFailureDisconnect {
|
||||
p.Disconnect(fmt.Errorf("link requested disconnect"))
|
||||
}
|
||||
}
|
||||
|
||||
// tryLinkShutdown attempts to fetch a target link from the switch, calls
|
||||
|
Loading…
x
Reference in New Issue
Block a user