mirror of
https://github.com/lightningnetwork/lnd.git
synced 2025-11-10 06:07:16 +01:00
peer+lnd: add new CLI option to control if we D/C on slow pongs
In this commit, we add a new CLI option to control if we D/C on slow pongs or not. Due to the existence of head-of-the-line blocking at various levels of abstraction (app buffer, slow processing, TCP kernel buffers, etc), if there's a flurry of gossip messages (eg: 1K channel updates), then even with a reasonable processing latency, a peer may still not read our ping in time. To give users another option, we add a flag that allows users to disable this behavior. The default remains.
This commit is contained in:
committed by
Oliver Gugger
parent
b0cba7dd08
commit
c493f0c0a9
@@ -7,6 +7,7 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/lightningnetwork/lnd/fn/v2"
|
||||
"github.com/lightningnetwork/lnd/lnwire"
|
||||
)
|
||||
|
||||
@@ -36,7 +37,8 @@ type PingManagerConfig struct {
|
||||
// OnPongFailure is a closure that is responsible for executing the
|
||||
// logic when a Pong message is either late or does not match our
|
||||
// expectations for that Pong
|
||||
OnPongFailure func(error)
|
||||
OnPongFailure func(failureReason error, timeWaitedForPong time.Duration,
|
||||
lastKnownRTT time.Duration)
|
||||
}
|
||||
|
||||
// PingManager is a structure that is designed to manage the internal state
|
||||
@@ -108,6 +110,26 @@ func (m *PingManager) Start() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// getLastRTT safely retrieves the last known RTT, returning 0 if none exists.
|
||||
func (m *PingManager) getLastRTT() time.Duration {
|
||||
rttPtr := m.pingTime.Load()
|
||||
if rttPtr == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return *rttPtr
|
||||
}
|
||||
|
||||
// pendingPingWait calculates the time waited since the last ping was sent. If
|
||||
// no ping time is reported, None is returned. defaultDuration.
|
||||
func (m *PingManager) pendingPingWait() fn.Option[time.Duration] {
|
||||
if m.pingLastSend != nil {
|
||||
return fn.Some(time.Since(*m.pingLastSend))
|
||||
}
|
||||
|
||||
return fn.None[time.Duration]()
|
||||
}
|
||||
|
||||
// pingHandler is the main goroutine responsible for enforcing the ping/pong
|
||||
// protocol.
|
||||
func (m *PingManager) pingHandler() {
|
||||
@@ -119,6 +141,10 @@ func (m *PingManager) pingHandler() {
|
||||
<-m.pingTimeout.C
|
||||
}
|
||||
|
||||
// Because we don't know if the OnPingFailure callback actually
|
||||
// disconnects a peer (dependent on user config), we should never return
|
||||
// from this loop unless the ping manager is stopped explicitly (which
|
||||
// happens on disconnect).
|
||||
for {
|
||||
select {
|
||||
case <-m.pingTicker.C:
|
||||
@@ -127,12 +153,20 @@ func (m *PingManager) pingHandler() {
|
||||
// awaiting a pong response. This should never occur,
|
||||
// but if it does, it implies a timeout.
|
||||
if m.outstandingPongSize >= 0 {
|
||||
e := errors.New("impossible: new ping" +
|
||||
"in unclean state",
|
||||
// Ping was outstanding, meaning it timed out by
|
||||
// the arrival of the next ping interval.
|
||||
timeWaited := m.pendingPingWait().UnwrapOr(
|
||||
m.cfg.IntervalDuration,
|
||||
)
|
||||
m.cfg.OnPongFailure(e)
|
||||
lastRTT := m.getLastRTT()
|
||||
|
||||
return
|
||||
m.cfg.OnPongFailure(
|
||||
errors.New("ping timed "+
|
||||
"out by next interval"),
|
||||
timeWaited, lastRTT,
|
||||
)
|
||||
|
||||
m.resetPingState()
|
||||
}
|
||||
|
||||
pongSize := m.cfg.NewPongSize()
|
||||
@@ -143,52 +177,66 @@ func (m *PingManager) pingHandler() {
|
||||
|
||||
// Set up our bookkeeping for the new Ping.
|
||||
if err := m.setPingState(pongSize); err != nil {
|
||||
m.cfg.OnPongFailure(err)
|
||||
// This is an internal error related to timer
|
||||
// reset. Pass it to OnPongFailure as it's
|
||||
// critical. Current and last RTT are not
|
||||
// directly applicable here.
|
||||
m.cfg.OnPongFailure(err, 0, 0)
|
||||
|
||||
return
|
||||
m.resetPingState()
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
m.cfg.SendPing(ping)
|
||||
|
||||
case <-m.pingTimeout.C:
|
||||
m.resetPingState()
|
||||
timeWaited := m.pendingPingWait().UnwrapOr(
|
||||
m.cfg.TimeoutDuration,
|
||||
)
|
||||
lastRTT := m.getLastRTT()
|
||||
|
||||
e := errors.New("timeout while waiting for " +
|
||||
"pong response",
|
||||
m.cfg.OnPongFailure(
|
||||
errors.New("timeout while waiting for "+
|
||||
"pong response"),
|
||||
timeWaited, lastRTT,
|
||||
)
|
||||
|
||||
m.cfg.OnPongFailure(e)
|
||||
|
||||
return
|
||||
m.resetPingState()
|
||||
|
||||
case pong := <-m.pongChan:
|
||||
pongSize := int32(len(pong.PongBytes))
|
||||
|
||||
// Save off values we are about to override when we
|
||||
// call resetPingState.
|
||||
// Save off values we are about to override when we call
|
||||
// resetPingState.
|
||||
expected := m.outstandingPongSize
|
||||
lastPing := m.pingLastSend
|
||||
lastPingTime := m.pingLastSend
|
||||
|
||||
m.resetPingState()
|
||||
// This is an unexpected pong, we'll continue.
|
||||
if lastPingTime == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// If the pong we receive doesn't match the ping we
|
||||
// sent out, then we fail out.
|
||||
actualRTT := time.Since(*lastPingTime)
|
||||
|
||||
// If the pong we receive doesn't match the ping we sent
|
||||
// out, then we fail out.
|
||||
if pongSize != expected {
|
||||
e := errors.New("pong response does " +
|
||||
"not match expected size",
|
||||
)
|
||||
e := fmt.Errorf("pong response does not match "+
|
||||
"expected size. Expected: %d, Got: %d",
|
||||
expected, pongSize)
|
||||
|
||||
m.cfg.OnPongFailure(e)
|
||||
lastRTT := m.getLastRTT()
|
||||
m.cfg.OnPongFailure(e, actualRTT, lastRTT)
|
||||
|
||||
return
|
||||
m.resetPingState()
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// Compute RTT of ping and save that for future
|
||||
// querying.
|
||||
if lastPing != nil {
|
||||
rtt := time.Since(*lastPing)
|
||||
m.pingTime.Store(&rtt)
|
||||
}
|
||||
// Pong is good, update RTT and reset state.
|
||||
m.pingTime.Store(&actualRTT)
|
||||
m.resetPingState()
|
||||
|
||||
case <-m.quit:
|
||||
return
|
||||
@@ -231,6 +279,7 @@ func (m *PingManager) setPingState(pongSize uint16) error {
|
||||
func (m *PingManager) resetPingState() {
|
||||
m.pingLastSend = nil
|
||||
m.outstandingPongSize = -1
|
||||
|
||||
if !m.pingTimeout.Stop() {
|
||||
select {
|
||||
case <-m.pingTimeout.C:
|
||||
|
||||
Reference in New Issue
Block a user