multi: add health check for remote signer

This commit is contained in:
Oliver Gugger
2022-01-05 11:04:30 +01:00
parent 9601a9ab84
commit da59c1fa62
5 changed files with 86 additions and 0 deletions

View File

@@ -148,6 +148,16 @@ const (
defaultTCBackoff = time.Minute
defaultTCAttempts = 0
// Set defaults for a health check which ensures that the remote signer
// RPC connection is alive. Although this check is off by default (only
// active when remote signing is turned on), we still set the other
// default values so that the health check can be easily enabled with
// sane defaults.
defaultRSInterval = time.Minute
defaultRSTimeout = time.Second * 1
defaultRSBackoff = time.Second * 30
defaultRSAttempts = 1
// defaultRemoteMaxHtlcs specifies the default limit for maximum
// concurrent HTLCs the remote party may add to commitment transactions.
// This value can be overridden with --default-remote-max-htlcs.
@@ -558,6 +568,12 @@ func DefaultConfig() Config {
Attempts: defaultTCAttempts,
Backoff: defaultTCBackoff,
},
RemoteSigner: &lncfg.CheckConfig{
Interval: defaultRSInterval,
Timeout: defaultRSTimeout,
Attempts: defaultRSAttempts,
Backoff: defaultRSBackoff,
},
},
Gossip: &lncfg.Gossip{
MaxChannelUpdateBurst: discovery.DefaultMaxChannelUpdateBurst,

View File

@@ -30,6 +30,8 @@ type HealthCheckConfig struct {
TLSCheck *CheckConfig `group:"tls" namespace:"tls"`
TorConnection *CheckConfig `group:"torconnection" namespace:"torconnection"`
RemoteSigner *CheckConfig `group:"remotesigner" namespace:"remotesigner"`
}
// Validate checks the values configured for our health checks.

View File

@@ -0,0 +1,24 @@
package rpcwallet
import (
"fmt"
"time"
"github.com/lightningnetwork/lnd/lncfg"
)
// HealthCheck returns a health check function for the given remote signing
// configuration.
//
// Each invocation of the returned function calls connectRPC with the RPC host,
// TLS certificate path and macaroon path taken from cfg, together with the
// given timeout. It returns nil if that call succeeds and a wrapped error
// describing the connection failure otherwise.
//
// NOTE(review): this assumes the first value returned by connectRPC is the
// established client connection and exposes Close() — confirm against the
// connectRPC definition.
func HealthCheck(cfg *lncfg.RemoteSigner, timeout time.Duration) func() error {
	return func() error {
		conn, err := connectRPC(
			cfg.RPCHost, cfg.TLSCertPath, cfg.MacaroonPath, timeout,
		)
		if err != nil {
			return fmt.Errorf("error connecting to the remote "+
				"signing node through RPC: %w", err)
		}

		// The connection is only needed as a liveness probe. Release
		// it right away, otherwise every run of the health check
		// would leave one more open connection behind. A failure to
		// close says nothing about whether the signer is reachable,
		// so it is deliberately not reported as a failed check.
		_ = conn.Close()

		return nil
	}
}

View File

@@ -1006,6 +1006,23 @@ litecoin.node=ltcd
; value must be >= 1m.
; healthcheck.torconnection.interval=1m
; The number of times we should attempt to check our remote signer RPC
; connection before gracefully shutting down. Set this value to 0 to disable
; this health check.
; healthcheck.remotesigner.attempts=1
; The amount of time we allow a call to our remote signer RPC connection to take
; before we fail the attempt. This value must be >= 1s.
; healthcheck.remotesigner.timeout=1s
; The amount of time we should back off between failed attempts to check the
; remote signer RPC connection. This value must be >= 1s.
; healthcheck.remotesigner.backoff=30s
; The amount of time we should wait between remote signer RPC connection health
; checks. This value must be >= 1m.
; healthcheck.remotesigner.interval=1m
[signrpc]

View File

@@ -52,6 +52,7 @@ import (
"github.com/lightningnetwork/lnd/lnrpc/routerrpc"
"github.com/lightningnetwork/lnd/lnwallet"
"github.com/lightningnetwork/lnd/lnwallet/chainfee"
"github.com/lightningnetwork/lnd/lnwallet/rpcwallet"
"github.com/lightningnetwork/lnd/lnwire"
"github.com/lightningnetwork/lnd/nat"
"github.com/lightningnetwork/lnd/netann"
@@ -1584,6 +1585,32 @@ func (s *server) createLivenessMonitor(cfg *Config, cc *chainreg.ChainControl) {
checks = append(checks, torConnectionCheck)
}
// If remote signing is enabled, add the healthcheck for the remote
// signing RPC interface.
if s.cfg.RemoteSigner != nil && s.cfg.RemoteSigner.Enable {
// Because we have two cascading timeouts here, we need to add
// some slack to the "outer" one of them in case the "inner"
// returns exactly on time.
overhead := time.Millisecond * 10
remoteSignerConnectionCheck := healthcheck.NewObservation(
"remote signer connection",
rpcwallet.HealthCheck(
s.cfg.RemoteSigner,
// For the health check we might want to be even
// stricter than the initial/normal connect, so
// we use the health check timeout here.
cfg.HealthChecks.RemoteSigner.Timeout,
),
cfg.HealthChecks.RemoteSigner.Interval,
cfg.HealthChecks.RemoteSigner.Timeout+overhead,
cfg.HealthChecks.RemoteSigner.Backoff,
cfg.HealthChecks.RemoteSigner.Attempts,
)
checks = append(checks, remoteSignerConnectionCheck)
}
// If we have not disabled all of our health checks, we create a
// liveliness monitor with our configured checks.
s.livelinessMonitor = healthcheck.NewMonitor(