From 23a153abaec38d122f1752e9071623798940e3d9 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Wed, 13 Sep 2023 11:12:26 -0700 Subject: [PATCH] lnd+config: add ability to obtain blocking and mutex profiles In this commit, we add the ability to obtain blocking and mutex profiles. The blocking profile will show which goroutines are consistently blocked on synchronization primitives like channels, or I/O. The mutex profile will show which mutexes are heavily contended. The blocking profile can be enabled with a new arg: `--blockingprofile`. The mutex profile can be enabled with a new arg: `--mutexprofile`. These are both ignored if the profile port isn't set. Activating these profiles requires the caller to pass in a sampling rate. For now I've set it just to `1` to test things out. Unfortunately documentation is rather scarce, so there aren't any good guides re what these values should be set to. AFAICT, these add more overhead than the other profiling options, so they shouldn't necessarily be enabled persistently in production. --- config.go | 3 +++ docs/release-notes/release-notes-0.17.0.md | 5 +++++ lnd.go | 8 ++++++++ sample-lnd.conf | 12 ++++++++++++ 4 files changed, 28 insertions(+) diff --git a/config.go b/config.go index e2ee0ca68..ab42976ab 100644 --- a/config.go +++ b/config.go @@ -337,6 +337,9 @@ type Config struct { Profile string `long:"profile" description:"Enable HTTP profiling on either a port or host:port"` + BlockingProfile int `long:"blockingprofile" description:"Used to enable a blocking profile to be served on the profiling port. This takes a value from 0 to 1, with 1 including every blocking event, and 0 including no events."` + MutexProfile int `long:"mutexprofile" description:"Used to Enable a mutex profile to be served on the profiling port. 
This takes a value from 0 to 1, with 1 including every mutex event, and 0 including no events."` + UnsafeDisconnect bool `long:"unsafe-disconnect" description:"DEPRECATED: Allows the rpcserver to intentionally disconnect from peers with open channels. THIS FLAG WILL BE REMOVED IN 0.10.0"` UnsafeReplay bool `long:"unsafe-replay" description:"Causes a link to replay the adds on its commitment txn after starting up, this enables testing of the sphinx replay logic."` MaxPendingChannels int `long:"maxpendingchannels" description:"The maximum number of incoming pending channels permitted per peer."` diff --git a/docs/release-notes/release-notes-0.17.0.md b/docs/release-notes/release-notes-0.17.0.md index 3f4cf7ceb..47f87890f 100644 --- a/docs/release-notes/release-notes-0.17.0.md +++ b/docs/release-notes/release-notes-0.17.0.md @@ -77,6 +77,11 @@ fails](https://github.com/lightningnetwork/lnd/pull/7876). # New Features ## Functional Enhancements + +* `lnd` can now optionally generate [blocking and mutex + profiles](https://github.com/lightningnetwork/lnd/pull/7983). These profiles + are useful to attempt to debug high mutex contention, or deadlock scenarios. + ### Protocol Features * This release marks the first release that includes the new [musig2-based taproot channel type](https://github.com/lightningnetwork/lnd/pull/7904). 
As diff --git a/lnd.go b/lnd.go index f8377f335..201dc2777 100644 --- a/lnd.go +++ b/lnd.go @@ -13,6 +13,7 @@ import ( "net/http" "net/http/pprof" "os" + "runtime" runtimePprof "runtime/pprof" "strings" "sync" @@ -194,6 +195,13 @@ func Main(cfg *Config, lisCfg ListenerCfg, implCfg *ImplementationCfg, pprofMux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) pprofMux.HandleFunc("/debug/pprof/trace", pprof.Trace) + if cfg.BlockingProfile != 0 { + runtime.SetBlockProfileRate(cfg.BlockingProfile) + } + if cfg.MutexProfile != 0 { + runtime.SetMutexProfileFraction(cfg.MutexProfile) + } + // Redirect all requests to the pprof handler, thus visiting // `127.0.0.1:6060` will be redirected to // `127.0.0.1:6060/debug/pprof`. diff --git a/sample-lnd.conf b/sample-lnd.conf index 5d0dd4a48..1d2f77d33 100644 --- a/sample-lnd.conf +++ b/sample-lnd.conf @@ -270,6 +270,18 @@ ; 65536. The profile can be access at: http://localhost:/debug/pprof/. ; profile= +; Enable a blocking profile to be obtained from the profiling port. A blocking +; profile can show where goroutines are blocking (stuck on mutexes, I/O, etc). +; This takes a value from 0 to 1, with 0 turning off the setting, and 1 sampling +; every blocking event (it's a rate value). +; blockingprofile=0 + +; Enable a mutex profile to be obtained from the profiling port. A mutex +; profile can show where goroutines are blocked on mutexes, and which mutexes +; have high contention. This takes a value from 0 to 1, with 0 turning off the +; setting, and 1 sampling every mutex event (it's a rate value). +; mutexprofile=0 + ; DEPRECATED: Allows the rpcserver to intentionally disconnect from peers with ; open channels. THIS FLAG WILL BE REMOVED IN 0.10.0. ; unsafe-disconnect=false