monitoring+cfg: add new option to export gRPC perf metrics

In this commit, we expose a new monitoring option that allows users to export gRPC performance metrics. These metrics can be used to see how long certain calls are taking and the total amount of time spent handling calls, broken down by both service and call. Enabling this option consumes additional memory and disk space on the Prometheus server, which is why we're making it opt-in behind a separate flag.
2025-03-28 02:33:22 +01:00 · 2022-02-01 15:48:35 -08:00 · 2022-02-01 15:48:35 -08:00 · 10c5d6c0bd
commit 10c5d6c0bd
parent a05b85686f
3 changed files with 20 additions and 0 deletions
--- a/lncfg/monitoring_on.go
+++ b/lncfg/monitoring_on.go
@ -13,6 +13,12 @@ type Prometheus struct {
 	// Enable indicates whether to export lnd gRPC performance metrics to
 	// Prometheus. Default is false.
 	Enable bool `long:"enable" description:"enable Prometheus exporting of lnd gRPC performance metrics."`
+
+	// PerfHistograms indicates if the additional histogram information for
+	// latency, and handling time of gRPC calls should be enabled. This
+	// generates additional data, and consumes more memory for the
+	// Prometheus server.
+	PerfHistograms bool `long:"perfhistograms" description:"enable additional histogram to track gRPC call processing performance (latency, etc)"`
 }

 // DefaultPrometheus is the default configuration for the Prometheus metrics
--- a/monitoring/monitoring_on.go
+++ b/monitoring/monitoring_on.go
@ -36,6 +36,14 @@ func ExportPrometheusMetrics(grpcServer *grpc.Server, cfg lncfg.Prometheus) erro

 		grpc_prometheus.Register(grpcServer)

+		// Enable the histograms which can allow plotting latency
+		// distributions of inbound calls. However we guard this behind
+		// another flag as this can generate a lot of additional data,
+		// as it's typically a high-cardinality metric.
+		if cfg.PerfHistograms {
+			grpc_prometheus.EnableHandlingTimeHistogram()
+		}
+
 		http.Handle("/metrics", promhttp.Handler())
 		go func() {
 			http.ListenAndServe(cfg.Listen, nil)
--- a/sample-lnd.conf
+++ b/sample-lnd.conf
@ -421,6 +421,12 @@
 ; Specify the interface to listen on for Prometheus connections.
 ; prometheus.listen=0.0.0.0:8989

+; If true, then we'll export additional information that allows users to plot
+; the processing latency and total time spent for each RPC call and service.
+; This generates additional memory load for the Prometheus server, and will end
+; up using more disk space over time.
+; prometheus.perfhistograms=true
+
 ; The alias your node will use, which can be up to 32 UTF-8 characters in
 ; length.
 ; alias=My Lightning ☇