From 67e18e6089b787343130ca814dc51757d6b87d8c Mon Sep 17 00:00:00 2001
From: Olaoluwa Osuntokun
Date: Tue, 19 Oct 2021 16:19:53 -0700
Subject: [PATCH] rpc: optimize DescribeGraph by caching the response

Unfortunately, we can't use the graph cache directly here as it doesn't
include all the information we need, since it only includes the minimal
amount of information needed for path finding. Instead, we use a simple
mutex guarded cache that evicts itself after a certain interval. The
default is set small enough that most users shouldn't really notice. We
also provide a way to disable the cache, and disable it in the itests.

Fixes https://github.com/lightningnetwork/lnd/issues/1232
---
 lncfg/caches.go | 13 ++++++++++++-
 lntest/node.go  |  1 +
 rpcserver.go    | 42 ++++++++++++++++++++++++++++++++++++++++++
 sample-lnd.conf |  4 ++++
 4 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/lncfg/caches.go b/lncfg/caches.go
index 47b9a97e8..abb1cf24d 100644
--- a/lncfg/caches.go
+++ b/lncfg/caches.go
@@ -1,6 +1,9 @@
 package lncfg
 
-import "fmt"
+import (
+	"fmt"
+	"time"
+)
 
 const (
 	// MinRejectCacheSize is a floor on the maximum capacity allowed for
@@ -10,6 +13,10 @@ const (
 	// MinChannelCacheSize is a floor on the maximum capacity allowed for
 	// channeldb's channel cache. This amounts to roughly 2 MB when full.
 	MinChannelCacheSize = 1000
+
+	// DefaultRPCGraphCacheDuration is the default interval that the RPC
+	// response to DescribeGraph should be cached for.
+	DefaultRPCGraphCacheDuration = time.Minute
 )
 
 // Caches holds the configuration for various caches within lnd.
@@ -24,6 +31,10 @@ type Caches struct {
 	// peers querying for gossip traffic. Memory usage is roughly 2Kb per
 	// entry.
 	ChannelCacheSize int `long:"channel-cache-size" description:"Maximum number of entries contained in the channel cache, which is used to reduce memory allocations from gossip queries from peers. Each entry requires roughly 2Kb."`
+
+	// RPCGraphCacheDuration is used to control the flush interval of the
+	// channel graph cache.
+	RPCGraphCacheDuration time.Duration `long:"rpc-graph-cache-duration" description:"The period of time expressed as a duration (1s, 1m, 1h, etc) that the RPC response to DescribeGraph should be cached for."`
 }
 
 // Validate checks the Caches configuration for values that are too small to be
diff --git a/lntest/node.go b/lntest/node.go
index f164ad070..f56fe1c91 100644
--- a/lntest/node.go
+++ b/lntest/node.go
@@ -303,6 +303,7 @@ func (cfg NodeConfig) genArgs() []string {
 	args = append(args, fmt.Sprintf("--invoicemacaroonpath=%v", cfg.InvoiceMacPath))
 	args = append(args, fmt.Sprintf("--trickledelay=%v", trickleDelay))
 	args = append(args, fmt.Sprintf("--profile=%d", cfg.ProfilePort))
+	args = append(args, fmt.Sprintf("--caches.rpc-graph-cache-duration=0"))
 
 	if !cfg.HasSeed {
 		args = append(args, "--noseedbackup")
diff --git a/rpcserver.go b/rpcserver.go
index 5079f86d2..674e64d51 100644
--- a/rpcserver.go
+++ b/rpcserver.go
@@ -628,6 +628,10 @@ type rpcServer struct {
 
 	// interceptor is used to be able to request a shutdown
 	interceptor signal.Interceptor
+
+	graphCache        sync.RWMutex
+	describeGraphResp *lnrpc.ChannelGraph
+	graphCacheEvictor *time.Timer
 }
 
 // A compile time check to ensure that rpcServer fully implements the
@@ -813,6 +817,23 @@ func (r *rpcServer) addDeps(s *server, macService *macaroons.Service,
 	r.chanPredicate = chanPredicate
 	r.macService = macService
 	r.selfNode = selfNode.PubKeyBytes
+
+	graphCacheDuration := r.cfg.Caches.RPCGraphCacheDuration
+	if graphCacheDuration != 0 {
+		r.graphCacheEvictor = time.AfterFunc(graphCacheDuration, func() {
+			// Grab the mutex and purge the current populated
+			// describe graph response.
+			r.graphCache.Lock()
+			defer r.graphCache.Unlock()
+
+			r.describeGraphResp = nil
+
+			// Reset ourselves as well at the end so we run again
+			// after the duration.
+			r.graphCacheEvictor.Reset(graphCacheDuration)
+		})
+	}
+
 	return nil
 }
 
@@ -5381,6 +5402,20 @@ func (r *rpcServer) DescribeGraph(ctx context.Context,
 	resp := &lnrpc.ChannelGraph{}
 	includeUnannounced := req.IncludeUnannounced
 
+	// Check to see if the cache is already populated, if so then we can
+	// just return it directly.
+	//
+	// TODO(roasbeef): move this to an interceptor level feature?
+	graphCacheActive := r.cfg.Caches.RPCGraphCacheDuration != 0
+	if graphCacheActive {
+		r.graphCache.Lock()
+		defer r.graphCache.Unlock()
+
+		if r.describeGraphResp != nil {
+			return r.describeGraphResp, nil
+		}
+	}
+
 	// Obtain the pointer to the global singleton channel graph, this will
 	// provide a consistent view of the graph due to bolt db's
 	// transactional model.
@@ -5439,6 +5474,13 @@ func (r *rpcServer) DescribeGraph(ctx context.Context,
 		return nil, err
 	}
 
+	// We still have the mutex held, so we can safely populate the cache
+	// now to save on GC churn for this query, but only if the cache isn't
+	// disabled.
+	if graphCacheActive {
+		r.describeGraphResp = resp
+	}
+
 	return resp, nil
 }
 
diff --git a/sample-lnd.conf b/sample-lnd.conf
index 69533d338..7601873b4 100644
--- a/sample-lnd.conf
+++ b/sample-lnd.conf
@@ -1088,6 +1088,10 @@ litecoin.node=ltcd
 ; roughly 2Kb. (default: 20000)
 ; caches.channel-cache-size=9000000
 
+; The duration that the response to DescribeGraph should be cached for. Setting
+; the value to zero disables the cache. (default: 1m)
+; caches.rpc-graph-cache-duration=10m
+
 [protocol]
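The caching strategy above boils down to a small, reusable Go pattern: a value
guarded by a mutex, plus a time.AfterFunc timer that purges the value and
re-arms itself every interval. The sketch below illustrates that pattern in
isolation under assumed names; cachedGraph, newCachedGraph, and the string
payload are hypothetical stand-ins, not lnd's actual types (lnd keeps the
cache fields directly on rpcServer, as the diff shows).

	package main

	import (
		"fmt"
		"sync"
		"time"
	)

	// cachedGraph guards a single cached response behind a mutex and
	// evicts it on a fixed interval, mirroring the patch's approach.
	type cachedGraph struct {
		mtx     sync.Mutex
		value   string
		evictor *time.Timer
	}

	// newCachedGraph arms the eviction timer; a zero duration disables
	// caching entirely, just like rpc-graph-cache-duration=0.
	func newCachedGraph(d time.Duration) *cachedGraph {
		c := &cachedGraph{}
		if d != 0 {
			c.evictor = time.AfterFunc(d, func() {
				c.mtx.Lock()
				defer c.mtx.Unlock()

				// Purge the cached value, then re-arm the
				// timer so eviction repeats every interval.
				c.value = ""
				c.evictor.Reset(d)
			})
		}
		return c
	}

	// get returns the cached value, recomputing and caching it when the
	// cache is empty.
	func (c *cachedGraph) get(compute func() string) string {
		c.mtx.Lock()
		defer c.mtx.Unlock()

		if c.value == "" {
			c.value = compute()
		}
		return c.value
	}

	func main() {
		c := newCachedGraph(time.Minute)
		fmt.Println(c.get(func() string { return "expensive graph dump" }))
		// Calls within the next minute return the cached copy without
		// recomputing.
		fmt.Println(c.get(func() string { return "not recomputed" }))
	}

Re-arming the timer from inside its own callback keeps eviction periodic
without a dedicated goroutine, which is why the patch calls
r.graphCacheEvictor.Reset(graphCacheDuration) at the end of the eviction
function.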