From 686816d7848fdd570a91d7a4519fda09b111fedf Mon Sep 17 00:00:00 2001 From: bitromortac Date: Fri, 27 May 2022 11:34:07 +0200 Subject: [PATCH] routing: implement bimodal probability estimator Implements a new probability estimator based on a probability theory framework. The computed probability consists of: * the direct channel probability, which is estimated based on a depleted liquidity distribution model, formulas and broader concept derived after Pickhardt et al. https://arxiv.org/abs/2103.08576 * an extension of the probability model to incorporate knowledge decay after time for previous successes and failures * a mixed node probability taking into account successes/failures on other channels of the node (similar to the apriori approach) --- docs/release-notes/release-notes-0.16.0.md | 11 + routing/probability_apriori_test.go | 8 + routing/probability_bimodal.go | 536 +++++++++++++++++ routing/probability_bimodal_test.go | 664 +++++++++++++++++++++ 4 files changed, 1219 insertions(+) create mode 100644 routing/probability_bimodal.go create mode 100644 routing/probability_bimodal_test.go diff --git a/docs/release-notes/release-notes-0.16.0.md b/docs/release-notes/release-notes-0.16.0.md index bee7566a9..519fba941 100644 --- a/docs/release-notes/release-notes-0.16.0.md +++ b/docs/release-notes/release-notes-0.16.0.md @@ -389,6 +389,16 @@ in the lnwire package](https://github.com/lightningnetwork/lnd/pull/7303) * [Pathfinding takes capacity of edges into account to improve success probability estimation.](https://github.com/lightningnetwork/lnd/pull/6857) +* [A new probability model ("bimodal") is added which models channel based + liquidities within a probability theory framework.]( + https://github.com/lightningnetwork/lnd/pull/6815) + +## Configuration +* Note that [this pathfinding change](https://github.com/lightningnetwork/lnd/pull/6815) + introduces a breaking change in lnd.conf apriori parameters under the routing + section, see sample-lnd.conf for an updated configuration. The behavior of + `lncli setmccfg/getmccfg` is altered as well. + ### Tooling and documentation @@ -445,6 +455,7 @@ refactor the itest for code health and maintenance. * Alyssa Hertig * andreihod * Antoni Spaanderman +* bitromortac * Carla Kirk-Cohen * Carsten Otto * Chris Geihsler diff --git a/routing/probability_apriori_test.go b/routing/probability_apriori_test.go index db11aa12c..a055d4ae2 100644 --- a/routing/probability_apriori_test.go +++ b/routing/probability_apriori_test.go @@ -87,6 +87,8 @@ func (c *estimatorTestContext) assertPairProbability(now time.Time, // TestProbabilityEstimatorNoResults tests the probability estimation when no // results are available. func TestProbabilityEstimatorNoResults(t *testing.T) { + t.Parallel() + ctx := newEstimatorTestContext(t) // A zero amount does not trigger capacity rescaling. @@ -104,6 +106,8 @@ func TestProbabilityEstimatorNoResults(t *testing.T) { // TestProbabilityEstimatorOneSuccess tests the probability estimation for nodes // that have a single success result. func TestProbabilityEstimatorOneSuccess(t *testing.T) { + t.Parallel() + ctx := newEstimatorTestContext(t) ctx.results = map[int]TimedPairResult{ @@ -144,6 +148,8 @@ func TestProbabilityEstimatorOneSuccess(t *testing.T) { // TestProbabilityEstimatorOneFailure tests the probability estimation for nodes // that have a single failure. 
 func TestProbabilityEstimatorOneFailure(t *testing.T) {
+	t.Parallel()
+
 	ctx := newEstimatorTestContext(t)
 
 	ctx.results = map[int]TimedPairResult{
@@ -171,6 +177,8 @@ func TestProbabilityEstimatorOneFailure(t *testing.T) {
 // TestProbabilityEstimatorMix tests the probability estimation for nodes for
 // which a mix of successes and failures is recorded.
 func TestProbabilityEstimatorMix(t *testing.T) {
+	t.Parallel()
+
 	ctx := newEstimatorTestContext(t)
 
 	ctx.results = map[int]TimedPairResult{
diff --git a/routing/probability_bimodal.go b/routing/probability_bimodal.go
new file mode 100644
index 000000000..daa086fa9
--- /dev/null
+++ b/routing/probability_bimodal.go
@@ -0,0 +1,536 @@
+package routing
+
+import (
+	"fmt"
+	"math"
+	"time"
+
+	"github.com/btcsuite/btcd/btcutil"
+	"github.com/go-errors/errors"
+	"github.com/lightningnetwork/lnd/lnwire"
+	"github.com/lightningnetwork/lnd/routing/route"
+)
+
+const (
+	// DefaultBimodalScaleMsat is the default value for BimodalScaleMsat in
+	// BimodalConfig. It describes the distribution of funds in the LN based
+	// on empirical findings. We assume an unbalanced network by default.
+	DefaultBimodalScaleMsat = lnwire.MilliSatoshi(300_000_000)
+
+	// DefaultBimodalNodeWeight is the default value for the
+	// BimodalNodeWeight in BimodalConfig. It is chosen such that past
+	// forwardings on other channels of a router are only slightly taken
+	// into account.
+	DefaultBimodalNodeWeight = 0.2
+
+	// DefaultBimodalDecayTime is the default value for BimodalDecayTime.
+	// We will forget about previous learnings about channel liquidity on
+	// the timescale of about a week.
+	DefaultBimodalDecayTime = 7 * 24 * time.Hour
+
+	// BimodalScaleMsatMax is the maximum value for BimodalScaleMsat to
+	// avoid numerical issues.
+	BimodalScaleMsatMax = lnwire.MilliSatoshi(21e17)
+
+	// BimodalEstimatorName is used to identify the bimodal estimator.
+	BimodalEstimatorName = "bimodal"
+)
+
+var (
+	// ErrInvalidScale is returned when we get a scale of zero or one that
+	// exceeds the sane maximum.
+	ErrInvalidScale = errors.New("scale must be > 0 and sane")
+
+	// ErrInvalidNodeWeight is returned when we get a node weight that is
+	// out of range.
+	ErrInvalidNodeWeight = errors.New("node weight must be in [0, 1]")
+
+	// ErrInvalidDecayTime is returned when we get a decay time of zero or
+	// below.
+	ErrInvalidDecayTime = errors.New("decay time must be larger than zero")
+)
+
+// BimodalConfig contains configuration for our probability estimator.
+type BimodalConfig struct {
+	// BimodalNodeWeight defines how strongly other previous forwardings on
+	// channels of a router should be taken into account when computing a
+	// channel's probability to route. The allowed values are in the range
+	// [0, 1], where a value of 0 means that only direct information about a
+	// channel is taken into account.
+	BimodalNodeWeight float64
+
+	// BimodalScaleMsat describes the scale over which channels
+	// statistically have some liquidity left. The value determines how
+	// quickly the bimodal distribution drops off from the edges of a
+	// channel. A larger value (compared to typical channel capacities)
+	// means that the drop off is slow and that channel balances are
+	// distributed more uniformly. A small value leads to the assumption of
+	// very unbalanced channels.
+	BimodalScaleMsat lnwire.MilliSatoshi
+
+	// BimodalDecayTime is the scale for the exponential information decay
+	// over time for previous successes or failures.
+	BimodalDecayTime time.Duration
+}
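As a usage sketch (editorial, not part of the patch): the three knobs above are
meant to be set through the constructor added below, which enforces the
documented ranges. The helper name here is hypothetical:

	// Sketch: construct a validated estimator with a custom node weight.
	func newCustomEstimator() (*BimodalEstimator, error) {
		// Start from the defaults and weight other channels of the
		// node more strongly.
		cfg := DefaultBimodalConfig()
		cfg.BimodalNodeWeight = 0.3

		// NewBimodalEstimator runs validate() and rejects values that
		// are out of range, e.g. ErrInvalidNodeWeight for a weight
		// outside [0, 1].
		return NewBimodalEstimator(cfg)
	}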
+
+// validate checks the configuration of the estimator for allowed values.
+func (p BimodalConfig) validate() error {
+	if p.BimodalDecayTime <= 0 {
+		return fmt.Errorf("%v: %w", BimodalEstimatorName,
+			ErrInvalidDecayTime)
+	}
+
+	if p.BimodalNodeWeight < 0 || p.BimodalNodeWeight > 1 {
+		return fmt.Errorf("%v: %w", BimodalEstimatorName,
+			ErrInvalidNodeWeight)
+	}
+
+	if p.BimodalScaleMsat == 0 || p.BimodalScaleMsat > BimodalScaleMsatMax {
+		return fmt.Errorf("%v: %w", BimodalEstimatorName,
+			ErrInvalidScale)
+	}
+
+	return nil
+}
+
+// DefaultBimodalConfig returns the default configuration for the estimator.
+func DefaultBimodalConfig() BimodalConfig {
+	return BimodalConfig{
+		BimodalNodeWeight: DefaultBimodalNodeWeight,
+		BimodalScaleMsat:  DefaultBimodalScaleMsat,
+		BimodalDecayTime:  DefaultBimodalDecayTime,
+	}
+}
+
+// BimodalEstimator returns node and pair probabilities that are derived from
+// historical payment results and a liquidity distribution model of the LN.
+// Its main function is to estimate the direct channel probability based on a
+// depleted liquidity distribution model, with additional information decay
+// over time. A per-node probability can be mixed with the direct probability,
+// taking into account successes/failures on other channels of the forwarder.
+type BimodalEstimator struct {
+	// BimodalConfig contains configuration options for our estimator.
+	BimodalConfig
+}
+
+// NewBimodalEstimator creates a new BimodalEstimator.
+func NewBimodalEstimator(cfg BimodalConfig) (*BimodalEstimator, error) {
+	if err := cfg.validate(); err != nil {
+		return nil, err
+	}
+
+	return &BimodalEstimator{
+		BimodalConfig: cfg,
+	}, nil
+}
+
+// Compile-time checks that interfaces are implemented.
+var _ Estimator = (*BimodalEstimator)(nil)
+var _ estimatorConfig = (*BimodalConfig)(nil)
+
+// Config returns the current configuration of the estimator.
+func (p *BimodalEstimator) Config() estimatorConfig {
+	return p.BimodalConfig
+}
+
+// String returns the estimator's configuration as a string representation.
+func (p *BimodalEstimator) String() string {
+	return fmt.Sprintf("estimator type: %v, decay time: %v, liquidity "+
+		"scale: %v, node weight: %v", BimodalEstimatorName,
+		p.BimodalDecayTime, p.BimodalScaleMsat, p.BimodalNodeWeight)
+}
+
+// PairProbability estimates the probability of successfully traversing to
+// toNode based on historical payment outcomes for the from node. Those
+// outcomes are passed in via the results parameter.
+func (p *BimodalEstimator) PairProbability(now time.Time,
+	results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi,
+	capacity btcutil.Amount) float64 {
+
+	// We first compute the probability for the desired hop taking into
+	// account previous knowledge.
+	directProbability := p.directProbability(
+		now, results, toNode, amt, lnwire.NewMSatFromSatoshis(capacity),
+	)
+
+	// The final probability is computed by taking into account other
+	// channels of the from node.
+	return p.calculateProbability(directProbability, now, results, toNode)
+}
+
+// LocalPairProbability computes the probability to reach toNode given a set of
+// previous learnings.
+func (p *BimodalEstimator) LocalPairProbability(now time.Time,
+	results NodeResults, toNode route.Vertex) float64 {
+
+	// For direct local probabilities we assume to know exactly how much we
+	// can send over a channel, which requires that channels are active and
+	// have enough liquidity.
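+	//
+	// For example, with the default decay time of one week, an unexpected
+	// failure that happened exactly one week ago reduces the probability
+	// computed below to 1 - 1/e ~ 0.63, while a failure several weeks old
+	// has almost no remaining effect (see TestLocalPairProbability).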
+ directProbability := 1.0 + + // If we had an unexpected failure for this node, we reduce the + // probability for some time to avoid infinite retries. + result, ok := results[toNode] + if ok && !result.FailTime.IsZero() { + timeAgo := now.Sub(result.FailTime) + + // We only expect results in the past to get a probability + // between 0 and 1. + if timeAgo < 0 { + timeAgo = 0 + } + exponent := -float64(timeAgo) / float64(p.BimodalDecayTime) + directProbability -= math.Exp(exponent) + } + + return directProbability +} + +// directProbability computes the probability to reach a node based on the +// liquidity distribution in the LN. +func (p *BimodalEstimator) directProbability(now time.Time, + results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi, + capacity lnwire.MilliSatoshi) float64 { + + // We first determine the time-adjusted success and failure amounts to + // then compute a probability. We know that we can send a zero amount. + successAmount := lnwire.MilliSatoshi(0) + + // We know that we cannot send the full capacity. + failAmount := capacity + + // If we have information about past successes or failures, we modify + // them with a time decay. + result, ok := results[toNode] + if ok { + // Apply a time decay for the amount we cannot send. + if !result.FailTime.IsZero() { + failAmount = cannotSend( + result.FailAmt, capacity, now, result.FailTime, + p.BimodalDecayTime, + ) + } + + // Apply a time decay for the amount we can send. + if !result.SuccessTime.IsZero() { + successAmount = canSend( + result.SuccessAmt, now, result.SuccessTime, + p.BimodalDecayTime, + ) + } + } + + // Compute the direct channel probability. + probability, err := p.probabilityFormula( + capacity, successAmount, failAmount, amt, + ) + if err != nil { + log.Errorf("error computing probability: %v", err) + + return 0.0 + } + + return probability +} + +// calculateProbability computes the total hop probability combining the channel +// probability and historic forwarding data of other channels of the node we try +// to send from. +// +// Goals: +// * We want to incentivize good routing nodes: the more routable channels a +// node has, the more we want to incentivize (vice versa for failures). +// -> We reduce/increase the direct probability depending on past +// failures/successes for other channels of the node. +// +// * We want to be forgiving/give other nodes a chance as well: we want to +// forget about (non-)routable channels over time. +// -> We weight the successes/failures with a time decay such that they will not +// influence the total probability if a long time went by. +// +// * If we don't have other info, we want to solely rely on the direct +// probability. +// +// * We want to be able to specify how important the other channels are compared +// to the direct channel. +// -> Introduce a node weight factor that weights the direct probability against +// the node-wide average. The larger the node weight, the more important other +// channels of the node are. +// +// How do failures on low fee nodes redirect routing to higher fee nodes? +// Assumptions: +// * attemptCostPPM of 1000 PPM +// * constant direct channel probability of P0 (usually 0.5 for large amounts) +// * node weight w of 0.2 +// +// The question we want to answer is: +// How often would a zero-fee node be tried (even if there were failures for its +// other channels) over trying a high-fee node with 2000 PPM and no direct +// knowledge about the channel to send over? 
+//
+// The probability of a successful route of length l is P(l) = P0^l.
+//
+// The total probability after n failures (with the implemented method here) is:
+// P(l, n) = P(l-1) * P(n)
+//         = P(l-1) * (P0 + n*0) / (1 + n*w)
+//         = P(l) / (1 + n*w)
+//
+// Condition for a high-fee channel to overcome a low fee channel in the
+// Dijkstra weight function (only looking at fee and probability PPM terms):
+// highFeePPM + attemptCostPPM * 1/P(l) = 0PPM + attemptCostPPM * 1/P(l, n)
+// highFeePPM/attemptCostPPM = 1/P(l, n) - 1/P(l) =
+// = (1 + n*w)/P(l) - 1/P(l) =
+// = n*w/P(l)
+//
+// Therefore:
+// n = (highFeePPM/attemptCostPPM) * (P(l)/w) =
+// = (2000/1000) * 0.5^l / w
+//
+// For a one-hop route we get:
+// n = 2 * 0.5 / 0.2 = 5 tolerated failures
+//
+// For a three-hop route we get:
+// n = 2 * 0.125 / 0.2 = 1.25 tolerated failures, i.e., longer (less probable)
+// routes are more sensitive to failures on other channels of the node.
+//
+// For more details on the behavior see tests.
+func (p *BimodalEstimator) calculateProbability(directProbability float64,
+	now time.Time, results NodeResults, toNode route.Vertex) float64 {
+
+	// If we don't take other channels into account, we can return early.
+	if p.BimodalNodeWeight == 0.0 {
+		return directProbability
+	}
+
+	// w is a parameter which determines how strongly the other channels of
+	// a node should be incorporated, the higher the stronger.
+	w := p.BimodalNodeWeight
+
+	// dt is the decay time that determines how quickly previous
+	// successes/failures stop being taken into account.
+	dt := float64(p.BimodalDecayTime)
+
+	// The direct channel probability is weighted fully, all other results
+	// are weighted according to how recent the information is.
+	totalProbabilities := directProbability
+	totalWeights := 1.0
+
+	for peer, result := range results {
+		// We don't include the direct hop probability here because it
+		// is already included in totalProbabilities.
+		if peer == toNode {
+			continue
+		}
+
+		// We add probabilities weighted by how recent the info is.
+		var weight float64
+		if result.SuccessAmt > 0 {
+			exponent := -float64(now.Sub(result.SuccessTime)) / dt
+			weight = math.Exp(exponent)
+			totalProbabilities += w * weight
+			totalWeights += w * weight
+		}
+		if result.FailAmt > 0 {
+			exponent := -float64(now.Sub(result.FailTime)) / dt
+			weight = math.Exp(exponent)
+
+			// Failures don't add to total success probability.
+			totalWeights += w * weight
+		}
+	}
+
+	return totalProbabilities / totalWeights
+}
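A quick editorial check of the mixing formula above (sketch, not part of the
patch), reproducing the "unknown, single success" case of TestComputeProbability
below: a direct probability of 0.5, one fresh success on another channel of the
node, and the default node weight w = 0.2:

	direct, w := 0.5, 0.2

	// One fresh success contributes probability 1 with weight w.
	totalProbabilities := direct + w*1.0
	totalWeights := 1.0 + w*1.0

	fmt.Println(totalProbabilities / totalWeights) // 0.5833..., the 0.583
	// asserted in the test.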
+
+// canSend returns the sendable amount over the channel, respecting time decay.
+// canSend approaches zero if we wait for a much longer time than the decay
+// time.
+func canSend(successAmount lnwire.MilliSatoshi, now, successTime time.Time,
+	decayConstant time.Duration) lnwire.MilliSatoshi {
+
+	// The factor approaches 0 for successTime a long time in the past and
+	// is 1 when the successTime is now.
+	factor := math.Exp(
+		-float64(now.Sub(successTime)) / float64(decayConstant),
+	)
+
+	canSend := factor * float64(successAmount)
+
+	return lnwire.MilliSatoshi(canSend)
+}
+
+// cannotSend returns the amount that can not be sent over the channel,
+// respecting time decay. cannotSend approaches the capacity if we wait for a
+// much longer time than the decay time.
+func cannotSend(failAmount, capacity lnwire.MilliSatoshi, now,
+	failTime time.Time, decayConstant time.Duration) lnwire.MilliSatoshi {
+
+	if failAmount > capacity {
+		failAmount = capacity
+	}
+
+	// The factor approaches 0 for failTime a long time in the past and it
+	// is 1 when the failTime is now.
+	factor := math.Exp(
+		-float64(now.Sub(failTime)) / float64(decayConstant),
+	)
+
+	cannotSend := capacity - lnwire.MilliSatoshi(
+		factor*float64(capacity-failAmount),
+	)
+
+	return cannotSend
+}
+
+// primitive computes the indefinite integral of our assumed (normalized)
+// liquidity probability distribution. The distribution of liquidity x here is
+// the function P(x) ~ exp(-x/s) + exp((x-c)/s), i.e., two exponentials residing
+// at the ends of channels. This means that we expect liquidity to be at either
+// side of the channel with capacity c. The s parameter (scale) defines how far
+// the liquidity leaks into the channel. A very low scale assumes completely
+// unbalanced channels, a very high scale assumes a random distribution. More
+// details can be found in
+// https://github.com/lightningnetwork/lnd/issues/5988#issuecomment-1131234858.
+func (p *BimodalEstimator) primitive(c, x float64) float64 {
+	s := float64(p.BimodalScaleMsat)
+
+	// The indefinite integral of P(x) is given by
+	// Int P(x) dx = H(x) = s * (-e(-x/s) + e((x-c)/s)),
+	// and its norm from 0 to c can be computed from it,
+	// norm = [H(x)]_0^c = s * ((-e(-c/s) + 1) - (-1 + e(-c/s)))
+	//      = s * (2 - 2*e(-c/s)).
+	// The s factor cancels when the primitive is divided by the norm
+	// below, which is why it is dropped from both.
+	ecs := math.Exp(-c / s)
+	exs := math.Exp(-x / s)
+
+	// It would be possible to split the next term and reuse the factors
+	// from before, but this can lead to numerical issues with large
+	// numbers.
+	excs := math.Exp((x - c) / s)
+
+	// norm can only become zero, if c is zero, which we sorted out before
+	// calling this method.
+	norm := -2*ecs + 2
+
+	// We end up with the primitive function of the normalized P(x).
+	return (-exs + excs) / norm
+}
+
+// integral computes the integral of our liquidity distribution from the lower
+// to the upper value.
+func (p *BimodalEstimator) integral(capacity, lower, upper float64) float64 {
+	if lower < 0 || lower > upper {
+		log.Errorf("probability integral limits nonsensical: "+
+			"capacity: %v, lower: %v, upper: %v",
+			capacity, lower, upper)
+
+		return 0.0
+	}
+
+	return p.primitive(capacity, upper) - p.primitive(capacity, lower)
+}
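To make the numbers concrete, a small editorial sketch (not part of the patch)
that evaluates the formulas above with the test constants used further below
(capacity 10_000_000 msat, scale and amount 400_000 msat) and no prior
knowledge; it reproduces the 0.684 asserted in TestSuccessProbability's
"no info, small amount" case:

	c, s, a := 10_000_000.0, 400_000.0, 400_000.0

	// Normalization of P(x) over [0, c]; the scale factor s cancels.
	norm := 2 - 2*math.Exp(-c/s)

	// H is the primitive of the normalized P(x), as in primitive() above.
	H := func(x float64) float64 {
		return (-math.Exp(-x/s) + math.Exp((x-c)/s)) / norm
	}

	// With no prior info we have successAmount = 0 and failAmount = c, so
	// the renormalization integral H(c) - H(0) is 1 and the success
	// probability is H(c) - H(a).
	fmt.Println(H(c) - H(a)) // ~0.684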
+
+// probabilityFormula computes the expected probability for a payment of
+// amountMsat given prior learnings for a channel of certain capacity.
+// successAmountMsat and failAmountMsat stand for the unsettled success and
+// failure amounts, respectively. The formula is derived using the formalism
+// presented in Pickhardt et al., https://arxiv.org/abs/2103.08576.
+func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
+	failAmountMsat, amountMsat lnwire.MilliSatoshi) (float64, error) {
+
+	// Convert to positive-valued floats.
+	capacity := float64(capacityMsat)
+	successAmount := float64(successAmountMsat)
+	failAmount := float64(failAmountMsat)
+	amount := float64(amountMsat)
+
+	// Capacity being zero is a sentinel value to ignore the probability
+	// estimation, so we return the full probability here.
+	if capacity == 0.0 {
+		return 1.0, nil
+	}
+
+	// We cannot send more than the capacity.
+	if amount > capacity {
+		return 0.0, nil
+	}
+
+	// Mission control may have some outdated values, we correct them here.
+	// TODO(bitromortac): there may be better decisions to make in these
+	// cases, e.g., resetting failAmount=cap and successAmount=0.
+
+	// failAmount should be capacity at max.
+	if failAmount > capacity {
+		failAmount = capacity
+	}
+
+	// successAmount should be capacity at max.
+	if successAmount > capacity {
+		successAmount = capacity
+	}
+
+	// The next statement is a safety check against an illogical condition,
+	// otherwise the renormalization integral would become zero. This may
+	// happen if a large channel gets closed and smaller ones remain, but
+	// it should recover with the time decay.
+	if failAmount <= successAmount {
+		log.Tracef("success amount (%v) is larger than or equal to "+
+			"the fail amount (%v) for capacity (%v)",
+			successAmountMsat, failAmountMsat, capacityMsat)
+
+		return 0.0, nil
+	}
+
+	// We cannot send more than the fail amount.
+	if amount >= failAmount {
+		return 0.0, nil
+	}
+
+	// The success probability for payment amount a is the integral over the
+	// prior distribution P(x), the probability to find liquidity between
+	// the amount a and channel capacity c (or failAmount a_f):
+	// P(X >= a | X < a_f) = Integral_{a}^{a_f} P(x) dx
+	prob := p.integral(capacity, amount, failAmount)
+	if math.IsNaN(prob) {
+		return 0.0, fmt.Errorf("non-normalized probability is NaN, "+
+			"capacity: %v, amount: %v, fail amount: %v",
+			capacity, amount, failAmount)
+	}
+
+	// If we have payment information, we need to adjust the prior
+	// distribution P(x) and get the posterior distribution by renormalizing
+	// the prior distribution in such a way that the probability mass lies
+	// between a_s and a_f.
+	reNorm := p.integral(capacity, successAmount, failAmount)
+	if math.IsNaN(reNorm) {
+		return 0.0, fmt.Errorf("normalization factor is NaN, "+
+			"capacity: %v, success amount: %v, fail amount: %v",
+			capacity, successAmount, failAmount)
+	}
+
+	// The normalization factor can only be zero if the success amount is
+	// equal to or larger than the fail amount. This should not happen as we
+	// have checked this scenario above.
+	if reNorm == 0.0 {
+		return 0.0, fmt.Errorf("normalization factor is zero, "+
+			"capacity: %v, success amount: %v, fail amount: %v",
+			capacity, successAmount, failAmount)
+	}
+
+	prob /= reNorm
+
+	// Note that for payment amounts smaller than successAmount, we can get
+	// a value larger than unity, which we cap here to get a proper
+	// probability.
+	if prob > 1.0 {
+		if amount > successAmount {
+			return 0.0, fmt.Errorf("unexpected large probability "+
+				"(%v) capacity: %v, amount: %v, success "+
+				"amount: %v, fail amount: %v", prob, capacity,
+				amount, successAmount, failAmount)
+		}
+
+		return 1.0, nil
+	} else if prob < 0.0 {
+		return 0.0, fmt.Errorf("negative probability "+
+			"(%v) capacity: %v, amount: %v, success "+
+			"amount: %v, fail amount: %v", prob, capacity,
+			amount, successAmount, failAmount)
+	}
+
+	return prob, nil
+}
diff --git a/routing/probability_bimodal_test.go b/routing/probability_bimodal_test.go
new file mode 100644
index 000000000..53f8829c4
--- /dev/null
+++ b/routing/probability_bimodal_test.go
@@ -0,0 +1,664 @@
+package routing
+
+import (
+	"math"
+	"testing"
+	"time"
+
+	"github.com/lightningnetwork/lnd/lnwire"
+	"github.com/lightningnetwork/lnd/routing/route"
+	"github.com/stretchr/testify/require"
+)
+
+const (
+	smallAmount = lnwire.MilliSatoshi(400_000)
+	largeAmount = lnwire.MilliSatoshi(5_000_000)
+	capacity    = lnwire.MilliSatoshi(10_000_000)
+	scale       = lnwire.MilliSatoshi(400_000)
+)
+
+// TestSuccessProbability tests that we get correct probability estimates for
+// the direct channel probability.
+func TestSuccessProbability(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + expectedProbability float64 + tolerance float64 + successAmount lnwire.MilliSatoshi + failAmount lnwire.MilliSatoshi + amount lnwire.MilliSatoshi + capacity lnwire.MilliSatoshi + }{ + // We can't send more than the capacity. + { + name: "no info, larger than capacity", + capacity: capacity, + successAmount: 0, + failAmount: capacity, + amount: capacity + 1, + expectedProbability: 0.0, + }, + // With the current model we don't prefer any channels if the + // send amount is large compared to the scale but small compared + // to the capacity. + { + name: "no info, large amount", + capacity: capacity, + successAmount: 0, + failAmount: capacity, + amount: largeAmount, + expectedProbability: 0.5, + }, + // We always expect to be able to "send" an amount of 0. + { + name: "no info, zero amount", + capacity: capacity, + successAmount: 0, + failAmount: capacity, + amount: 0, + expectedProbability: 1.0, + }, + // We can't send the whole capacity. + { + name: "no info, full capacity", + capacity: capacity, + successAmount: 0, + failAmount: capacity, + amount: capacity, + expectedProbability: 0.0, + }, + // Sending a small amount will have a higher probability to go + // through than a large amount. + { + name: "no info, small amount", + capacity: capacity, + successAmount: 0, + failAmount: capacity, + amount: smallAmount, + expectedProbability: 0.684, + tolerance: 0.001, + }, + // If we had an unsettled success, we are sure we can send a + // lower amount. + { + name: "previous success, lower amount", + capacity: capacity, + successAmount: largeAmount, + failAmount: capacity, + amount: smallAmount, + expectedProbability: 1.0, + }, + // If we had an unsettled success, we are sure we can send the + // same amount. + { + name: "previous success, success amount", + capacity: capacity, + successAmount: largeAmount, + failAmount: capacity, + amount: largeAmount, + expectedProbability: 1.0, + }, + // If we had an unsettled success with a small amount, we know + // with increased probability that we can send a comparable + // higher amount. + { + name: "previous success, larger amount", + capacity: capacity, + successAmount: smallAmount / 2, + failAmount: capacity, + amount: smallAmount, + expectedProbability: 0.851, + tolerance: 0.001, + }, + // If we had a large unsettled success before, we know we can + // send even larger payments with high probability. + { + name: "previous large success, larger " + + "amount", + capacity: capacity, + successAmount: largeAmount / 2, + failAmount: capacity, + amount: largeAmount, + expectedProbability: 0.998, + tolerance: 0.001, + }, + // If we had a failure before, we can't send with the fail + // amount. + { + name: "previous failure, fail amount", + capacity: capacity, + failAmount: largeAmount, + amount: largeAmount, + expectedProbability: 0.0, + }, + // We can't send a higher amount than the fail amount either. + { + name: "previous failure, larger fail " + + "amount", + capacity: capacity, + failAmount: largeAmount, + amount: largeAmount + smallAmount, + expectedProbability: 0.0, + }, + // We expect a diminished non-zero probability if we try to send + // an amount that's lower than the last fail amount. + { + name: "previous failure, lower than fail " + + "amount", + capacity: capacity, + failAmount: largeAmount, + amount: smallAmount, + expectedProbability: 0.368, + tolerance: 0.001, + }, + // From here on we deal with mixed previous successes and + // failures. 
+		// We expect to always be able to send a tiny amount.
+		{
+			name:                "previous f/s, very small amount",
+			capacity:            capacity,
+			failAmount:          largeAmount,
+			successAmount:       smallAmount,
+			amount:              0,
+			expectedProbability: 1.0,
+		},
+		// We expect to be able to send up to the previous success
+		// amount with full certainty.
+		{
+			name:                "previous f/s, success amount",
+			capacity:            capacity,
+			failAmount:          largeAmount,
+			successAmount:       smallAmount,
+			amount:              smallAmount,
+			expectedProbability: 1.0,
+		},
+		// This tests an amount lying between the success and fail
+		// amounts.
+		{
+			name:                "previous f/s, between f/s",
+			capacity:            capacity,
+			failAmount:          largeAmount,
+			successAmount:       smallAmount,
+			amount:              smallAmount + largeAmount/10,
+			expectedProbability: 0.287,
+			tolerance:           0.001,
+		},
+		// We still can't send the fail amount.
+		{
+			name:                "previous f/s, fail amount",
+			capacity:            capacity,
+			failAmount:          largeAmount,
+			successAmount:       smallAmount,
+			amount:              largeAmount,
+			expectedProbability: 0.0,
+		},
+		// Same success and failure amounts (illogical).
+		{
+			name:                "previous f/s, same",
+			capacity:            capacity,
+			failAmount:          largeAmount,
+			successAmount:       largeAmount,
+			amount:              largeAmount,
+			expectedProbability: 0.0,
+		},
+		// Higher success than failure amount (illogical).
+		{
+			name:                "previous f/s, higher success",
+			capacity:            capacity,
+			failAmount:          smallAmount,
+			successAmount:       largeAmount,
+			expectedProbability: 0.0,
+		},
+	}
+
+	estimator := BimodalEstimator{
+		BimodalConfig: BimodalConfig{BimodalScaleMsat: scale},
+	}
+
+	for _, test := range tests {
+		test := test
+
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+
+			p, err := estimator.probabilityFormula(
+				test.capacity, test.successAmount,
+				test.failAmount, test.amount,
+			)
+			require.NoError(t, err)
+			require.InDelta(t, test.expectedProbability, p,
+				test.tolerance)
+		})
+	}
+}
+
+// TestIntegral tests certain limits of the probability distribution integral.
+func TestIntegral(t *testing.T) {
+	t.Parallel()
+
+	defaultScale := lnwire.NewMSatFromSatoshis(300_000)
+
+	tests := []struct {
+		name     string
+		capacity float64
+		lower    float64
+		upper    float64
+		scale    lnwire.MilliSatoshi
+		expected float64
+	}{
+		{
+			name:     "all zero",
+			expected: math.NaN(),
+			scale:    defaultScale,
+		},
+		{
+			name:     "all same",
+			capacity: 1,
+			lower:    1,
+			upper:    1,
+			scale:    defaultScale,
+		},
+		{
+			name:     "large numbers, low lower",
+			capacity: 21e17,
+			lower:    0,
+			upper:    21e17,
+			expected: 1,
+			scale:    defaultScale,
+		},
+		{
+			name:     "large numbers, high lower",
+			capacity: 21e17,
+			lower:    21e17,
+			upper:    21e17,
+			scale:    defaultScale,
+		},
+		{
+			name:     "same scale and capacity",
+			capacity: 21e17,
+			lower:    21e17,
+			upper:    21e17,
+			scale:    21e17,
+		},
+	}
+
+	for _, test := range tests {
+		test := test
+
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+
+			estimator := BimodalEstimator{
+				BimodalConfig: BimodalConfig{
+					BimodalScaleMsat: test.scale,
+				},
+			}
+
+			p := estimator.integral(
+				test.capacity, test.lower, test.upper,
+			)
+			require.InDelta(t, test.expected, p, 0.001)
+		})
+	}
+}
+
+// TestCanSend tests that the success amount drops to zero over time.
+func TestCanSend(t *testing.T) {
+	t.Parallel()
+
+	successAmount := lnwire.MilliSatoshi(1_000_000)
+	successTime := time.Unix(1_000, 0)
+	now := time.Unix(2_000, 0)
+	decayTime := time.Duration(1_000) * time.Second
+	infinity := time.Unix(10_000_000_000, 0)
+
+	// Test an immediate retry.
+ require.Equal(t, successAmount, canSend( + successAmount, successTime, successTime, decayTime, + )) + + // Test that after the decay time, the successAmount is 1/e of its + // value. + decayAmount := lnwire.MilliSatoshi(float64(successAmount) / math.E) + require.Equal(t, decayAmount, canSend( + successAmount, now, successTime, decayTime, + )) + + // After a long time, we want the amount to approach 0. + require.Equal(t, lnwire.MilliSatoshi(0), canSend( + successAmount, infinity, successTime, decayTime, + )) +} + +// TestCannotSend tests that the fail amount approaches the capacity over time. +func TestCannotSend(t *testing.T) { + t.Parallel() + + failAmount := lnwire.MilliSatoshi(1_000_000) + failTime := time.Unix(1_000, 0) + now := time.Unix(2_000, 0) + decayTime := time.Duration(1_000) * time.Second + infinity := time.Unix(10_000_000_000, 0) + capacity := lnwire.MilliSatoshi(3_000_000) + + // Test immediate retry. + require.EqualValues(t, failAmount, cannotSend( + failAmount, capacity, failTime, failTime, decayTime, + )) + + // After the decay time we want to be between the fail amount and + // the capacity. + summand := lnwire.MilliSatoshi(float64(capacity-failAmount) / math.E) + expected := capacity - summand + require.Equal(t, expected, cannotSend( + failAmount, capacity, now, failTime, decayTime, + )) + + // After a long time, we want the amount to approach the capacity. + require.Equal(t, capacity, cannotSend( + failAmount, capacity, infinity, failTime, decayTime, + )) +} + +// TestComputeProbability tests the inclusion of previous forwarding results of +// other channels of the node into the total probability. +func TestComputeProbability(t *testing.T) { + t.Parallel() + + nodeWeight := 1 / 5. + toNode := route.Vertex{10} + tolerance := 0.01 + decayTime := time.Duration(1) * time.Hour * 24 + + // makeNodeResults prepares forwarding data for the other channels of + // the node. + makeNodeResults := func(successes []bool, now time.Time) NodeResults { + results := make(NodeResults, len(successes)) + + for i, s := range successes { + vertex := route.Vertex{byte(i)} + + results[vertex] = TimedPairResult{ + FailTime: now, FailAmt: 1, + } + if s { + results[vertex] = TimedPairResult{ + SuccessTime: now, SuccessAmt: 1, + } + } + } + + return results + } + + tests := []struct { + name string + directProbability float64 + otherResults []bool + expectedProbability float64 + delay time.Duration + }{ + // If no other information is available, use the direct + // probability. + { + name: "unknown, only direct", + directProbability: 0.5, + expectedProbability: 0.5, + }, + // If there was a single success, expect increased success + // probability. + { + name: "unknown, single success", + directProbability: 0.5, + otherResults: []bool{true}, + expectedProbability: 0.583, + }, + // If there were many successes, expect even higher success + // probability. + { + name: "unknown, many successes", + directProbability: 0.5, + otherResults: []bool{ + true, true, true, true, true, + }, + expectedProbability: 0.75, + }, + // If there was a single failure, we expect a slightly decreased + // probability. + { + name: "unknown, single failure", + directProbability: 0.5, + otherResults: []bool{false}, + expectedProbability: 0.416, + }, + // If there were many failures, we expect a strongly decreased + // probability. 
+ { + name: "unknown, many failures", + directProbability: 0.5, + otherResults: []bool{ + false, false, false, false, false, + }, + expectedProbability: 0.25, + }, + // A success and a failure neutralize themselves. + { + name: "unknown, mixed even", + directProbability: 0.5, + otherResults: []bool{true, false}, + expectedProbability: 0.5, + }, + // A mixed result history leads to increase/decrease of the most + // experienced successes/failures. + { + name: "unknown, mixed uneven", + directProbability: 0.5, + otherResults: []bool{ + true, true, false, false, false, + }, + expectedProbability: 0.45, + }, + // Many successes don't elevate the probability above 1. + { + name: "success, successes", + directProbability: 1.0, + otherResults: []bool{ + true, true, true, true, true, + }, + expectedProbability: 1.0, + }, + // Five failures on a very certain channel will lower its + // success probability to the unknown probability. + { + name: "success, failures", + directProbability: 1.0, + otherResults: []bool{ + false, false, false, false, false, + }, + expectedProbability: 0.5, + }, + // If we are sure that the channel can send, a single failure + // will not decrease the outcome significantly. + { + name: "success, single failure", + directProbability: 1.0, + otherResults: []bool{false}, + expectedProbability: 0.8333, + }, + { + name: "success, many failures", + directProbability: 1.0, + otherResults: []bool{ + false, false, false, false, false, false, false, + }, + expectedProbability: 0.416, + }, + // Failures won't decrease the probability below zero. + { + name: "fail, failures", + directProbability: 0.0, + otherResults: []bool{false, false, false}, + expectedProbability: 0.0, + }, + { + name: "fail, successes", + directProbability: 0.0, + otherResults: []bool{ + true, true, true, true, true, + }, + expectedProbability: 0.5, + }, + // We test forgetting information with the time decay. + // A past success won't alter the certain success probability. + { + name: "success, single success, decay " + + "time", + directProbability: 1.0, + otherResults: []bool{true}, + delay: decayTime, + expectedProbability: 1.00, + }, + // A failure that was experienced some time ago won't influence + // as much as a recent one. + { + name: "success, single fail, decay time", + directProbability: 1.0, + otherResults: []bool{false}, + delay: decayTime, + expectedProbability: 0.9314, + }, + // Information from a long time ago doesn't have any effect. + { + name: "success, single fail, long ago", + directProbability: 1.0, + otherResults: []bool{false}, + delay: 10 * decayTime, + expectedProbability: 1.0, + }, + { + name: "fail, successes decay time", + directProbability: 0.0, + otherResults: []bool{ + true, true, true, true, true, + }, + delay: decayTime, + expectedProbability: 0.269, + }, + // Very recent info approaches the case with no time decay. 
+ { + name: "unknown, successes close", + directProbability: 0.5, + otherResults: []bool{ + true, true, true, true, true, + }, + delay: decayTime / 10, + expectedProbability: 0.741, + }, + } + + estimator := BimodalEstimator{ + BimodalConfig: BimodalConfig{ + BimodalScaleMsat: scale, BimodalNodeWeight: nodeWeight, + BimodalDecayTime: decayTime, + }, + } + + for _, test := range tests { + test := test + + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + then := time.Unix(0, 0) + results := makeNodeResults(test.otherResults, then) + now := then.Add(test.delay) + + p := estimator.calculateProbability( + test.directProbability, now, results, toNode, + ) + + require.InDelta(t, test.expectedProbability, p, + tolerance) + }) + } +} + +// TestLocalPairProbability tests that we reduce probability for failed direct +// neighbors. +func TestLocalPairProbability(t *testing.T) { + t.Parallel() + + decayTime := time.Hour + now := time.Unix(1000000000, 0) + toNode := route.Vertex{1} + + createFailedResult := func(timeAgo time.Duration) NodeResults { + return NodeResults{ + toNode: TimedPairResult{ + FailTime: now.Add(-timeAgo), + }, + } + } + + tests := []struct { + name string + expectedProbability float64 + results NodeResults + }{ + { + name: "no results", + expectedProbability: 1.0, + }, + { + name: "recent failure", + results: createFailedResult(0), + expectedProbability: 0.0, + }, + { + name: "after decay time", + results: createFailedResult(decayTime), + expectedProbability: 1 - 1/math.E, + }, + { + name: "long ago", + results: createFailedResult(10 * decayTime), + expectedProbability: 1.0, + }, + } + + estimator := BimodalEstimator{ + BimodalConfig: BimodalConfig{BimodalDecayTime: decayTime}, + } + + for _, test := range tests { + test := test + + t.Run(test.name, func(t *testing.T) { + t.Parallel() + p := estimator.LocalPairProbability( + now, test.results, toNode, + ) + require.InDelta(t, test.expectedProbability, p, 0.001) + }) + } +} + +// FuzzProbability checks that we don't encounter errors related to NaNs. +func FuzzProbability(f *testing.F) { + estimator := BimodalEstimator{ + BimodalConfig: BimodalConfig{BimodalScaleMsat: scale}, + } + f.Add(uint64(0), uint64(0), uint64(0), uint64(0)) + + f.Fuzz(func(t *testing.T, capacity, successAmt, failAmt, amt uint64) { + _, err := estimator.probabilityFormula( + lnwire.MilliSatoshi(capacity), + lnwire.MilliSatoshi(successAmt), + lnwire.MilliSatoshi(failAmt), lnwire.MilliSatoshi(amt), + ) + + require.NoError(t, err, "c: %v s: %v f: %v a: %v", capacity, + successAmt, failAmt, amt) + }) +}
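
For reviewers who want to exercise the estimator outside the test suite, a
minimal editorial sketch (not part of the patch) using the API added here
together with the existing routing types NodeResults, TimedPairResult and
route.Vertex; the function name is hypothetical:

	// exampleBimodalQuery queries a pair probability with one recorded
	// success on the channel to toNode.
	func exampleBimodalQuery() float64 {
		estimator, err := NewBimodalEstimator(DefaultBimodalConfig())
		if err != nil {
			panic(err)
		}

		// One success of 1m msat on the channel to toNode, an hour
		// ago.
		toNode := route.Vertex{1}
		results := NodeResults{
			toNode: TimedPairResult{
				SuccessTime: time.Now().Add(-time.Hour),
				SuccessAmt:  lnwire.MilliSatoshi(1_000_000),
			},
		}

		// Probability to route 2m msat to toNode over a channel with
		// a capacity of 10m sat.
		return estimator.PairProbability(
			time.Now(), results, toNode,
			lnwire.MilliSatoshi(2_000_000),
			btcutil.Amount(10_000_000),
		)
	}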