package routing
import (
	"fmt"
	"math"
	"time"

	"github.com/btcsuite/btcd/btcutil"
	"github.com/go-errors/errors"
	"github.com/lightningnetwork/lnd/lnwire"
	"github.com/lightningnetwork/lnd/routing/route"
)
const (
// DefaultBimodalScaleMsat is the default value for BimodalScaleMsat in
// BimodalConfig. It describes the distribution of funds in the LN based
// on empirical findings. We assume an unbalanced network by default.
DefaultBimodalScaleMsat = lnwire.MilliSatoshi(300_000_000)
// DefaultBimodalNodeWeight is the default value for the
// BimodalNodeWeight in BimodalConfig. It is chosen such that past
// forwardings on other channels of a router are only slightly taken
// into account.
DefaultBimodalNodeWeight = 0.2
// DefaultBimodalDecayTime is the default value for BimodalDecayTime.
// We will forget about previous learnings about channel liquidity on
// the timescale of about a week.
DefaultBimodalDecayTime = 7 * 24 * time.Hour
// BimodalScaleMsatMax is the maximum value for BimodalScaleMsat. We
// limit it here to the fakeHopHintCapacity to avoid issues with hop
// hint probability calculations.
BimodalScaleMsatMax = lnwire.MilliSatoshi(
1000 * fakeHopHintCapacity / 4,
)
// BimodalEstimatorName is used to identify the bimodal estimator.
BimodalEstimatorName = "bimodal"
)
var (
	// ErrInvalidScale is returned when we get a scale that is zero or
	// larger than the allowed maximum.
	ErrInvalidScale = errors.New("scale must be >= 0 and sane")
// ErrInvalidNodeWeight is returned when we get a node weight that is
// out of range.
ErrInvalidNodeWeight = errors.New("node weight must be in [0, 1]")
	// ErrInvalidDecayTime is returned when we get a decay time of zero or
	// below.
	ErrInvalidDecayTime = errors.New("decay time must be larger than zero")
// ErrZeroCapacity is returned when we encounter a channel with zero
// capacity in probability estimation.
ErrZeroCapacity = errors.New("capacity must be larger than zero")
)
// BimodalConfig contains configuration for our probability estimator.
type BimodalConfig struct {
// BimodalNodeWeight defines how strongly other previous forwardings on
// channels of a router should be taken into account when computing a
// channel's probability to route. The allowed values are in the range
// [0, 1], where a value of 0 means that only direct information about a
// channel is taken into account.
BimodalNodeWeight float64
// BimodalScaleMsat describes the scale over which channels
// statistically have some liquidity left. The value determines how
// quickly the bimodal distribution drops off from the edges of a
// channel. A larger value (compared to typical channel capacities)
// means that the drop off is slow and that channel balances are
// distributed more uniformly. A small value leads to the assumption of
// very unbalanced channels.
BimodalScaleMsat lnwire.MilliSatoshi
// BimodalDecayTime is the scale for the exponential information decay
// over time for previous successes or failures.
BimodalDecayTime time.Duration
}
// validate checks the configuration of the estimator for allowed values.
func (p BimodalConfig) validate() error {
if p.BimodalDecayTime <= 0 {
return fmt.Errorf("%v: %w", BimodalEstimatorName,
ErrInvalidDecayTime)
}
if p.BimodalNodeWeight < 0 || p.BimodalNodeWeight > 1 {
return fmt.Errorf("%v: %w", BimodalEstimatorName,
ErrInvalidNodeWeight)
}
if p.BimodalScaleMsat == 0 || p.BimodalScaleMsat > BimodalScaleMsatMax {
return fmt.Errorf("%v: %w", BimodalEstimatorName,
ErrInvalidScale)
}
return nil
}
// DefaultBimodalConfig returns the default configuration for the estimator.
func DefaultBimodalConfig() BimodalConfig {
return BimodalConfig{
BimodalNodeWeight: DefaultBimodalNodeWeight,
BimodalScaleMsat: DefaultBimodalScaleMsat,
BimodalDecayTime: DefaultBimodalDecayTime,
}
}
// BimodalEstimator returns node and pair probabilities based on historical
// payment results and a liquidity distribution model of the LN. The main
// function is to estimate the direct channel probability based on a depleted
// liquidity distribution model, with additional information decay over time. A
// per-node probability can be mixed with the direct probability, taking into
// account successes/failures on other channels of the forwarder.
type BimodalEstimator struct {
// BimodalConfig contains configuration options for our estimator.
BimodalConfig
}
// NewBimodalEstimator creates a new BimodalEstimator.
func NewBimodalEstimator(cfg BimodalConfig) (*BimodalEstimator, error) {
if err := cfg.validate(); err != nil {
return nil, err
}
return &BimodalEstimator{
BimodalConfig: cfg,
}, nil
}
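
// The following sketch shows how an estimator is typically constructed. It is
// an illustrative example, not part of lnd's API; the tweaked scale is an
// assumed value.
func exampleNewBimodalEstimator() {
	// Start from the defaults and assume a network of mostly smaller
	// channels by lowering the scale.
	cfg := DefaultBimodalConfig()
	cfg.BimodalScaleMsat = lnwire.MilliSatoshi(100_000_000)

	estimator, err := NewBimodalEstimator(cfg)
	if err != nil {
		// A zero scale, a node weight outside [0, 1] or a
		// non-positive decay time is rejected here.
		return
	}
	_ = estimator
}
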
// Compile-time checks that interfaces are implemented.
var _ Estimator = (*BimodalEstimator)(nil)
var _ estimatorConfig = (*BimodalConfig)(nil)
// Config returns the current configuration of the estimator.
func (p *BimodalEstimator) Config() estimatorConfig {
return p.BimodalConfig
}
// String returns the estimator's configuration as a string representation.
func (p *BimodalEstimator) String() string {
return fmt.Sprintf("estimator type: %v, decay time: %v, liquidity "+
"scale: %v, node weight: %v", BimodalEstimatorName,
p.BimodalDecayTime, p.BimodalScaleMsat, p.BimodalNodeWeight)
}
// PairProbability estimates the probability of successfully traversing to
// toNode based on historical payment outcomes for the from node. Those outcomes
// are passed in via the results parameter.
func (p *BimodalEstimator) PairProbability(now time.Time,
results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi,
capacity btcutil.Amount) float64 {
// We first compute the probability for the desired hop taking into
// account previous knowledge.
directProbability := p.directProbability(
now, results, toNode, amt, lnwire.NewMSatFromSatoshis(capacity),
)
// The final probability is computed by taking into account other
// channels of the from node.
return p.calculateProbability(directProbability, now, results, toNode)
}
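
// examplePairProbability is a hypothetical sketch of how a caller queries the
// estimator. The target vertex and the mission control results are
// placeholder values, not data from a real graph.
func examplePairProbability() {
	estimator, err := NewBimodalEstimator(DefaultBimodalConfig())
	if err != nil {
		return
	}

	var toNode route.Vertex // placeholder target node

	// Assume we recently sent 100k sat to this node successfully, but
	// failed with 300k sat.
	now := time.Now()
	results := NodeResults{
		toNode: TimedPairResult{
			SuccessTime: now.Add(-time.Hour),
			SuccessAmt:  lnwire.MilliSatoshi(100_000_000),
			FailTime:    now.Add(-time.Hour),
			FailAmt:     lnwire.MilliSatoshi(300_000_000),
		},
	}

	// The probability to send 200k sat over a channel of 1M sat capacity.
	prob := estimator.PairProbability(
		now, results, toNode, lnwire.MilliSatoshi(200_000_000),
		btcutil.Amount(1_000_000),
	)
	_ = prob
}
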
// LocalPairProbability computes the probability to reach toNode given a set of
// previous learnings.
func (p *BimodalEstimator) LocalPairProbability(now time.Time,
results NodeResults, toNode route.Vertex) float64 {
	// For direct local probabilities we assume to know exactly how much we
	// can send over a channel, presuming that channels are active and have
	// enough liquidity.
directProbability := 1.0
// If we had an unexpected failure for this node, we reduce the
// probability for some time to avoid infinite retries.
result, ok := results[toNode]
if ok && !result.FailTime.IsZero() {
timeAgo := now.Sub(result.FailTime)
// We only expect results in the past to get a probability
// between 0 and 1.
if timeAgo < 0 {
timeAgo = 0
}
exponent := -float64(timeAgo) / float64(p.BimodalDecayTime)
directProbability -= math.Exp(exponent)
}
return directProbability
}
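
// exampleLocalPairProbability illustrates the penalty decay above with
// assumed numbers: immediately after a local failure the probability is close
// to zero, and after one decay time it recovers to 1 - 1/e ≈ 0.63.
func exampleLocalPairProbability() {
	estimator, err := NewBimodalEstimator(DefaultBimodalConfig())
	if err != nil {
		return
	}

	var toNode route.Vertex // placeholder peer

	// A local failure that happened exactly one decay time ago.
	now := time.Now()
	results := NodeResults{
		toNode: TimedPairResult{
			FailTime: now.Add(-DefaultBimodalDecayTime),
		},
	}

	// Evaluates to 1 - exp(-1) ≈ 0.63.
	prob := estimator.LocalPairProbability(now, results, toNode)
	_ = prob
}
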
// directProbability computes the probability to reach a node based on the
// liquidity distribution in the LN.
func (p *BimodalEstimator) directProbability(now time.Time,
results NodeResults, toNode route.Vertex, amt lnwire.MilliSatoshi,
capacity lnwire.MilliSatoshi) float64 {
// We first determine the time-adjusted success and failure amounts to
// then compute a probability. We know that we can send a zero amount.
successAmount := lnwire.MilliSatoshi(0)
// We know that we cannot send the full capacity.
failAmount := capacity
// If we have information about past successes or failures, we modify
// them with a time decay.
result, ok := results[toNode]
if ok {
// Apply a time decay for the amount we cannot send.
if !result.FailTime.IsZero() {
failAmount = cannotSend(
result.FailAmt, capacity, now, result.FailTime,
p.BimodalDecayTime,
)
}
// Apply a time decay for the amount we can send.
if !result.SuccessTime.IsZero() {
successAmount = canSend(
result.SuccessAmt, now, result.SuccessTime,
p.BimodalDecayTime,
)
}
}
// Compute the direct channel probability.
probability, err := p.probabilityFormula(
capacity, successAmount, failAmount, amt,
)
if err != nil {
log.Errorf("error computing probability to node: %v "+
"(node: %v, results: %v, amt: %v, capacity: %v)",
err, toNode, results, amt, capacity)
return 0.0
}
return probability
}
// calculateProbability computes the total hop probability combining the channel
// probability and historic forwarding data of other channels of the node we try
// to send from.
//
// Goals:
// * We want to incentivize good routing nodes: the more routable channels a
// node has, the more we want to incentivize (vice versa for failures).
// -> We reduce/increase the direct probability depending on past
// failures/successes for other channels of the node.
//
// * We want to be forgiving/give other nodes a chance as well: we want to
// forget about (non-)routable channels over time.
// -> We weight the successes/failures with a time decay such that they will not
// influence the total probability if a long time went by.
//
// * If we don't have other info, we want to solely rely on the direct
// probability.
//
// * We want to be able to specify how important the other channels are compared
// to the direct channel.
// -> Introduce a node weight factor that weights the direct probability against
// the node-wide average. The larger the node weight, the more important other
// channels of the node are.
//
// How do failures on low fee nodes redirect routing to higher fee nodes?
// Assumptions:
// * attemptCostPPM of 1000 PPM
// * constant direct channel probability of P0 (usually 0.5 for large amounts)
// * node weight w of 0.2
//
// The question we want to answer is:
// How often would a zero-fee node be tried (even if there were failures for its
// other channels) over trying a high-fee node with 2000 PPM and no direct
// knowledge about the channel to send over?
//
// The probability of a route of length l is P(l) = P0^l.
//
// The total probability after n failures (with the implemented method here) is:
// P(l, n) = P(l-1) * P(n)
// = P(l-1) * (P0 + n*0) / (1 + n*w)
// = P(l) / (1 + n*w)
//
// Condition for a high-fee channel to overcome a low fee channel in the
// Dijkstra weight function (only looking at fee and probability PPM terms):
// highFeePPM + attemptCostPPM * 1/P(l) = 0PPM + attemptCostPPM * 1/P(l, n)
// highFeePPM/attemptCostPPM = 1/P(l, n) - 1/P(l) =
// = (1 + n*w)/P(l) - 1/P(l) =
// = n*w/P(l)
//
// Therefore:
// n = (highFeePPM/attemptCostPPM) * (P(l)/w) =
// = (2000/1000) * P0^l / w = 2 * P0^l / w
//
// For a one-hop route we get:
// n = 2 * 0.5 / 0.2 = 5 tolerated failures
//
// For a three-hop route we get:
// n = 2 * 0.125 / 0.2 = 1.25 tolerated failures
//
// For more details on the behavior see tests.
func (p *BimodalEstimator) calculateProbability(directProbability float64,
now time.Time, results NodeResults, toNode route.Vertex) float64 {
// If we don't take other channels into account, we can return early.
if p.BimodalNodeWeight == 0.0 {
return directProbability
}
// If we have up-to-date information about the channel we want to use,
	// i.e. the info stems from results that are no older than the decay
	// time,
// we will only use the direct probability. This is needed in order to
// avoid that other previous results (on all other channels of the same
// routing node) will distort and pin the calculated probability even if
// we have accurate direct information. This helps to dip the
// probability below the min probability in case of failures, to start
// the splitting process.
directResult, ok := results[toNode]
if ok {
latest := directResult.SuccessTime
if directResult.FailTime.After(latest) {
latest = directResult.FailTime
}
		// We use BimodalDecayTime to judge the currentness of the
// data. It is the time scale on which we assume to have lost
// information.
if now.Sub(latest) < p.BimodalDecayTime {
log.Tracef("Using direct probability for node %v: %v",
toNode, directResult)
return directProbability
}
}
// w is a parameter which determines how strongly the other channels of
// a node should be incorporated, the higher the stronger.
w := p.BimodalNodeWeight
// dt determines the timeliness of the previous successes/failures
// to be taken into account.
dt := float64(p.BimodalDecayTime)
// The direct channel probability is weighted fully, all other results
// are weighted according to how recent the information is.
totalProbabilities := directProbability
totalWeights := 1.0
for peer, result := range results {
// We don't include the direct hop probability here because it
// is already included in totalProbabilities.
if peer == toNode {
continue
}
// We add probabilities weighted by how recent the info is.
var weight float64
if result.SuccessAmt > 0 {
exponent := -float64(now.Sub(result.SuccessTime)) / dt
weight = math.Exp(exponent)
totalProbabilities += w * weight
totalWeights += w * weight
}
if result.FailAmt > 0 {
exponent := -float64(now.Sub(result.FailTime)) / dt
weight = math.Exp(exponent)
// Failures don't add to total success probability.
totalWeights += w * weight
}
}
return totalProbabilities / totalWeights
}
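
// exampleNodeWeight is a standalone numeric sketch of the mixing rule above,
// using assumed values: with a direct probability of 0.5, a node weight of
// 0.2 and two fresh failures on other channels of the node (decay factor of
// about one), the total probability drops to 0.5 / (1 + 2*0.2) ≈ 0.36.
func exampleNodeWeight() float64 {
	const (
		directProbability = 0.5
		w                 = 0.2
		freshFailures     = 2
	)

	// The direct probability enters with full weight.
	totalProbabilities := directProbability
	totalWeights := 1.0

	// Fresh failures have a decay weight of about one and only add to the
	// denominator.
	for i := 0; i < freshFailures; i++ {
		totalWeights += w * 1.0
	}

	return totalProbabilities / totalWeights
}
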
// canSend returns the sendable amount over the channel, respecting time decay.
// canSend approaches zero if we wait for a much longer time than the decay
// time.
func canSend(successAmount lnwire.MilliSatoshi, now, successTime time.Time,
decayConstant time.Duration) lnwire.MilliSatoshi {
	// The factor approaches 0 for a successTime a long time in the past
	// and is 1 when the successTime is now.
factor := math.Exp(
-float64(now.Sub(successTime)) / float64(decayConstant),
)
canSend := factor * float64(successAmount)
return lnwire.MilliSatoshi(canSend)
}
// cannotSend returns the not sendable amount over the channel, respecting time
// decay. cannotSend approaches the capacity if we wait for a much longer time
// than the decay time.
func cannotSend(failAmount, capacity lnwire.MilliSatoshi, now,
failTime time.Time, decayConstant time.Duration) lnwire.MilliSatoshi {
if failAmount > capacity {
failAmount = capacity
}
// The factor approaches 0 for failTime a long time in the past and it
// is 1 when the failTime is now.
factor := math.Exp(
-float64(now.Sub(failTime)) / float64(decayConstant),
)
cannotSend := capacity - lnwire.MilliSatoshi(
factor*float64(capacity-failAmount),
)
return cannotSend
}
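
// exampleTimeDecay puts assumed numbers to the two decay helpers above. After
// exactly one decay time, the decay factor is 1/e ≈ 0.37.
func exampleTimeDecay() {
	now := time.Now()
	then := now.Add(-DefaultBimodalDecayTime)

	// A success of 100k sat observed one decay time ago shrinks to about
	// 100k/e ≈ 36.8k sat.
	success := canSend(
		lnwire.MilliSatoshi(100_000_000), now, then,
		DefaultBimodalDecayTime,
	)

	// A failure at 300k sat on a 1M sat channel relaxes towards the
	// capacity: 1M - (1M-300k)/e ≈ 742k sat.
	fail := cannotSend(
		lnwire.MilliSatoshi(300_000_000),
		lnwire.MilliSatoshi(1_000_000_000), now, then,
		DefaultBimodalDecayTime,
	)

	_, _ = success, fail
}
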
// primitive computes the indefinite integral of our assumed (normalized)
// liquidity probability distribution. The distribution of liquidity x here is
// the function P(x) ~ exp(-x/s) + exp((x-c)/s) + 1/c, i.e., two exponentials
// residing at the ends of channels. This means that we expect liquidity to be
// at either side of the channel with capacity c. The s parameter (scale)
// defines how far the liquidity leaks into the channel. A very low scale
// assumes completely unbalanced channels, a very high scale assumes a random
// distribution. More details can be found in
// https://github.com/lightningnetwork/lnd/issues/5988#issuecomment-1131234858.
// Additionally, we add a constant term 1/c to the distribution to avoid
// normalization issues and to fall back to a uniform distribution should the
// previous success and fail amounts contradict a bimodal distribution.
func (p *BimodalEstimator) primitive(c, x float64) float64 {
s := float64(p.BimodalScaleMsat)
// The indefinite integral of P(x) is given by
// Int P(x) dx = H(x) = s * (-e(-x/s) + e((x-c)/s) + x/(c*s)),
// and its norm from 0 to c can be computed from it,
// norm = [H(x)]_0^c = s * (-e(-c/s) + 1 + 1/s -(-1 + e(-c/s))) =
// = s * (-2*e(-c/s) + 2 + 1/s).
// The prefactors s are left out, as they cancel out in the end.
	// norm can only become zero if c is zero, which we have ruled out
	// before calling this method.
ecs := math.Exp(-c / s)
norm := -2*ecs + 2 + 1/s
// It would be possible to split the next term and reuse the factors
// from before, but this can lead to numerical issues with large
// numbers.
excs := math.Exp((x - c) / s)
exs := math.Exp(-x / s)
// We end up with the primitive function of the normalized P(x).
return (-exs + excs + x/(c*s)) / norm
}
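
// exampleNormalization is an illustrative sanity check of the primitive: by
// construction, the distribution integrates to one over the full liquidity
// range [0, c].
func exampleNormalization() float64 {
	estimator, err := NewBimodalEstimator(DefaultBimodalConfig())
	if err != nil {
		return 0
	}

	// A channel of 1M sat capacity, expressed in millisatoshis.
	c := float64(lnwire.MilliSatoshi(1_000_000_000))

	// Evaluates to 1.0 up to floating point rounding.
	return estimator.primitive(c, c) - estimator.primitive(c, 0)
}
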
// integral computes the integral of our liquidity distribution from the lower
// to the upper value.
func (p *BimodalEstimator) integral(capacity, lower, upper float64) float64 {
if lower < 0 || lower > upper {
log.Errorf("probability integral limits nonsensical: capacity:"+
"%v lower: %v upper: %v", capacity, lower, upper)
return 0.0
}
return p.primitive(capacity, upper) - p.primitive(capacity, lower)
}
// probabilityFormula computes the expected probability for a payment of
// amountMsat given prior learnings for a channel of certain capacity.
// successAmountMsat and failAmountMsat stand for the unsettled success and
// failure amounts, respectively. The formula is derived using the formalism
// presented in Pickhardt et al., https://arxiv.org/abs/2103.08576.
func (p *BimodalEstimator) probabilityFormula(capacityMsat, successAmountMsat,
failAmountMsat, amountMsat lnwire.MilliSatoshi) (float64, error) {
// Convert to positive-valued floats.
capacity := float64(capacityMsat)
successAmount := float64(successAmountMsat)
failAmount := float64(failAmountMsat)
amount := float64(amountMsat)
// In order for this formula to give reasonable results, we need to have
// an estimate of the capacity of a channel (or edge between nodes).
if capacity == 0.0 {
return 0, ErrZeroCapacity
}
// We cannot send more than the capacity.
if amount > capacity {
return 0.0, nil
}
// The next statement is a safety check against an illogical condition.
// We discard the knowledge for the channel in that case since we have
// inconsistent data.
if failAmount <= successAmount {
log.Warnf("Fail amount (%s) is smaller than or equal to the "+
"success amount (%s) for capacity (%s)",
failAmountMsat, successAmountMsat, capacityMsat)
successAmount = 0
failAmount = capacity
}
	// Mission control may have some outdated values with regard to the
	// current channel capacity between a node pair. This can happen in case
	// a large parallel channel was closed or if a channel was downscaled,
	// and it can lead to success and/or failure amounts lying outside the
	// range [0, capacity]. We assume that the liquidity situation of the
	// channel is similar to before due to flow bias.
	// In case we have a large success, we need to correct it to be in the
	// valid range. We set the success amount close to the capacity, because
	// we assume we are still able to send. Any possible failure (which must
	// in this case be larger than the capacity) is corrected as well.
if successAmount >= capacity {
log.Debugf("Correcting success amount %s and failure amount "+
"%s to capacity %s", successAmountMsat,
failAmount, capacityMsat)
// We choose the success amount to be one less than the
// capacity, to both fit success and failure amounts into the
// capacity range in a consistent manner.
successAmount = capacity - 1
failAmount = capacity
}
	// Having no or only a small success but a large failure only requires
	// an adjustment of the failure amount.
if failAmount > capacity {
log.Debugf("Correcting failure amount %s to capacity %s",
failAmountMsat, capacityMsat)
failAmount = capacity
}
// We cannot send more than the fail amount.
if amount >= failAmount {
return 0.0, nil
}
// We can send the amount if it is smaller than the success amount.
if amount <= successAmount {
return 1.0, nil
}
// The success probability for payment amount a is the integral over the
// prior distribution P(x), the probability to find liquidity between
// the amount a and channel capacity c (or failAmount a_f):
// P(X >= a | X < a_f) = Integral_{a}^{a_f} P(x) dx
prob := p.integral(capacity, amount, failAmount)
if math.IsNaN(prob) {
return 0.0, fmt.Errorf("non-normalized probability is NaN, "+
"capacity: %v, amount: %v, fail amount: %v",
capacity, amount, failAmount)
}
// If we have payment information, we need to adjust the prior
// distribution P(x) and get the posterior distribution by renormalizing
// the prior distribution in such a way that the probability mass lies
// between a_s and a_f.
reNorm := p.integral(capacity, successAmount, failAmount)
if math.IsNaN(reNorm) {
return 0.0, fmt.Errorf("normalization factor is NaN, "+
"capacity: %v, success amount: %v, fail amount: %v",
capacity, successAmount, failAmount)
}
	// The normalization factor can only be zero if the success amount is
	// equal to or larger than the fail amount. This should not happen as we
	// have checked this scenario above.
if reNorm == 0.0 {
return 0.0, fmt.Errorf("normalization factor is zero, "+
"capacity: %v, success amount: %v, fail amount: %v",
capacity, successAmount, failAmount)
}
prob /= reNorm
// Note that for payment amounts smaller than successAmount, we can get
// a value larger than unity, which we cap here to get a proper
// probability.
if prob > 1.0 {
if amount > successAmount {
return 0.0, fmt.Errorf("unexpected large probability "+
"(%v) capacity: %v, amount: %v, success "+
"amount: %v, fail amount: %v", prob, capacity,
amount, successAmount, failAmount)
}
return 1.0, nil
} else if prob < 0.0 {
return 0.0, fmt.Errorf("negative probability "+
"(%v) capacity: %v, amount: %v, success "+
"amount: %v, fail amount: %v", prob, capacity,
amount, successAmount, failAmount)
}
return prob, nil
}
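
// exampleProbabilityFormula sketches the boundary behavior derived above with
// assumed amounts: below the success amount the probability is one, at or
// above the fail amount it is zero, and in between the bimodal prior
// interpolates.
func exampleProbabilityFormula() {
	estimator, err := NewBimodalEstimator(DefaultBimodalConfig())
	if err != nil {
		return
	}

	capacity := lnwire.MilliSatoshi(1_000_000_000)
	success := lnwire.MilliSatoshi(100_000_000)
	fail := lnwire.MilliSatoshi(300_000_000)

	// An amount below the success amount yields probability 1.
	pLow, _ := estimator.probabilityFormula(
		capacity, success, fail, lnwire.MilliSatoshi(50_000_000),
	)

	// An amount at or above the fail amount yields probability 0.
	pHigh, _ := estimator.probabilityFormula(
		capacity, success, fail, lnwire.MilliSatoshi(300_000_000),
	)

	// An amount in between yields a value in (0, 1), shaped by the
	// bimodal prior.
	pMid, _ := estimator.probabilityFormula(
		capacity, success, fail, lnwire.MilliSatoshi(200_000_000),
	)

	_, _, _ = pLow, pHigh, pMid
}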