routing: isolate failure interpretation from mission control

This commit moves the payment outcome interpretation logic into a separate file. Also, mission control isn't updated directly anymore, but results are stored in an interpretedResult struct. This allows the mission control state to be locked for a minimum amount of time and makes it easier to unit test the result interpretation.
2025-08-28 22:50:58 +02:00 · 2019-08-05 12:29:16 +02:00
parent e7af6a077a
commit 45dacd0df1
3 changed files with 407 additions and 286 deletions
--- a/routing/result_interpretation.go
+++ b/routing/result_interpretation.go
@@ -0,0 +1,275 @@
+package routing
+
+import (
+	"github.com/lightningnetwork/lnd/channeldb"
+
+	"github.com/lightningnetwork/lnd/lnwire"
+	"github.com/lightningnetwork/lnd/routing/route"
+)
+
+// interpretedResult contains the result of the interpretation of a payment
+// outcome.
+type interpretedResult struct {
+	// nodeFailure points to a node pubkey if all channels of that node are
+	// responsible for the result.
+	nodeFailure *route.Vertex
+
+	// pairResults contains a map of node pairs that could be responsible
+	// for the failure. The map values are the minimum amounts for which a
+	// future penalty should be applied.
+	pairResults map[DirectedNodePair]lnwire.MilliSatoshi
+
+	// finalFailureReason is set to a non-nil value if it makes no more
+	// sense to start another payment attempt. It will contain the reason
+	// why.
+	finalFailureReason *channeldb.FailureReason
+
+	// policyFailure is set to a node pair if there is a policy failure on
+	// that connection. This is used to control the second chance logic for
+	// policy failures.
+	policyFailure *DirectedNodePair
+}
+
+// interpretResult interprets a payment outcome and returns an object that
+// contains information required to update mission control.
+func interpretResult(rt *route.Route, failureSrcIdx *int,
+	failure lnwire.FailureMessage) *interpretedResult {
+
+	i := &interpretedResult{
+		pairResults: make(map[DirectedNodePair]lnwire.MilliSatoshi),
+	}
+
+	final, reason := i.processFail(rt, failureSrcIdx, failure)
+	if final {
+		i.finalFailureReason = &reason
+	}
+
+	return i
+}
+
+// processFail processes a failed payment attempt.
+func (i *interpretedResult) processFail(
+	rt *route.Route, errSourceIdx *int,
+	failure lnwire.FailureMessage) (bool, channeldb.FailureReason) {
+
+	var failureSourceIdxInt int
+
+	if errSourceIdx == nil {
+		// If the failure message could not be decrypted, attribute the
+		// failure to our own outgoing channel.
+		//
+		// TODO(joostager): Penalize all channels in the route.
+		failureSourceIdxInt = 0
+		failure = lnwire.NewTemporaryChannelFailure(nil)
+	} else {
+		failureSourceIdxInt = *errSourceIdx
+	}
+
+	var failureVertex route.Vertex
+
+	if failureSourceIdxInt > 0 {
+		failureVertex = rt.Hops[failureSourceIdxInt-1].PubKeyBytes
+	} else {
+		failureVertex = rt.SourcePubKey
+	}
+	log.Tracef("Node %x (index %v) reported failure when sending htlc",
+		failureVertex, errSourceIdx)
+
+	// Always determine chan id ourselves, because a channel update with id
+	// may not be available.
+	failedPair, failedAmt := getFailedPair(
+		rt, failureSourceIdxInt,
+	)
+
+	switch failure.(type) {
+
+	// If the end destination didn't know the payment hash or we sent the
+	// wrong payment amount to the destination, then we'll terminate
+	// immediately.
+	case *lnwire.FailIncorrectDetails:
+		// TODO(joostjager): Check onionErr.Amount() whether it matches
+		// what we expect. (Will it ever not match, because if not
+		// final_incorrect_htlc_amount would be returned?)
+
+		return true, channeldb.FailureReasonIncorrectPaymentDetails
+
+	// If we sent the wrong amount to the destination, then we'll exit
+	// early.
+	case *lnwire.FailIncorrectPaymentAmount:
+		return true, channeldb.FailureReasonIncorrectPaymentDetails
+
+	// If the time-lock that was extended to the final node was incorrect,
+	// then we can't proceed.
+	case *lnwire.FailFinalIncorrectCltvExpiry:
+		// TODO(joostjager): Take into account that second last hop may
+		// have deliberately handed out an htlc that expires too soon.
+		// In that case we should continue routing.
+		return true, channeldb.FailureReasonError
+
+	// If we crafted an invalid onion payload for the final node, then we'll
+	// exit early.
+	case *lnwire.FailFinalIncorrectHtlcAmount:
+		// TODO(joostjager): Take into account that second last hop may
+		// have deliberately handed out an htlc with a too low value. In
+		// that case we should continue routing.
+		return true, channeldb.FailureReasonError
+
+	// Similarly, if the HTLC expiry that we extended to the final hop
+	// expires too soon, then will fail the payment.
+	//
+	// TODO(roasbeef): can happen to to race condition, try again with
+	// recent block height
+	case *lnwire.FailFinalExpiryTooSoon:
+		// TODO(joostjager): Take into account that any hop may have
+		// delayed. Ideally we should continue routing. Knowing the
+		// delaying node at this point would help.
+		return true, channeldb.FailureReasonIncorrectPaymentDetails
+
+	// If we erroneously attempted to cross a chain border, then we'll
+	// cancel the payment.
+	case *lnwire.FailInvalidRealm:
+		return true, channeldb.FailureReasonError
+
+	// If we get a notice that the expiry was too soon for an intermediate
+	// node, then we'll prune out the node that sent us this error, as it
+	// doesn't now what the correct block height is.
+	case *lnwire.FailExpiryTooSoon:
+		i.nodeFailure = &failureVertex
+		return false, 0
+
+	// If we hit an instance of onion payload corruption or an invalid
+	// version, then we'll exit early as this shouldn't happen in the
+	// typical case.
+	//
+	// TODO(joostjager): Take into account that the previous hop may have
+	// tampered with the onion. Routing should continue using other paths.
+	case *lnwire.FailInvalidOnionVersion:
+		return true, channeldb.FailureReasonError
+	case *lnwire.FailInvalidOnionHmac:
+		return true, channeldb.FailureReasonError
+	case *lnwire.FailInvalidOnionKey:
+		return true, channeldb.FailureReasonError
+
+	// If we get a failure due to violating the minimum amount, we'll apply
+	// the new minimum amount and retry routing.
+	case *lnwire.FailAmountBelowMinimum:
+		i.policyFailure = &failedPair
+		i.pairResults[failedPair] = 0
+		return false, 0
+
+	// If we get a failure due to a fee, we'll apply the new fee update, and
+	// retry our attempt using the newly updated fees.
+	case *lnwire.FailFeeInsufficient:
+		i.policyFailure = &failedPair
+		i.pairResults[failedPair] = 0
+		return false, 0
+
+	// If we get the failure for an intermediate node that disagrees with
+	// our time lock values, then we'll apply the new delta value and try it
+	// once more.
+	case *lnwire.FailIncorrectCltvExpiry:
+		i.policyFailure = &failedPair
+		i.pairResults[failedPair] = 0
+		return false, 0
+
+	// The outgoing channel that this node was meant to forward one is
+	// currently disabled, so we'll apply the update and continue.
+	case *lnwire.FailChannelDisabled:
+		i.pairResults[failedPair] = 0
+		return false, 0
+
+	// It's likely that the outgoing channel didn't have sufficient
+	// capacity, so we'll prune this edge for now, and continue onwards with
+	// our path finding.
+	case *lnwire.FailTemporaryChannelFailure:
+		i.pairResults[failedPair] = failedAmt
+		return false, 0
+
+	// If the send fail due to a node not having the required features, then
+	// we'll note this error and continue.
+	case *lnwire.FailRequiredNodeFeatureMissing:
+		i.nodeFailure = &failureVertex
+		return false, 0
+
+	// If the send fail due to a node not having the required features, then
+	// we'll note this error and continue.
+	case *lnwire.FailRequiredChannelFeatureMissing:
+		i.nodeFailure = &failureVertex
+		return false, 0
+
+	// If the next hop in the route wasn't known or offline, we'll only the
+	// channel which we attempted to route over. This is conservative, and
+	// it can handle faulty channels between nodes properly. Additionally,
+	// this guards against routing nodes returning errors in order to
+	// attempt to black list another node.
+	case *lnwire.FailUnknownNextPeer:
+		i.pairResults[failedPair] = 0
+		return false, 0
+
+	// If the node wasn't able to forward for which ever reason, then we'll
+	// note this and continue with the routes.
+	case *lnwire.FailTemporaryNodeFailure:
+		i.nodeFailure = &failureVertex
+		return false, 0
+
+	case *lnwire.FailPermanentNodeFailure:
+		i.nodeFailure = &failureVertex
+		return false, 0
+
+	// If we crafted a route that contains a too long time lock for an
+	// intermediate node, we'll prune the node. As there currently is no way
+	// of knowing that node's maximum acceptable cltv, we cannot take this
+	// constraint into account during routing.
+	//
+	// TODO(joostjager): Record the rejected cltv and use that as a hint
+	// during future path finding through that node.
+	case *lnwire.FailExpiryTooFar:
+		i.nodeFailure = &failureVertex
+		return false, 0
+
+	// If we get a permanent channel or node failure, then we'll prune the
+	// channel in both directions and continue with the rest of the routes.
+	case *lnwire.FailPermanentChannelFailure:
+		i.pairResults[failedPair] = 0
+		i.pairResults[failedPair.Reverse()] = 0
+		return false, 0
+
+	// Any other failure or an empty failure will get the node pruned.
+	default:
+		i.nodeFailure = &failureVertex
+		return false, 0
+	}
+}
+
+// getFailedPair tries to locate the failing pair given a route and the pubkey
+// of the node that sent the failure. It will assume that the failure is
+// associated with the outgoing channel set of the failing node. As a second
+// result, it returns the amount sent between the pair.
+func getFailedPair(route *route.Route, failureSource int) (DirectedNodePair,
+	lnwire.MilliSatoshi) {
+
+	// Determine if we have a failure from the final hop. If it is, we
+	// assume that the failing channel is the incoming channel.
+	//
+	// TODO(joostjager): In this case, certain types of failures are not
+	// expected. For example FailUnknownNextPeer. This could be a reason to
+	// prune the node?
+	if failureSource == len(route.Hops) {
+		failureSource--
+	}
+
+	// As this failure indicates that the target channel was unable to carry
+	// this HTLC (for w/e reason), we'll return the _outgoing_ channel that
+	// the source of the failure was meant to pass the HTLC along to.
+	if failureSource == 0 {
+		return NewDirectedNodePair(
+			route.SourcePubKey,
+			route.Hops[0].PubKeyBytes,
+		), route.TotalAmount
+	}
+
+	return NewDirectedNodePair(
+		route.Hops[failureSource-1].PubKeyBytes,
+		route.Hops[failureSource].PubKeyBytes,
+	), route.Hops[failureSource-1].AmtToForward
+}