routing: move failure interpretation into mission control

Joost Jager
2019-06-26 09:49:16 +02:00
parent add905d17f
commit 934ea8e78d
4 changed files with 312 additions and 264 deletions


@@ -174,15 +174,13 @@ type PaymentSessionSource interface {
// MissionController is an interface that exposes failure reporting and
// probability estimation.
type MissionController interface {
// ReportEdgeFailure reports a channel level failure.
ReportEdgeFailure(failedEdge edge,
minPenalizeAmt lnwire.MilliSatoshi)
// ReportEdgePolicyFailure reports a policy related failure.
ReportEdgePolicyFailure(failedEdge edge)
// ReportVertexFailure reports a node level failure.
ReportVertexFailure(v route.Vertex)
// ReportPaymentFail reports a failed payment to mission control as
// input for future probability estimates. It returns a bool indicating
// whether this error is a final error and no further payment attempts
// need to be made.
ReportPaymentFail(rt *route.Route,
failureSourceIdx int, failure lnwire.FailureMessage) (bool,
channeldb.FailureReason)
// GetEdgeProbability is expected to return the success probability of a
// payment from fromNode along edge.
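The reduced interface above funnels all failure reporting through a single ReportPaymentFail call that returns whether the failure is terminal together with a channeldb.FailureReason. The following is a minimal, self-contained sketch of how a payment loop might consume that result; the stand-in types, the handleSendError helper and the stub implementation are hypothetical illustrations of the interface shape in this diff, not code from the commit.

package main

import "fmt"

// Stand-ins for the real lnd types referenced by the interface above
// (*route.Route, lnwire.FailureMessage, channeldb.FailureReason). They
// exist only to keep this sketch self-contained.
type Route struct{}
type FailureMessage interface{}
type FailureReason int

// MissionController mirrors the reduced reporting surface after this
// commit: one ReportPaymentFail entry point instead of separate edge,
// policy and vertex reports.
type MissionController interface {
	ReportPaymentFail(rt *Route, failureSourceIdx int,
		failure FailureMessage) (bool, FailureReason)
}

// handleSendError sketches how a caller might react to the result: a
// true return means the failure is terminal and no further payment
// attempts should be made.
func handleSendError(mc MissionController, rt *Route,
	srcIdx int, failure FailureMessage) (bool, FailureReason) {

	final, reason := mc.ReportPaymentFail(rt, srcIdx, failure)
	if final {
		// Terminal failure: abort and surface the reason.
		return true, reason
	}

	// Non-terminal: mission control has folded the failure into its
	// probability estimates; fetch a new route and retry.
	return false, 0
}

// stubMC is a trivial implementation used only to make the sketch run.
type stubMC struct{}

func (stubMC) ReportPaymentFail(_ *Route, _ int,
	_ FailureMessage) (bool, FailureReason) {

	return false, 0
}

func main() {
	final, reason := handleSendError(stubMC{}, &Route{}, 1, nil)
	fmt.Println(final, reason) // false 0: retry with another route.
}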
@@ -1929,195 +1927,9 @@ func (r *ChannelRouter) processSendError(rt *route.Route, sendErr error) (
}
}
var failureVertex route.Vertex
// For any non-self failure, look up the source pub key in the hops
// slice. Otherwise return the self node pubkey.
if failureSourceIdx > 0 {
failureVertex = rt.Hops[failureSourceIdx-1].PubKeyBytes
} else {
failureVertex = r.selfNode.PubKeyBytes
}
log.Tracef("Node %x (index %v) reported failure when sending htlc",
failureVertex, failureSourceIdx)
// Always determine chan id ourselves, because a channel
// update with id may not be available.
failedEdge, failedAmt := getFailedEdge(rt, failureSourceIdx)
switch fErr.FailureMessage.(type) {
// If the end destination didn't know the payment
// hash or we sent the wrong payment amount to the
// destination, then we'll terminate immediately.
case *lnwire.FailUnknownPaymentHash:
// TODO(joostjager): Check onionErr.Amount() whether it matches
// what we expect. (Will it ever not match, because if not
// final_incorrect_htlc_amount would be returned?)
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we sent the wrong amount to the destination, then
// we'll exit early.
case *lnwire.FailIncorrectPaymentAmount:
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If the time-lock that was extended to the final node
// was incorrect, then we can't proceed.
case *lnwire.FailFinalIncorrectCltvExpiry:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc that expires too soon.
// In that case we should continue routing.
return true, channeldb.FailureReasonError
// If we crafted an invalid onion payload for the final
// node, then we'll exit early.
case *lnwire.FailFinalIncorrectHtlcAmount:
// TODO(joostjager): Take into account that second last hop may
// have deliberately handed out an htlc with a too low value. In
// that case we should continue routing.
return true, channeldb.FailureReasonError
// Similarly, if the HTLC expiry that we extended to
// the final hop expires too soon, then we'll fail the
// payment.
//
// TODO(roasbeef): can happen due to a race condition, try
// again with recent block height
case *lnwire.FailFinalExpiryTooSoon:
// TODO(joostjager): Take into account that any hop may have
// delayed. Ideally we should continue routing. Knowing the
// delaying node at this point would help.
return true, channeldb.FailureReasonIncorrectPaymentDetails
// If we erroneously attempted to cross a chain border,
// then we'll cancel the payment.
case *lnwire.FailInvalidRealm:
return true, channeldb.FailureReasonError
// If we get a notice that the expiry was too soon for
// an intermediate node, then we'll prune out the node
// that sent us this error, as it doesn't know what the
// correct block height is.
case *lnwire.FailExpiryTooSoon:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If we hit an instance of onion payload corruption or an invalid
// version, then we'll exit early as this shouldn't happen in the
// typical case.
//
// TODO(joostjager): Take into account that the previous hop may have
// tampered with the onion. Routing should continue using other paths.
case *lnwire.FailInvalidOnionVersion:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionHmac:
return true, channeldb.FailureReasonError
case *lnwire.FailInvalidOnionKey:
return true, channeldb.FailureReasonError
// If we get a failure due to violating the minimum
// amount, we'll apply the new minimum amount and retry
// routing.
case *lnwire.FailAmountBelowMinimum:
r.cfg.MissionControl.ReportEdgePolicyFailure(failedEdge)
return false, 0
// If we get a failure due to a fee, we'll apply the
// new fee update, and retry our attempt using the
// newly updated fees.
case *lnwire.FailFeeInsufficient:
r.cfg.MissionControl.ReportEdgePolicyFailure(failedEdge)
return false, 0
// If we get the failure for an intermediate node that
// disagrees with our time lock values, then we'll
// apply the new delta value and try it once more.
case *lnwire.FailIncorrectCltvExpiry:
r.cfg.MissionControl.ReportEdgePolicyFailure(failedEdge)
return false, 0
// The outgoing channel that this node was meant to
// forward over is currently disabled, so we'll apply
// the update and continue.
case *lnwire.FailChannelDisabled:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, 0)
return false, 0
// It's likely that the outgoing channel didn't have
// sufficient capacity, so we'll prune this edge for
// now, and continue onwards with our path finding.
case *lnwire.FailTemporaryChannelFailure:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, failedAmt)
return false, 0
// If the send fails due to a node not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredNodeFeatureMissing:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If the send fails due to a channel not having the
// required features, then we'll note this error and
// continue.
case *lnwire.FailRequiredChannelFeatureMissing:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If the next hop in the route wasn't known or was
// offline, we'll only prune the channel which we attempted
// to route over. This is conservative, and it can
// handle faulty channels between nodes properly.
// Additionally, this guards against routing nodes
// returning errors in order to attempt to blacklist
// another node.
case *lnwire.FailUnknownNextPeer:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, 0)
return false, 0
// If the node wasn't able to forward for whatever
// reason, then we'll note this and continue with the
// routes.
case *lnwire.FailTemporaryNodeFailure:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
case *lnwire.FailPermanentNodeFailure:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If we crafted a route that contains a time lock that
// is too long for an intermediate node, we'll prune the node.
// As there currently is no way of knowing that node's
// maximum acceptable cltv, we cannot take this
// constraint into account during routing.
//
// TODO(joostjager): Record the rejected cltv and use
// that as a hint during future path finding through
// that node.
case *lnwire.FailExpiryTooFar:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
// If we get a permanent channel or node failure, then
// we'll prune the channel in both directions and
// continue with the rest of the routes.
case *lnwire.FailPermanentChannelFailure:
r.cfg.MissionControl.ReportEdgeFailure(failedEdge, 0)
r.cfg.MissionControl.ReportEdgeFailure(edge{
from: failedEdge.to,
to: failedEdge.from,
channel: failedEdge.channel,
}, 0)
return false, 0
// Any other failure or an empty failure will get the node pruned.
default:
r.cfg.MissionControl.ReportVertexFailure(failureVertex)
return false, 0
}
return r.cfg.MissionControl.ReportPaymentFail(
rt, failureSourceIdx, failureMessage,
)
}
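The switch removed above encodes the interpretation rule this commit relocates: terminal failures end the payment with a channeldb.FailureReason, channel-level failures penalize an edge, and node-level failures penalize a vertex. Below is a compact, self-contained sketch of that decision pattern using stand-in failure types; it illustrates the rule only and is not the mission control code introduced by this commit.

package main

import "fmt"

// Stand-in failure messages and reasons, standing in for the lnwire
// and channeldb types used above.
type failureReason int

const (
	reasonNone failureReason = iota
	reasonIncorrectDetails
)

type (
	failUnknownPaymentHash      struct{}
	failTemporaryChannelFailure struct{}
	failExpiryTooSoon           struct{}
)

// interpretFailure mirrors the shape of the switch in processSendError:
// the boolean result says whether the failure is terminal, and the
// callbacks model the edge and vertex reports sent to mission control.
func interpretFailure(msg interface{},
	reportEdge, reportVertex func()) (bool, failureReason) {

	switch msg.(type) {
	// The destination rejected the payment: terminal, no retry.
	case failUnknownPaymentHash:
		return true, reasonIncorrectDetails

	// Likely insufficient capacity on the outgoing channel: penalize
	// the edge and keep routing.
	case failTemporaryChannelFailure:
		reportEdge()
		return false, reasonNone

	// The reporting node disagrees about the block height: penalize
	// the node and keep routing.
	case failExpiryTooSoon:
		reportVertex()
		return false, reasonNone

	// Anything unrecognized prunes the reporting node.
	default:
		reportVertex()
		return false, reasonNone
	}
}

func main() {
	final, _ := interpretFailure(
		failTemporaryChannelFailure{},
		func() { fmt.Println("edge penalized") },
		func() { fmt.Println("vertex penalized") },
	)
	fmt.Println("terminal:", final) // prints: edge penalized, terminal: false
}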
// extractChannelUpdate examines the error and extracts the channel update.
@@ -2143,46 +1955,6 @@ func (r *ChannelRouter) extractChannelUpdate(
return update
}
// getFailedEdge tries to locate the failing channel given a route and the
// index of the node that sent the failure. It assumes that the failure is
// associated with the outgoing channel of the failing node. As a second result,
// it returns the amount sent over the edge.
func getFailedEdge(route *route.Route, failureSource int) (edge,
lnwire.MilliSatoshi) {
// Determine if we have a failure from the final hop. If so, we
// assume that the failing channel is the incoming channel. In this
// function the outgoing channel of the hop indicated by failureSource
// is returned, where index zero is the self node. By decrementing
// failureSource by one, the outgoing channel of the penultimate hop is
// returned, which is the same as the incoming channel of the final
// node.
//
// TODO(joostjager): In this case, certain types of failures are not
// expected. For example FailUnknownNextPeer. This could be a reason to
// prune the node?
if failureSource == len(route.Hops) {
failureSource--
}
// As this failure indicates that the target channel was unable to carry
// this HTLC (for whatever reason), we'll return the _outgoing_ channel that
// the source of the failure was meant to pass the HTLC along to.
if failureSource == 0 {
return edge{
from: route.SourcePubKey,
to: route.Hops[0].PubKeyBytes,
channel: route.Hops[0].ChannelID,
}, route.TotalAmount
}
return edge{
from: route.Hops[failureSource-1].PubKeyBytes,
to: route.Hops[failureSource].PubKeyBytes,
channel: route.Hops[failureSource].ChannelID,
}, route.Hops[failureSource-1].AmtToForward
}
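A small worked example may help with the index arithmetic above: index zero is the sender itself, index i corresponds to Hops[i-1], and the blamed edge is the failing node's outgoing channel, except for the final hop, which maps onto its incoming channel. The sketch below re-implements the selection rule with simplified stand-in types (these are not the real route.Route and route.Hop) purely to show which edge and amount are returned.

package main

import "fmt"

// Simplified stand-ins for route.Hop and route.Route.
type hop struct {
	pubKey    string
	channelID uint64
	amtToFwd  uint64 // amount this hop forwards onward, in msat
}

type sketchRoute struct {
	sourcePubKey string
	totalAmount  uint64
	hops         []hop
}

// failedEdge mirrors the selection rule of getFailedEdge: blame the
// outgoing channel of the node at failureSource, mapping a final-hop
// failure onto that hop's incoming channel.
func failedEdge(rt sketchRoute, failureSource int) (from, to string,
	chanID, amt uint64) {

	// A failure from the final node is attributed to its incoming
	// channel, i.e. the outgoing channel of the penultimate hop.
	if failureSource == len(rt.hops) {
		failureSource--
	}

	// A local failure blames the first channel with the full amount.
	if failureSource == 0 {
		h := rt.hops[0]
		return rt.sourcePubKey, h.pubKey, h.channelID, rt.totalAmount
	}

	prev, cur := rt.hops[failureSource-1], rt.hops[failureSource]
	return prev.pubKey, cur.pubKey, cur.channelID, prev.amtToFwd
}

func main() {
	// Route: self -> B -> C -> D, sending 1000 msat in total.
	rt := sketchRoute{
		sourcePubKey: "self",
		totalAmount:  1000,
		hops: []hop{
			{"B", 1, 990},
			{"C", 2, 980},
			{"D", 3, 970},
		},
	}

	fmt.Println(failedEdge(rt, 2)) // C D 3 980: C's outgoing channel.
	fmt.Println(failedEdge(rt, 3)) // C D 3 980: final node D maps to its incoming channel.
	fmt.Println(failedEdge(rt, 0)) // self B 1 1000: local failure, first channel.
}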
// applyChannelUpdate validates a channel update and if valid, applies it to the
// database. It returns a bool indicating whether the update was successful.
func (r *ChannelRouter) applyChannelUpdate(msg *lnwire.ChannelUpdate,