mirror of
https://github.com/lightningnetwork/lnd.git
synced 2025-06-06 21:20:32 +02:00
discovery+graph: track job set dependencies in ValidationBarrier
This commit does two things: - removes the concept of allow / deny. Having this in place was a minor optimization and removing it makes the solution simpler. - changes the job dependency tracking to track sets of abstact parent jobs rather than individual parent jobs. As a note, the purpose of the ValidationBarrier is that it allows us to launch gossip validation jobs in goroutines while still ensuring that the validation order of these goroutines is adhered to when it comes to validating ChannelAnnouncement _before_ ChannelUpdate and _before_ NodeAnnouncement.
This commit is contained in:
parent
2731d09a0b
commit
6a47a501c3
@ -517,6 +517,9 @@ type AuthenticatedGossiper struct {
|
|||||||
// AuthenticatedGossiper lock.
|
// AuthenticatedGossiper lock.
|
||||||
chanUpdateRateLimiter map[uint64][2]*rate.Limiter
|
chanUpdateRateLimiter map[uint64][2]*rate.Limiter
|
||||||
|
|
||||||
|
// vb is used to enforce job dependency ordering of gossip messages.
|
||||||
|
vb *graph.ValidationBarrier
|
||||||
|
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -542,6 +545,8 @@ func New(cfg Config, selfKeyDesc *keychain.KeyDescriptor) *AuthenticatedGossiper
|
|||||||
banman: newBanman(),
|
banman: newBanman(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gossiper.vb = graph.NewValidationBarrier(1000, gossiper.quit)
|
||||||
|
|
||||||
gossiper.syncMgr = newSyncManager(&SyncManagerCfg{
|
gossiper.syncMgr = newSyncManager(&SyncManagerCfg{
|
||||||
ChainHash: cfg.ChainHash,
|
ChainHash: cfg.ChainHash,
|
||||||
ChanSeries: cfg.ChanSeries,
|
ChanSeries: cfg.ChanSeries,
|
||||||
@ -1409,10 +1414,6 @@ func (d *AuthenticatedGossiper) networkHandler() {
|
|||||||
log.Errorf("Unable to rebroadcast stale announcements: %v", err)
|
log.Errorf("Unable to rebroadcast stale announcements: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// We'll use this validation to ensure that we process jobs in their
|
|
||||||
// dependency order during parallel validation.
|
|
||||||
validationBarrier := graph.NewValidationBarrier(1000, d.quit)
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
// A new policy update has arrived. We'll commit it to the
|
// A new policy update has arrived. We'll commit it to the
|
||||||
@ -1481,11 +1482,17 @@ func (d *AuthenticatedGossiper) networkHandler() {
|
|||||||
// We'll set up any dependent, and wait until a free
|
// We'll set up any dependent, and wait until a free
|
||||||
// slot for this job opens up, this allow us to not
|
// slot for this job opens up, this allow us to not
|
||||||
// have thousands of goroutines active.
|
// have thousands of goroutines active.
|
||||||
validationBarrier.InitJobDependencies(announcement.msg)
|
annJobID, err := d.vb.InitJobDependencies(
|
||||||
|
announcement.msg,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
announcement.err <- err
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
d.wg.Add(1)
|
d.wg.Add(1)
|
||||||
go d.handleNetworkMessages(
|
go d.handleNetworkMessages(
|
||||||
announcement, &announcements, validationBarrier,
|
announcement, &announcements, annJobID,
|
||||||
)
|
)
|
||||||
|
|
||||||
// The trickle timer has ticked, which indicates we should
|
// The trickle timer has ticked, which indicates we should
|
||||||
@ -1536,10 +1543,10 @@ func (d *AuthenticatedGossiper) networkHandler() {
|
|||||||
//
|
//
|
||||||
// NOTE: must be run as a goroutine.
|
// NOTE: must be run as a goroutine.
|
||||||
func (d *AuthenticatedGossiper) handleNetworkMessages(nMsg *networkMsg,
|
func (d *AuthenticatedGossiper) handleNetworkMessages(nMsg *networkMsg,
|
||||||
deDuped *deDupedAnnouncements, vb *graph.ValidationBarrier) {
|
deDuped *deDupedAnnouncements, jobID graph.JobID) {
|
||||||
|
|
||||||
defer d.wg.Done()
|
defer d.wg.Done()
|
||||||
defer vb.CompleteJob()
|
defer d.vb.CompleteJob()
|
||||||
|
|
||||||
// We should only broadcast this message forward if it originated from
|
// We should only broadcast this message forward if it originated from
|
||||||
// us or it wasn't received as part of our initial historical sync.
|
// us or it wasn't received as part of our initial historical sync.
|
||||||
@ -1547,7 +1554,7 @@ func (d *AuthenticatedGossiper) handleNetworkMessages(nMsg *networkMsg,
|
|||||||
|
|
||||||
// If this message has an existing dependency, then we'll wait until
|
// If this message has an existing dependency, then we'll wait until
|
||||||
// that has been fully validated before we proceed.
|
// that has been fully validated before we proceed.
|
||||||
err := vb.WaitForDependants(nMsg.msg)
|
err := d.vb.WaitForParents(jobID, nMsg.msg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debugf("Validating network message %s got err: %v",
|
log.Debugf("Validating network message %s got err: %v",
|
||||||
nMsg.msg.MsgType(), err)
|
nMsg.msg.MsgType(), err)
|
||||||
@ -1577,7 +1584,16 @@ func (d *AuthenticatedGossiper) handleNetworkMessages(nMsg *networkMsg,
|
|||||||
|
|
||||||
// If this message had any dependencies, then we can now signal them to
|
// If this message had any dependencies, then we can now signal them to
|
||||||
// continue.
|
// continue.
|
||||||
vb.SignalDependants(nMsg.msg, allow)
|
err = d.vb.SignalDependents(nMsg.msg, jobID)
|
||||||
|
if err != nil {
|
||||||
|
// Something is wrong if SignalDependents returns an error.
|
||||||
|
log.Errorf("SignalDependents returned error for msg=%v with "+
|
||||||
|
"JobID=%v", spew.Sdump(nMsg.msg), jobID)
|
||||||
|
|
||||||
|
nMsg.err <- err
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// If the announcement was accepted, then add the emitted announcements
|
// If the announcement was accepted, then add the emitted announcements
|
||||||
// to our announce batch to be broadcast once the trickle timer ticks
|
// to our announce batch to be broadcast once the trickle timer ticks
|
||||||
|
@ -3,29 +3,34 @@ package graph
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
|
"github.com/go-errors/errors"
|
||||||
|
"github.com/lightningnetwork/lnd/fn/v2"
|
||||||
"github.com/lightningnetwork/lnd/lnwire"
|
"github.com/lightningnetwork/lnd/lnwire"
|
||||||
"github.com/lightningnetwork/lnd/routing/route"
|
"github.com/lightningnetwork/lnd/routing/route"
|
||||||
)
|
)
|
||||||
|
|
||||||
// validationSignals contains two signals which allows the ValidationBarrier to
|
// JobID identifies an active job in the validation barrier. It is large so
|
||||||
// communicate back to the caller whether a dependent should be processed or not
|
// that we don't need to worry about overflows.
|
||||||
// based on whether its parent was successfully validated. Only one of these
|
type JobID uint64
|
||||||
// signals is to be used at a time.
|
|
||||||
type validationSignals struct {
|
|
||||||
// allow is the signal used to allow a dependent to be processed.
|
|
||||||
allow chan struct{}
|
|
||||||
|
|
||||||
// deny is the signal used to prevent a dependent from being processed.
|
// jobInfo stores job dependency info for a set of dependent gossip messages.
|
||||||
deny chan struct{}
|
type jobInfo struct {
|
||||||
|
// activeParentJobIDs is the set of active parent job ids.
|
||||||
|
activeParentJobIDs fn.Set[JobID]
|
||||||
|
|
||||||
|
// activeDependentJobs is the set of active dependent job ids.
|
||||||
|
activeDependentJobs fn.Set[JobID]
|
||||||
}
|
}
|
||||||
|
|
||||||
// ValidationBarrier is a barrier used to ensure proper validation order while
|
// ValidationBarrier is a barrier used to enforce a strict validation order
|
||||||
// concurrently validating new announcements for channel edges, and the
|
// while concurrently validating other updates for channel edges. It uses a set
|
||||||
// attributes of channel edges. It uses this set of maps (protected by this
|
// of maps to track validation dependencies. This is needed in practice because
|
||||||
// mutex) to track validation dependencies. For a given channel our
|
// gossip messages for a given channel may arive in order, but then due to
|
||||||
// dependencies look like this: chanAnn <- chanUp <- nodeAnn. That is we must
|
// scheduling in different goroutines, may be validated in the wrong order.
|
||||||
// validate the item on the left of the arrow before that on the right.
|
// With the ValidationBarrier, the dependent update will wait until the parent
|
||||||
|
// update completes.
|
||||||
type ValidationBarrier struct {
|
type ValidationBarrier struct {
|
||||||
// validationSemaphore is a channel of structs which is used as a
|
// validationSemaphore is a channel of structs which is used as a
|
||||||
// semaphore. Initially we'll fill this with a buffered channel of the
|
// semaphore. Initially we'll fill this with a buffered channel of the
|
||||||
@ -33,23 +38,27 @@ type ValidationBarrier struct {
|
|||||||
// from this channel, then restore the value upon completion.
|
// from this channel, then restore the value upon completion.
|
||||||
validationSemaphore chan struct{}
|
validationSemaphore chan struct{}
|
||||||
|
|
||||||
// chanAnnFinSignal is map that keep track of all the pending
|
// jobInfoMap stores the set of job ids for each channel.
|
||||||
// ChannelAnnouncement like validation job going on. Once the job has
|
// NOTE: This MUST be used with the mutex.
|
||||||
// been completed, the channel will be closed unblocking any
|
// NOTE: This currently stores string representations of
|
||||||
// dependants.
|
// lnwire.ShortChannelID and route.Vertex. Since these are of different
|
||||||
chanAnnFinSignal map[lnwire.ShortChannelID]*validationSignals
|
// lengths, collision cannot occur in their string representations.
|
||||||
|
// N.B.: Check that any new string-converted types don't collide with
|
||||||
|
// existing string-converted types.
|
||||||
|
jobInfoMap map[string]*jobInfo
|
||||||
|
|
||||||
// chanEdgeDependencies tracks any channel edge updates which should
|
// jobDependencies is a mapping from a child's JobID to the set of
|
||||||
// wait until the completion of the ChannelAnnouncement before
|
// parent JobID that it depends on.
|
||||||
// proceeding. This is a dependency, as we can't validate the update
|
// NOTE: This MUST be used with the mutex.
|
||||||
// before we validate the announcement which creates the channel
|
jobDependencies map[JobID]fn.Set[JobID]
|
||||||
// itself.
|
|
||||||
chanEdgeDependencies map[lnwire.ShortChannelID]*validationSignals
|
|
||||||
|
|
||||||
// nodeAnnDependencies tracks any pending NodeAnnouncement validation
|
// childJobChans stores the notification channel that each child job
|
||||||
// jobs which should wait until the completion of the
|
// listens on for parent job completions.
|
||||||
// ChannelAnnouncement before proceeding.
|
// NOTE: This MUST be used with the mutex.
|
||||||
nodeAnnDependencies map[route.Vertex]*validationSignals
|
childJobChans map[JobID]chan struct{}
|
||||||
|
|
||||||
|
// idCtr is an atomic integer that is used to assign JobIDs.
|
||||||
|
idCtr atomic.Uint64
|
||||||
|
|
||||||
quit chan struct{}
|
quit chan struct{}
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
@ -62,9 +71,9 @@ func NewValidationBarrier(numActiveReqs int,
|
|||||||
quitChan chan struct{}) *ValidationBarrier {
|
quitChan chan struct{}) *ValidationBarrier {
|
||||||
|
|
||||||
v := &ValidationBarrier{
|
v := &ValidationBarrier{
|
||||||
chanAnnFinSignal: make(map[lnwire.ShortChannelID]*validationSignals),
|
jobInfoMap: make(map[string]*jobInfo),
|
||||||
chanEdgeDependencies: make(map[lnwire.ShortChannelID]*validationSignals),
|
jobDependencies: make(map[JobID]fn.Set[JobID]),
|
||||||
nodeAnnDependencies: make(map[route.Vertex]*validationSignals),
|
childJobChans: make(map[JobID]chan struct{}),
|
||||||
quit: quitChan,
|
quit: quitChan,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +89,9 @@ func NewValidationBarrier(numActiveReqs int,
|
|||||||
|
|
||||||
// InitJobDependencies will wait for a new job slot to become open, and then
|
// InitJobDependencies will wait for a new job slot to become open, and then
|
||||||
// sets up any dependent signals/trigger for the new job
|
// sets up any dependent signals/trigger for the new job
|
||||||
func (v *ValidationBarrier) InitJobDependencies(job interface{}) {
|
func (v *ValidationBarrier) InitJobDependencies(job interface{}) (JobID,
|
||||||
|
error) {
|
||||||
|
|
||||||
// We'll wait for either a new slot to become open, or for the quit
|
// We'll wait for either a new slot to become open, or for the quit
|
||||||
// channel to be closed.
|
// channel to be closed.
|
||||||
select {
|
select {
|
||||||
@ -91,50 +102,104 @@ func (v *ValidationBarrier) InitJobDependencies(job interface{}) {
|
|||||||
v.Lock()
|
v.Lock()
|
||||||
defer v.Unlock()
|
defer v.Unlock()
|
||||||
|
|
||||||
|
// updateOrCreateJobInfo modifies the set of activeParentJobs for this
|
||||||
|
// annID and updates jobInfoMap.
|
||||||
|
updateOrCreateJobInfo := func(annID string, annJobID JobID) {
|
||||||
|
info, ok := v.jobInfoMap[annID]
|
||||||
|
if ok {
|
||||||
|
// If an entry already exists for annID, then a job
|
||||||
|
// related to it is being validated. Add to the set of
|
||||||
|
// parent job ids. This addition will only affect
|
||||||
|
// _later_, _child_ jobs for the annID.
|
||||||
|
info.activeParentJobIDs.Add(annJobID)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// No entry exists for annID, meaning that we should create
|
||||||
|
// one.
|
||||||
|
parentJobSet := fn.NewSet(annJobID)
|
||||||
|
|
||||||
|
info = &jobInfo{
|
||||||
|
activeParentJobIDs: parentJobSet,
|
||||||
|
activeDependentJobs: fn.NewSet[JobID](),
|
||||||
|
}
|
||||||
|
v.jobInfoMap[annID] = info
|
||||||
|
}
|
||||||
|
|
||||||
|
// populateDependencies populates the job dependency mappings (i.e.
|
||||||
|
// which should complete after another) for the (annID, childJobID)
|
||||||
|
// tuple.
|
||||||
|
populateDependencies := func(annID string, childJobID JobID) {
|
||||||
|
// If there is no entry in the jobInfoMap, we don't have to
|
||||||
|
// wait on any parent jobs to finish.
|
||||||
|
info, ok := v.jobInfoMap[annID]
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// We want to see a snapshot of active parent jobs for this
|
||||||
|
// annID that are already registered in activeParentJobIDs. The
|
||||||
|
// child job identified by childJobID can only run after these
|
||||||
|
// parent jobs have run. After grabbing the snapshot, we then
|
||||||
|
// want to persist a slice of these jobs.
|
||||||
|
|
||||||
|
// Create the notification chan that parent jobs will send (or
|
||||||
|
// close) on when they complete.
|
||||||
|
jobChan := make(chan struct{})
|
||||||
|
|
||||||
|
// Add to set of activeDependentJobs for this annID.
|
||||||
|
info.activeDependentJobs.Add(childJobID)
|
||||||
|
|
||||||
|
// Store in childJobChans. The parent jobs will fetch this chan
|
||||||
|
// to notify on. The child job will later fetch this chan to
|
||||||
|
// listen on when WaitForParents is called.
|
||||||
|
v.childJobChans[childJobID] = jobChan
|
||||||
|
|
||||||
|
// Copy over the parent job IDs at this moment for this annID.
|
||||||
|
// This job must be processed AFTER those parent IDs.
|
||||||
|
parentJobs := info.activeParentJobIDs.Copy()
|
||||||
|
|
||||||
|
// Populate the jobDependencies mapping.
|
||||||
|
v.jobDependencies[childJobID] = parentJobs
|
||||||
|
}
|
||||||
|
|
||||||
// Once a slot is open, we'll examine the message of the job, to see if
|
// Once a slot is open, we'll examine the message of the job, to see if
|
||||||
// there need to be any dependent barriers set up.
|
// there need to be any dependent barriers set up.
|
||||||
switch msg := job.(type) {
|
switch msg := job.(type) {
|
||||||
|
|
||||||
// If this is a channel announcement, then we'll need to set up den
|
|
||||||
// tenancies, as we'll need to verify this before we verify any
|
|
||||||
// ChannelUpdates for the same channel, or NodeAnnouncements of nodes
|
|
||||||
// that are involved in this channel. This goes for both the wire
|
|
||||||
// type,s and also the types that we use within the database.
|
|
||||||
case *lnwire.ChannelAnnouncement1:
|
case *lnwire.ChannelAnnouncement1:
|
||||||
|
id := JobID(v.idCtr.Add(1))
|
||||||
|
|
||||||
// We ensure that we only create a new announcement signal iff,
|
updateOrCreateJobInfo(msg.ShortChannelID.String(), id)
|
||||||
// one doesn't already exist, as there may be duplicate
|
updateOrCreateJobInfo(route.Vertex(msg.NodeID1).String(), id)
|
||||||
// announcements. We'll close this signal once the
|
updateOrCreateJobInfo(route.Vertex(msg.NodeID2).String(), id)
|
||||||
// ChannelAnnouncement has been validated. This will result in
|
|
||||||
// all the dependent jobs being unlocked so they can finish
|
|
||||||
// execution themselves.
|
|
||||||
if _, ok := v.chanAnnFinSignal[msg.ShortChannelID]; !ok {
|
|
||||||
// We'll create the channel that we close after we
|
|
||||||
// validate this announcement. All dependants will
|
|
||||||
// point to this same channel, so they'll be unblocked
|
|
||||||
// at the same time.
|
|
||||||
signals := &validationSignals{
|
|
||||||
allow: make(chan struct{}),
|
|
||||||
deny: make(chan struct{}),
|
|
||||||
}
|
|
||||||
|
|
||||||
v.chanAnnFinSignal[msg.ShortChannelID] = signals
|
return id, nil
|
||||||
v.chanEdgeDependencies[msg.ShortChannelID] = signals
|
|
||||||
|
|
||||||
v.nodeAnnDependencies[route.Vertex(msg.NodeID1)] = signals
|
// Populate the dependency mappings for the below child jobs.
|
||||||
v.nodeAnnDependencies[route.Vertex(msg.NodeID2)] = signals
|
|
||||||
}
|
|
||||||
|
|
||||||
// These other types don't have any dependants, so no further
|
|
||||||
// initialization needs to be done beyond just occupying a job slot.
|
|
||||||
case *lnwire.ChannelUpdate1:
|
case *lnwire.ChannelUpdate1:
|
||||||
return
|
childJobID := JobID(v.idCtr.Add(1))
|
||||||
|
populateDependencies(msg.ShortChannelID.String(), childJobID)
|
||||||
|
|
||||||
|
return childJobID, nil
|
||||||
case *lnwire.NodeAnnouncement:
|
case *lnwire.NodeAnnouncement:
|
||||||
// TODO(roasbeef): node ann needs to wait on existing channel updates
|
childJobID := JobID(v.idCtr.Add(1))
|
||||||
return
|
populateDependencies(
|
||||||
|
route.Vertex(msg.NodeID).String(), childJobID,
|
||||||
|
)
|
||||||
|
|
||||||
|
return childJobID, nil
|
||||||
case *lnwire.AnnounceSignatures1:
|
case *lnwire.AnnounceSignatures1:
|
||||||
// TODO(roasbeef): need to wait on chan ann?
|
// TODO(roasbeef): need to wait on chan ann?
|
||||||
return
|
// - We can do the above by calling populateDependencies. For
|
||||||
|
// now, while we evaluate potential side effects, don't do
|
||||||
|
// anything with childJobID and just return it.
|
||||||
|
childJobID := JobID(v.idCtr.Add(1))
|
||||||
|
return childJobID, nil
|
||||||
|
|
||||||
|
default:
|
||||||
|
// An invalid message was passed into InitJobDependencies.
|
||||||
|
// Return an error.
|
||||||
|
return JobID(0), errors.New("invalid message")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -149,16 +214,21 @@ func (v *ValidationBarrier) CompleteJob() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WaitForDependants will block until any jobs that this job dependants on have
|
// WaitForParents will block until all parent job dependencies have went
|
||||||
// finished executing. This allows us a graceful way to schedule goroutines
|
// through the validation pipeline. This allows us a graceful way to run jobs
|
||||||
// based on any pending uncompleted dependent jobs. If this job doesn't have an
|
// in goroutines and still have strict ordering guarantees. If this job doesn't
|
||||||
// active dependent, then this function will return immediately.
|
// have any parent job dependencies, then this function will return
|
||||||
func (v *ValidationBarrier) WaitForDependants(job interface{}) error {
|
// immediately.
|
||||||
|
func (v *ValidationBarrier) WaitForParents(childJobID JobID,
|
||||||
|
job interface{}) error {
|
||||||
|
|
||||||
var (
|
var (
|
||||||
signals *validationSignals
|
|
||||||
ok bool
|
ok bool
|
||||||
jobDesc string
|
jobDesc string
|
||||||
|
|
||||||
|
parentJobIDs fn.Set[JobID]
|
||||||
|
annID string
|
||||||
|
jobChan chan struct{}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Acquire a lock to read ValidationBarrier.
|
// Acquire a lock to read ValidationBarrier.
|
||||||
@ -168,88 +238,221 @@ func (v *ValidationBarrier) WaitForDependants(job interface{}) error {
|
|||||||
// Any ChannelUpdate or NodeAnnouncement jobs will need to wait on the
|
// Any ChannelUpdate or NodeAnnouncement jobs will need to wait on the
|
||||||
// completion of any active ChannelAnnouncement jobs related to them.
|
// completion of any active ChannelAnnouncement jobs related to them.
|
||||||
case *lnwire.ChannelUpdate1:
|
case *lnwire.ChannelUpdate1:
|
||||||
signals, ok = v.chanEdgeDependencies[msg.ShortChannelID]
|
annID = msg.ShortChannelID.String()
|
||||||
|
|
||||||
|
parentJobIDs, ok = v.jobDependencies[childJobID]
|
||||||
|
if !ok {
|
||||||
|
// If ok is false, it means that this child job never
|
||||||
|
// had any parent jobs to wait on.
|
||||||
|
v.Unlock()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
jobDesc = fmt.Sprintf("job=lnwire.ChannelUpdate, scid=%v",
|
jobDesc = fmt.Sprintf("job=lnwire.ChannelUpdate, scid=%v",
|
||||||
msg.ShortChannelID.ToUint64())
|
msg.ShortChannelID.ToUint64())
|
||||||
|
|
||||||
case *lnwire.NodeAnnouncement:
|
case *lnwire.NodeAnnouncement:
|
||||||
vertex := route.Vertex(msg.NodeID)
|
annID = route.Vertex(msg.NodeID).String()
|
||||||
signals, ok = v.nodeAnnDependencies[vertex]
|
|
||||||
|
parentJobIDs, ok = v.jobDependencies[childJobID]
|
||||||
|
if !ok {
|
||||||
|
// If ok is false, it means that this child job never
|
||||||
|
// had any parent jobs to wait on.
|
||||||
|
v.Unlock()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
jobDesc = fmt.Sprintf("job=lnwire.NodeAnnouncement, pub=%s",
|
jobDesc = fmt.Sprintf("job=lnwire.NodeAnnouncement, pub=%s",
|
||||||
vertex)
|
route.Vertex(msg.NodeID))
|
||||||
|
|
||||||
// Other types of jobs can be executed immediately, so we'll just
|
// Other types of jobs can be executed immediately, so we'll just
|
||||||
// return directly.
|
// return directly.
|
||||||
case *lnwire.AnnounceSignatures1:
|
case *lnwire.AnnounceSignatures1:
|
||||||
// TODO(roasbeef): need to wait on chan ann?
|
// TODO(roasbeef): need to wait on chan ann?
|
||||||
|
v.Unlock()
|
||||||
|
return nil
|
||||||
|
|
||||||
case *lnwire.ChannelAnnouncement1:
|
case *lnwire.ChannelAnnouncement1:
|
||||||
|
v.Unlock()
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release the lock once the above read is finished.
|
// Release the lock once the above read is finished.
|
||||||
v.Unlock()
|
v.Unlock()
|
||||||
|
|
||||||
// If it's not ok, it means either the job is not a dependent type, or
|
|
||||||
// it doesn't have a dependency signal. Either way, we can return
|
|
||||||
// early.
|
|
||||||
if !ok {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("Waiting for dependent on %s", jobDesc)
|
log.Debugf("Waiting for dependent on %s", jobDesc)
|
||||||
|
|
||||||
// If we do have an active job, then we'll wait until either the signal
|
v.Lock()
|
||||||
// is closed, or the set of jobs exits.
|
jobChan, ok = v.childJobChans[childJobID]
|
||||||
|
if !ok {
|
||||||
|
v.Unlock()
|
||||||
|
|
||||||
|
// The entry may not exist because this job does not depend on
|
||||||
|
// any parent jobs.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
v.Unlock()
|
||||||
|
|
||||||
|
for {
|
||||||
select {
|
select {
|
||||||
case <-v.quit:
|
case <-v.quit:
|
||||||
return NewErrf(ErrVBarrierShuttingDown,
|
return NewErrf(ErrVBarrierShuttingDown,
|
||||||
"validation barrier shutting down")
|
"validation barrier shutting down")
|
||||||
|
|
||||||
case <-signals.deny:
|
case <-jobChan:
|
||||||
log.Debugf("Signal deny for %s", jobDesc)
|
// Every time this is sent on or if it's closed, a
|
||||||
return NewErrf(ErrParentValidationFailed,
|
// parent job has finished. The parent jobs have to
|
||||||
"parent validation failed")
|
// also potentially close the channel because if all
|
||||||
|
// the parent jobs finish and call SignalDependents
|
||||||
|
// before the goroutine running WaitForParents has a
|
||||||
|
// chance to grab the notification chan from
|
||||||
|
// childJobChans, then the running goroutine will wait
|
||||||
|
// here for a notification forever. By having the last
|
||||||
|
// parent job close the notificiation chan, we avoid
|
||||||
|
// this issue.
|
||||||
|
|
||||||
case <-signals.allow:
|
// Check and see if we have any parent jobs left. If we
|
||||||
log.Tracef("Signal allow for %s", jobDesc)
|
// don't, we can finish up.
|
||||||
|
v.Lock()
|
||||||
|
info, found := v.jobInfoMap[annID]
|
||||||
|
if !found {
|
||||||
|
v.Unlock()
|
||||||
|
|
||||||
|
// No parent job info found, proceed with
|
||||||
|
// validation.
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
x := parentJobIDs.Intersect(info.activeParentJobIDs)
|
||||||
|
v.Unlock()
|
||||||
|
if x.IsEmpty() {
|
||||||
|
// The parent jobs have all completed. We can
|
||||||
|
// proceed with validation.
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SignalDependants will allow/deny any jobs that are dependent on this job that
|
// If we've reached this point, we are still waiting on
|
||||||
// they can continue execution. If the job doesn't have any dependants, then
|
// a parent job to complete.
|
||||||
// this function sill exit immediately.
|
}
|
||||||
func (v *ValidationBarrier) SignalDependants(job interface{}, allow bool) {
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SignalDependents signals to any child jobs that this parent job has
|
||||||
|
// finished.
|
||||||
|
func (v *ValidationBarrier) SignalDependents(job interface{}, id JobID) error {
|
||||||
v.Lock()
|
v.Lock()
|
||||||
defer v.Unlock()
|
defer v.Unlock()
|
||||||
|
|
||||||
switch msg := job.(type) {
|
// removeJob either removes a child job or a parent job. If it is
|
||||||
|
// removing a child job, then it removes the child's JobID from the set
|
||||||
// If we've just finished executing a ChannelAnnouncement, then we'll
|
// of dependent jobs for the announcement ID. If this is removing a
|
||||||
// close out the signal, and remove the signal from the map of active
|
// parent job, then it removes the parentJobID from the set of active
|
||||||
// ones. This will allow/deny any dependent jobs to continue execution.
|
// parent jobs and notifies the child jobs that it has finished
|
||||||
case *lnwire.ChannelAnnouncement1:
|
// validating.
|
||||||
finSignals, ok := v.chanAnnFinSignal[msg.ShortChannelID]
|
removeJob := func(annID string, id JobID, child bool) error {
|
||||||
|
if child {
|
||||||
|
// If we're removing a child job, check jobInfoMap and
|
||||||
|
// remove this job from activeDependentJobs.
|
||||||
|
info, ok := v.jobInfoMap[annID]
|
||||||
if ok {
|
if ok {
|
||||||
if allow {
|
info.activeDependentJobs.Remove(id)
|
||||||
close(finSignals.allow)
|
|
||||||
} else {
|
|
||||||
close(finSignals.deny)
|
|
||||||
}
|
|
||||||
delete(v.chanAnnFinSignal, msg.ShortChannelID)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
delete(v.chanEdgeDependencies, msg.ShortChannelID)
|
// Remove the notification chan from childJobChans.
|
||||||
|
delete(v.childJobChans, id)
|
||||||
|
|
||||||
|
// Remove this job's dependency mapping.
|
||||||
|
delete(v.jobDependencies, id)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, we are removing a parent job.
|
||||||
|
jobInfo, found := v.jobInfoMap[annID]
|
||||||
|
if !found {
|
||||||
|
// NOTE: Some sort of consistency guarantee has been
|
||||||
|
// broken.
|
||||||
|
return fmt.Errorf("no job info found for "+
|
||||||
|
"identifier(%v)", id)
|
||||||
|
}
|
||||||
|
|
||||||
|
jobInfo.activeParentJobIDs.Remove(id)
|
||||||
|
|
||||||
|
lastJob := jobInfo.activeParentJobIDs.IsEmpty()
|
||||||
|
|
||||||
|
// Notify all dependent jobs that a parent job has completed.
|
||||||
|
for child := range jobInfo.activeDependentJobs {
|
||||||
|
notifyChan, ok := v.childJobChans[child]
|
||||||
|
if !ok {
|
||||||
|
// NOTE: Some sort of consistency guarantee has
|
||||||
|
// been broken.
|
||||||
|
return fmt.Errorf("no job info found for "+
|
||||||
|
"identifier(%v)", id)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We don't want to block when sending out the signal.
|
||||||
|
select {
|
||||||
|
case notifyChan <- struct{}{}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is the last parent job for this annID, also
|
||||||
|
// close the channel. This is needed because it's
|
||||||
|
// possible that the parent job cleans up the job
|
||||||
|
// mappings before the goroutine handling the child job
|
||||||
|
// has a chance to call WaitForParents and catch the
|
||||||
|
// signal sent above. We are allowed to close because
|
||||||
|
// no other parent job will be able to send along the
|
||||||
|
// channel (or close) as we're removing the entry from
|
||||||
|
// the jobInfoMap below.
|
||||||
|
if lastJob {
|
||||||
|
close(notifyChan)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove from jobInfoMap if last job.
|
||||||
|
if lastJob {
|
||||||
|
delete(v.jobInfoMap, annID)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch msg := job.(type) {
|
||||||
|
case *lnwire.ChannelAnnouncement1:
|
||||||
|
// Signal to the child jobs that parent validation has
|
||||||
|
// finished. We have to call removeJob for each annID
|
||||||
|
// that this ChannelAnnouncement can be associated with.
|
||||||
|
err := removeJob(msg.ShortChannelID.String(), id, false)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = removeJob(route.Vertex(msg.NodeID1).String(), id, false)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = removeJob(route.Vertex(msg.NodeID2).String(), id, false)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
|
||||||
// For all other job types, we'll delete the tracking entries from the
|
|
||||||
// map, as if we reach this point, then all dependants have already
|
|
||||||
// finished executing and we can proceed.
|
|
||||||
case *lnwire.NodeAnnouncement:
|
case *lnwire.NodeAnnouncement:
|
||||||
delete(v.nodeAnnDependencies, route.Vertex(msg.NodeID))
|
// Remove child job info.
|
||||||
|
return removeJob(route.Vertex(msg.NodeID).String(), id, true)
|
||||||
|
|
||||||
case *lnwire.ChannelUpdate1:
|
case *lnwire.ChannelUpdate1:
|
||||||
delete(v.chanEdgeDependencies, msg.ShortChannelID)
|
// Remove child job info.
|
||||||
|
return removeJob(msg.ShortChannelID.String(), id, true)
|
||||||
|
|
||||||
case *lnwire.AnnounceSignatures1:
|
case *lnwire.AnnounceSignatures1:
|
||||||
return
|
// No dependency mappings are stored for AnnounceSignatures1,
|
||||||
|
// so do nothing.
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return errors.New("invalid message - no job dependencies")
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@ import (
|
|||||||
|
|
||||||
"github.com/lightningnetwork/lnd/graph"
|
"github.com/lightningnetwork/lnd/graph"
|
||||||
"github.com/lightningnetwork/lnd/lnwire"
|
"github.com/lightningnetwork/lnd/lnwire"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TestValidationBarrierSemaphore checks basic properties of the validation
|
// TestValidationBarrierSemaphore checks basic properties of the validation
|
||||||
@ -74,23 +75,31 @@ func TestValidationBarrierQuit(t *testing.T) {
|
|||||||
// Create a set of unique channel announcements that we will prep for
|
// Create a set of unique channel announcements that we will prep for
|
||||||
// validation.
|
// validation.
|
||||||
anns := make([]*lnwire.ChannelAnnouncement1, 0, numTasks)
|
anns := make([]*lnwire.ChannelAnnouncement1, 0, numTasks)
|
||||||
|
parentJobIDs := make([]graph.JobID, 0, numTasks)
|
||||||
for i := 0; i < numTasks; i++ {
|
for i := 0; i < numTasks; i++ {
|
||||||
anns = append(anns, &lnwire.ChannelAnnouncement1{
|
anns = append(anns, &lnwire.ChannelAnnouncement1{
|
||||||
ShortChannelID: lnwire.NewShortChanIDFromInt(uint64(i)),
|
ShortChannelID: lnwire.NewShortChanIDFromInt(uint64(i)),
|
||||||
NodeID1: nodeIDFromInt(uint64(2 * i)),
|
NodeID1: nodeIDFromInt(uint64(2 * i)),
|
||||||
NodeID2: nodeIDFromInt(uint64(2*i + 1)),
|
NodeID2: nodeIDFromInt(uint64(2*i + 1)),
|
||||||
})
|
})
|
||||||
barrier.InitJobDependencies(anns[i])
|
parentJobID, err := barrier.InitJobDependencies(anns[i])
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
parentJobIDs = append(parentJobIDs, parentJobID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a set of channel updates, that must wait until their
|
// Create a set of channel updates, that must wait until their
|
||||||
// associated channel announcement has been verified.
|
// associated channel announcement has been verified.
|
||||||
chanUpds := make([]*lnwire.ChannelUpdate1, 0, numTasks)
|
chanUpds := make([]*lnwire.ChannelUpdate1, 0, numTasks)
|
||||||
|
childJobIDs := make([]graph.JobID, 0, numTasks)
|
||||||
for i := 0; i < numTasks; i++ {
|
for i := 0; i < numTasks; i++ {
|
||||||
chanUpds = append(chanUpds, &lnwire.ChannelUpdate1{
|
chanUpds = append(chanUpds, &lnwire.ChannelUpdate1{
|
||||||
ShortChannelID: lnwire.NewShortChanIDFromInt(uint64(i)),
|
ShortChannelID: lnwire.NewShortChanIDFromInt(uint64(i)),
|
||||||
})
|
})
|
||||||
barrier.InitJobDependencies(chanUpds[i])
|
childJob, err := barrier.InitJobDependencies(chanUpds[i])
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
childJobIDs = append(childJobIDs, childJob)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Spawn additional tasks that will send the error returned after
|
// Spawn additional tasks that will send the error returned after
|
||||||
@ -100,7 +109,9 @@ func TestValidationBarrierQuit(t *testing.T) {
|
|||||||
jobErrs := make(chan error)
|
jobErrs := make(chan error)
|
||||||
for i := 0; i < numTasks; i++ {
|
for i := 0; i < numTasks; i++ {
|
||||||
go func(ii int) {
|
go func(ii int) {
|
||||||
jobErrs <- barrier.WaitForDependants(chanUpds[ii])
|
jobErrs <- barrier.WaitForParents(
|
||||||
|
childJobIDs[ii], chanUpds[ii],
|
||||||
|
)
|
||||||
}(i)
|
}(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,14 +128,12 @@ func TestValidationBarrierQuit(t *testing.T) {
|
|||||||
// with the correct error.
|
// with the correct error.
|
||||||
for i := 0; i < numTasks; i++ {
|
for i := 0; i < numTasks; i++ {
|
||||||
switch {
|
switch {
|
||||||
// Signal completion for the first half of tasks, but only allow
|
|
||||||
// dependents to be processed as well for the second quarter.
|
|
||||||
case i < numTasks/4:
|
|
||||||
barrier.SignalDependants(anns[i], false)
|
|
||||||
barrier.CompleteJob()
|
|
||||||
|
|
||||||
case i < numTasks/2:
|
case i < numTasks/2:
|
||||||
barrier.SignalDependants(anns[i], true)
|
err := barrier.SignalDependents(
|
||||||
|
anns[i], parentJobIDs[i],
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
barrier.CompleteJob()
|
barrier.CompleteJob()
|
||||||
|
|
||||||
// At midpoint, quit the validation barrier.
|
// At midpoint, quit the validation barrier.
|
||||||
@ -141,12 +150,7 @@ func TestValidationBarrierQuit(t *testing.T) {
|
|||||||
|
|
||||||
switch {
|
switch {
|
||||||
// First half should return without failure.
|
// First half should return without failure.
|
||||||
case i < numTasks/4 && !graph.IsError(
|
case i < numTasks/2 && err != nil:
|
||||||
err, graph.ErrParentValidationFailed,
|
|
||||||
):
|
|
||||||
t.Fatalf("unexpected failure while waiting: %v", err)
|
|
||||||
|
|
||||||
case i >= numTasks/4 && i < numTasks/2 && err != nil:
|
|
||||||
t.Fatalf("unexpected failure while waiting: %v", err)
|
t.Fatalf("unexpected failure while waiting: %v", err)
|
||||||
|
|
||||||
// Last half should return the shutdown error.
|
// Last half should return the shutdown error.
|
||||||
@ -159,6 +163,128 @@ func TestValidationBarrierQuit(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestValidationBarrierParentJobsClear tests that creating two parent jobs for
|
||||||
|
// ChannelUpdate / NodeAnnouncement will pause child jobs until the set of
|
||||||
|
// parent jobs has cleared.
|
||||||
|
func TestValidationBarrierParentJobsClear(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
const (
|
||||||
|
numTasks = 8
|
||||||
|
timeout = time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
quit := make(chan struct{})
|
||||||
|
barrier := graph.NewValidationBarrier(numTasks, quit)
|
||||||
|
|
||||||
|
sharedScid := lnwire.NewShortChanIDFromInt(0)
|
||||||
|
sharedNodeID := nodeIDFromInt(0)
|
||||||
|
|
||||||
|
// Create a set of gossip messages that depend on each other. ann1 and
|
||||||
|
// ann2 share the ShortChannelID field. ann1 and ann3 share both the
|
||||||
|
// ShortChannelID field and the NodeID1 field. These shared values let
|
||||||
|
// us test the "set" properties of the ValidationBarrier.
|
||||||
|
ann1 := &lnwire.ChannelAnnouncement1{
|
||||||
|
ShortChannelID: sharedScid,
|
||||||
|
NodeID1: sharedNodeID,
|
||||||
|
NodeID2: nodeIDFromInt(1),
|
||||||
|
}
|
||||||
|
parentID1, err := barrier.InitJobDependencies(ann1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
ann2 := &lnwire.ChannelAnnouncement1{
|
||||||
|
ShortChannelID: sharedScid,
|
||||||
|
NodeID1: nodeIDFromInt(2),
|
||||||
|
NodeID2: nodeIDFromInt(3),
|
||||||
|
}
|
||||||
|
parentID2, err := barrier.InitJobDependencies(ann2)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
ann3 := &lnwire.ChannelAnnouncement1{
|
||||||
|
ShortChannelID: sharedScid,
|
||||||
|
NodeID1: sharedNodeID,
|
||||||
|
NodeID2: nodeIDFromInt(10),
|
||||||
|
}
|
||||||
|
parentID3, err := barrier.InitJobDependencies(ann3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Create the ChannelUpdate & NodeAnnouncement messages.
|
||||||
|
upd1 := &lnwire.ChannelUpdate1{
|
||||||
|
ShortChannelID: sharedScid,
|
||||||
|
}
|
||||||
|
childID1, err := barrier.InitJobDependencies(upd1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
node1 := &lnwire.NodeAnnouncement{
|
||||||
|
NodeID: sharedNodeID,
|
||||||
|
}
|
||||||
|
childID2, err := barrier.InitJobDependencies(node1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
run := func(vb *graph.ValidationBarrier, childJobID graph.JobID,
|
||||||
|
job interface{}, resp chan error, start chan error) {
|
||||||
|
|
||||||
|
close(start)
|
||||||
|
|
||||||
|
err := vb.WaitForParents(childJobID, job)
|
||||||
|
resp <- err
|
||||||
|
}
|
||||||
|
|
||||||
|
errChan := make(chan error, 2)
|
||||||
|
|
||||||
|
startChan1 := make(chan error, 1)
|
||||||
|
startChan2 := make(chan error, 1)
|
||||||
|
|
||||||
|
go run(barrier, childID1, upd1, errChan, startChan1)
|
||||||
|
go run(barrier, childID2, node1, errChan, startChan2)
|
||||||
|
|
||||||
|
// Wait for the start signal since we are testing the case where the
|
||||||
|
// parent jobs only complete _after_ the child jobs have called. Note
|
||||||
|
// that there is technically an edge case where we receive the start
|
||||||
|
// signal and call SignalDependents before WaitForParents can actually
|
||||||
|
// be called in the goroutine launched above. In this case, which
|
||||||
|
// arises due to our inability to control precisely when these VB
|
||||||
|
// methods are scheduled (as they are in different goroutines), the
|
||||||
|
// test should still pass as we want to test that validation jobs are
|
||||||
|
// completing and not stalling. In other words, this issue with the
|
||||||
|
// test itself is good as it actually randomizes some of the ordering,
|
||||||
|
// occasionally. This tests that the VB is robust against ordering /
|
||||||
|
// concurrency issues.
|
||||||
|
select {
|
||||||
|
case <-startChan1:
|
||||||
|
case <-time.After(timeout):
|
||||||
|
t.Fatal("timed out waiting for startChan1")
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-startChan2:
|
||||||
|
case <-time.After(timeout):
|
||||||
|
t.Fatal("timed out waiting for startChan2")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we can call SignalDependents for our parent jobs.
|
||||||
|
err = barrier.SignalDependents(ann1, parentID1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = barrier.SignalDependents(ann2, parentID2)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = barrier.SignalDependents(ann3, parentID3)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-errChan:
|
||||||
|
case <-time.After(timeout):
|
||||||
|
t.Fatal("unexpected timeout waiting for first error signal")
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-errChan:
|
||||||
|
case <-time.After(timeout):
|
||||||
|
t.Fatal("unexpected timeout waiting for second error signal")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// nodeIDFromInt creates a node ID by writing a uint64 to the first 8 bytes.
|
// nodeIDFromInt creates a node ID by writing a uint64 to the first 8 bytes.
|
||||||
func nodeIDFromInt(i uint64) [33]byte {
|
func nodeIDFromInt(i uint64) [33]byte {
|
||||||
var nodeID [33]byte
|
var nodeID [33]byte
|
||||||
|
Loading…
x
Reference in New Issue
Block a user