etcd: redesign commit queue to make it more robust and scalable

This commit builds on the ideas of @cfromknecht in lnd/5153. The
addition is that the design is now simpler and more robust: everything
is queued up, while still allowing maximal parallelism where txns don't
block. Furthermore, the commit makes CommitQueue.Done() private,
essentially removing the need to understand the queue externally.
This commit is contained in:
Andras Banki-Horvath
2021-06-24 22:10:40 +02:00
parent 02aa77261d
commit b29ae94e10
5 changed files with 136 additions and 62 deletions

View File

@@ -3,14 +3,11 @@
package etcd
import (
"container/list"
"context"
"sync"
)
// commitQueueSize is the maximum number of commits we allow to queue up. All
// remaining commits will block on commitQueue.Add().
const commitQueueSize = 100
// commitQueue is a simple execution queue to manage conflicts for transactions
// and thereby reduce the number of times conflicting transactions need to be
// retried. When a new transaction is added to the queue, we first upgrade the
@@ -25,9 +22,18 @@ type commitQueue struct {
readerMap map[string]int
writerMap map[string]int
commitMutex sync.RWMutex
queue chan (func())
wg sync.WaitGroup
queue *list.List
queueMx sync.Mutex
queueCond *sync.Cond
shutdown chan struct{}
}
// commitQueueTxn is a single entry of the commit queue: the commit closure to
// run together with the bookkeeping needed to schedule it and to release its
// key lock counts once it has run.
type commitQueueTxn struct {
// commitLoop is the commit closure handed to Add; mainLoop invokes it in
// its own goroutine.
commitLoop func()
// blocked is the conflict flag computed by Add. mainLoop cuts a batch of
// parallel work at the next queued txn that has this flag set.
blocked bool
// rset holds the read-set keys passed to Add; they are handed to done
// after commitLoop returns.
rset []string
// wset holds the write-set keys passed to Add; they are handed to done
// after commitLoop returns.
wset []string
}
// NewCommitQueue creates a new commit queue, with the passed abort context.
@@ -36,19 +42,24 @@ func NewCommitQueue(ctx context.Context) *commitQueue {
ctx: ctx,
readerMap: make(map[string]int),
writerMap: make(map[string]int),
queue: make(chan func(), commitQueueSize),
queue: list.New(),
shutdown: make(chan struct{}),
}
q.queueCond = sync.NewCond(&q.queueMx)
// Start the queue consumer loop.
q.wg.Add(1)
go q.mainLoop()
return q
}
// Wait waits for the queue to stop (after the queue context has been canceled).
func (c *commitQueue) Wait() {
c.wg.Wait()
// Stop signals the queue to stop after the queue context has been canceled and
// waits until the queue has stopped.
func (c *commitQueue) Stop() {
// Signal the queue's condition variable to ensure the mainLoop reliably
// unblocks to check for the exit condition.
c.queueCond.Signal()
<-c.shutdown
}
// Add increases lock counts and queues up tx commit closure for execution.
@@ -82,33 +93,22 @@ func (c *commitQueue) Add(commitLoop func(), rset []string, wset []string) {
c.readerMap[key] += 1
}
if blocked {
// Add the transaction to the queue if it conflicts with an already
// queued one.
c.mx.Unlock()
c.queueCond.L.Lock()
c.queue.PushBack(&commitQueueTxn{
commitLoop: commitLoop,
blocked: blocked,
rset: rset,
wset: wset,
})
c.queueCond.L.Unlock()
select {
case c.queue <- commitLoop:
case <-c.ctx.Done():
}
} else {
// To make sure we don't add a new tx to the queue that depends
// on this "unblocked" tx, grab the commitMutex before lifting
// the mutex guarding the lock maps.
c.commitMutex.RLock()
c.mx.Unlock()
c.mx.Unlock()
// At this point we're safe to execute the "unblocked" tx, as
// we cannot execute blocked tx that may have been read from the
// queue until the commitMutex is held.
commitLoop()
c.commitMutex.RUnlock()
}
c.queueCond.Signal()
}
// Done decreases lock counts of the keys in the read/write sets.
func (c *commitQueue) Done(rset []string, wset []string) {
// done decreases lock counts of the keys in the read/write sets.
func (c *commitQueue) done(rset []string, wset []string) {
c.mx.Lock()
defer c.mx.Unlock()
@@ -131,20 +131,82 @@ func (c *commitQueue) Done(rset []string, wset []string) {
// dependencies. The queue ensures that the top element doesn't conflict with
// any other transactions and therefore can be executed freely.
func (c *commitQueue) mainLoop() {
defer c.wg.Done()
defer close(c.shutdown)
for {
select {
case top := <-c.queue:
// Execute the next blocked transaction. As it is
// the top element in the queue it means that it doesn't
// depend on any other transactions anymore.
c.commitMutex.Lock()
top()
c.commitMutex.Unlock()
// Wait until there are no unblocked transactions being
// executed, and for there to be at least one blocked
// transaction in our queue.
c.queueCond.L.Lock()
for c.queue.Front() == nil {
c.queueCond.Wait()
// Check the exit condition before looping again.
select {
case <-c.ctx.Done():
c.queueCond.L.Unlock()
return
default:
}
}
// Now collect all txns until we find the next blocking one.
// These shouldn't conflict (if the precollected read/write
// key sets don't grow), meaning we can safely commit them
// in parallel.
work := make([]*commitQueueTxn, 1)
e := c.queue.Front()
work[0] = c.queue.Remove(e).(*commitQueueTxn)
for {
e := c.queue.Front()
if e == nil {
break
}
next := e.Value.(*commitQueueTxn)
if !next.blocked {
work = append(work, next)
c.queue.Remove(e)
} else {
// We found the next blocking txn which means
// the block of work needs to be cut here.
break
}
}
c.queueCond.L.Unlock()
// Check if we need to exit before continuing.
select {
case <-c.ctx.Done():
return
default:
}
var wg sync.WaitGroup
wg.Add(len(work))
// Fire up N goroutines where each will run its commit loop
// and then clean up the reader/writer maps.
for _, txn := range work {
go func(txn *commitQueueTxn) {
defer wg.Done()
txn.commitLoop()
// We can safely cleanup here as done only
// holds the main mutex.
c.done(txn.rset, txn.wset)
}(txn)
}
wg.Wait()
// Check if we need to exit before continuing.
select {
case <-c.ctx.Done():
return
default:
}
}
}