clusterlin: sort tx in chunk by feerate and size (feature)

This changes the order of transactions within a chunk to be:
1. Topology (parents before children)
2. Individual transaction feerate (high to low)
3. Individual transaction weight (small to large)
4. Random tiebreak (will be changed in a future commit)

To do so, use a heap of topology-ready transactions within
GetLinearization(), sorted by (2), (3), and (4).

This is analogous to the order of chunks within a cluster, which is
unchanged:
1. Topology (chunks after chunks they depend on)
2. Chunk feerate (high to low)
3. Chunk weight (small to large)
4. Random tiebreak (will be changed in a future commit)
This commit is contained in:
Pieter Wuille
2026-01-07 10:36:03 -05:00
parent 6c1bcb2c7c
commit e0bc73ba92
2 changed files with 99 additions and 23 deletions

View File

@@ -12,6 +12,7 @@
#include <utility>
#include <vector>
#include <attributes.h>
#include <memusage.h>
#include <random.h>
#include <span.h>
@@ -687,6 +688,9 @@ private:
/** The number of updated transactions in activations/deactivations. */
uint64_t m_cost{0};
/** The DepGraph we are trying to linearize. */
const DepGraph<SetType>& m_depgraph;
/** Pick a random transaction within a set (which must be non-empty). */
TxIdx PickRandomTx(const SetType& tx_idxs) noexcept
{
@@ -959,7 +963,8 @@ private:
public:
/** Construct a spanning forest for the given DepGraph, with every transaction in its own chunk
* (not topological). */
explicit SpanningForestState(const DepGraph<SetType>& depgraph, uint64_t rng_seed) noexcept : m_rng(rng_seed)
explicit SpanningForestState(const DepGraph<SetType>& depgraph LIFETIMEBOUND, uint64_t rng_seed) noexcept :
m_rng(rng_seed), m_depgraph(depgraph)
{
m_transaction_idxs = depgraph.Positions();
auto num_transactions = m_transaction_idxs.Count();
@@ -1242,7 +1247,7 @@ public:
std::vector<DepGraphIndex> ret;
ret.reserve(m_transaction_idxs.Count());
/** A heap with all chunks (by representative) that can currently be included, sorted by
* chunk feerate and a random tie-breaker. */
* chunk feerate (high to low), chunk size (small to large), and a random tie-breaker. */
std::vector<std::pair<TxIdx, uint64_t>> ready_chunks;
/** Information about chunks:
* - The first value is only used for chunk representatives, and counts the number of
@@ -1253,8 +1258,9 @@ public:
std::vector<std::pair<TxIdx, TxIdx>> chunk_deps(m_tx_data.size(), {0, 0});
/** The set of all chunk representatives. */
SetType chunk_reps;
/** A list with all transactions within the current chunk that can be included. */
std::vector<TxIdx> ready_tx;
/** A heap with all transactions within the current chunk that can be included, sorted by
* tx feerate (high to low), tx size (small to large), and a random tie-breaker. */
std::vector<std::pair<TxIdx, uint64_t>> ready_tx;
// Populate chunk_deps[c] with the number of {out-of-chunk dependencies, dependencies} the
// child has.
for (TxIdx chl_idx : m_transaction_idxs) {
@@ -1267,16 +1273,35 @@ public:
chunk_deps[chl_chunk_rep].first += (par_chunk_rep != chl_chunk_rep);
}
}
/** Comparison function for the transaction heap. Note that it is a max-heap, so
* tx_cmp_fn(a, b) == true means "a appears after b in the linearization". */
auto tx_cmp_fn = [&](const auto& a, const auto& b) noexcept {
// First sort by increasing transaction feerate.
auto& a_feerate = m_depgraph.FeeRate(a.first);
auto& b_feerate = m_depgraph.FeeRate(b.first);
auto feerate_cmp = FeeRateCompare(a_feerate, b_feerate);
if (feerate_cmp != 0) return feerate_cmp < 0;
// Then by decreasing transaction size.
if (a_feerate.size != b_feerate.size) {
return a_feerate.size > b_feerate.size;
}
// Tie-break randomly.
if (a.second != b.second) return a.second < b.second;
// Lastly, tie-break by TxIdx.
return a.first < b.first;
};
// Construct a heap with all chunks that have no out-of-chunk dependencies.
/** Comparison function for the heap. */
/** Comparison function for the chunk heap. Note that it is a max-heap, so
* chunk_cmp_fn(a, b) == true means "a appears after b in the linearization". */
auto chunk_cmp_fn = [&](const std::pair<TxIdx, uint64_t>& a, const std::pair<TxIdx, uint64_t>& b) noexcept {
auto& chunk_a = m_tx_data[a.first];
auto& chunk_b = m_tx_data[b.first];
Assume(chunk_a.chunk_rep == a.first);
Assume(chunk_b.chunk_rep == b.first);
// First sort by chunk feerate.
if (chunk_a.chunk_setinfo.feerate != chunk_b.chunk_setinfo.feerate) {
return chunk_a.chunk_setinfo.feerate < chunk_b.chunk_setinfo.feerate;
// First sort by increasing chunk feerate.
auto& chunk_feerate_a = m_tx_data[a.first].chunk_setinfo.feerate;
auto& chunk_feerate_b = m_tx_data[b.first].chunk_setinfo.feerate;
auto feerate_cmp = FeeRateCompare(chunk_feerate_a, chunk_feerate_b);
if (feerate_cmp != 0) return feerate_cmp < 0;
// Then by decreasing chunk size.
if (chunk_feerate_a.size != chunk_feerate_b.size) {
return chunk_feerate_a.size > chunk_feerate_b.size;
}
// Tie-break randomly.
if (a.second != b.second) return a.second < b.second;
@@ -1287,7 +1312,7 @@ public:
if (chunk_deps[chunk_rep].first == 0) ready_chunks.emplace_back(chunk_rep, m_rng.rand64());
}
std::make_heap(ready_chunks.begin(), ready_chunks.end(), chunk_cmp_fn);
// Pop chunks off the heap, highest-feerate ones first.
// Pop chunks off the heap.
while (!ready_chunks.empty()) {
auto [chunk_rep, _rnd] = ready_chunks.front();
std::pop_heap(ready_chunks.begin(), ready_chunks.end(), chunk_cmp_fn);
@@ -1296,21 +1321,20 @@ public:
Assume(chunk_deps[chunk_rep].first == 0);
const auto& chunk_txn = m_tx_data[chunk_rep].chunk_setinfo.transactions;
// Build heap of all includable transactions in chunk.
Assume(ready_tx.empty());
for (TxIdx tx_idx : chunk_txn) {
if (chunk_deps[tx_idx].second == 0) {
ready_tx.push_back(tx_idx);
ready_tx.emplace_back(tx_idx, m_rng.rand64());
}
}
Assume(!ready_tx.empty());
// Pick transactions from the ready queue, append them to linearization, and decrement
std::make_heap(ready_tx.begin(), ready_tx.end(), tx_cmp_fn);
// Pick transactions from the ready heap, append them to linearization, and decrement
// dependency counts.
while (!ready_tx.empty()) {
// Move a random queue element to the back.
auto pos = m_rng.randrange(ready_tx.size());
if (pos != ready_tx.size() - 1) std::swap(ready_tx.back(), ready_tx[pos]);
// Pop from the back.
auto tx_idx = ready_tx.back();
Assume(chunk_txn[tx_idx]);
// Pop an element from the tx_ready heap.
auto [tx_idx, _rnd] = ready_tx.front();
std::pop_heap(ready_tx.begin(), ready_tx.end(), tx_cmp_fn);
ready_tx.pop_back();
// Append to linearization.
ret.push_back(tx_idx);
@@ -1321,8 +1345,9 @@ public:
// Decrement tx dependency count.
Assume(chunk_deps[chl_idx].second > 0);
if (--chunk_deps[chl_idx].second == 0 && chunk_txn[chl_idx]) {
// Child tx has no dependencies left, and is in this chunk. Add it to the tx queue.
ready_tx.push_back(chl_idx);
// Child tx has no dependencies left, and is in this chunk. Add it to the tx heap.
ready_tx.emplace_back(chl_idx, m_rng.rand64());
std::push_heap(ready_tx.begin(), ready_tx.end(), tx_cmp_fn);
}
// Decrement chunk dependency count if this is out-of-chunk dependency.
if (chl_data.chunk_rep != chunk_rep) {

View File

@@ -1082,6 +1082,57 @@ FUZZ_TARGET(clusterlin_linearize)
auto read_chunking = ChunkLinearization(depgraph, read);
auto cmp_read = CompareChunks(chunking, read_chunking);
assert(cmp_read >= 0);
// Verify that within every chunk, the transactions are in a valid order. For any pair of
// transactions, it should not be possible to swap them; either due to a missing
// dependency, or because the order would be inconsistent with decreasing feerate and
// increasing size.
auto chunking_info = ChunkLinearizationInfo(depgraph, linearization);
/** The set of all transactions (strictly) before tx1 (see below), or (strictly) before
* chunk1 (see even further below). */
TestBitSet done;
unsigned pos{0};
for (const auto& chunk : chunking_info) {
auto chunk_start = pos;
auto chunk_end = pos + chunk.transactions.Count() - 1;
// Go over all pairs of transactions. done is the set of transactions seen before pos1.
for (unsigned pos1 = chunk_start; pos1 <= chunk_end; ++pos1) {
auto tx1 = linearization[pos1];
for (unsigned pos2 = pos1 + 1; pos2 <= chunk_end; ++pos2) {
auto tx2 = linearization[pos2];
// Check whether tx2 only depends on transactions that precede tx1.
if ((depgraph.Ancestors(tx2) - done).Count() == 1) {
// tx2 could take position pos1.
// Verify that individual transaction feerate is decreasing (note that >=
// tie-breaks by size).
assert(depgraph.FeeRate(tx1) >= depgraph.FeeRate(tx2));
}
}
done.Set(tx1);
}
pos += chunk.transactions.Count();
}
// Verify that chunks themselves are in a valid order. For any pair of chunks, it should
// not be possible to swap them; either due to a missing dependency, or because the order
// would be inconsistent with decreasing chunk feerate and increasing chunk size.
done = {};
// Go over all pairs of chunks. done is the set of transactions seen before chunk_num1.
for (unsigned chunk_num1 = 0; chunk_num1 < chunking_info.size(); ++chunk_num1) {
const auto& chunk1 = chunking_info[chunk_num1];
for (unsigned chunk_num2 = chunk_num1 + 1; chunk_num2 < chunking_info.size(); ++chunk_num2) {
const auto& chunk2 = chunking_info[chunk_num2];
TestBitSet chunk2_ancestors;
for (auto tx : chunk2.transactions) chunk2_ancestors |= depgraph.Ancestors(tx);
// Check whether chunk2 only depends on transactions that precede chunk1.
if ((chunk2_ancestors - done).IsSubsetOf(chunk2.transactions)) {
// chunk2 could take position chunk_num1.
// Verify that chunk feerate is decreasing (note that >= tie-breaks by size).
assert(chunk1.feerate >= chunk2.feerate);
}
}
done |= chunk1.transactions;
}
}
}