mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-06-04 02:02:42 +02:00
Merge bitcoin/bitcoin#30286: cluster mempool: optimized candidate search
9ad2fe7e69 clusterlin: only start/use search when enough iterations left (Pieter Wuille)
bd044356ed clusterlin: improve heuristic to decide split transaction (optimization) (Pieter Wuille)
71f2629398 clusterlin: include topological pot subsets automatically (optimization) (Pieter Wuille)
e20fda77a2 clusterlin: reduce computation of unnecessary pot sets (optimization) (Pieter Wuille)
6060a948ca clusterlin bench: add example hard cluster benchmarks (Pieter Wuille)
2965fbf203 clusterlin: track upper bound potential set for work items (optimization) (Pieter Wuille)
9e43e4ce10 clusterlin: use feerate-sorted depgraph in SearchCandidateFinder (Pieter Wuille)
b80e6dfe78 clusterlin: add reordering support for DepGraph (Pieter Wuille)
85a285a306 clusterlin: separate initial search entries per component (optimization) (Pieter Wuille)
e4faea9ca7 clusterlin bench: have low/high iter benchmarks instead of per-iter (Pieter Wuille)

Pull request description:

Part of cluster mempool: #30289

Depends on #30126, and was split off from it. This improves the candidate search algorithm introduced in the previous PR with a variety of optimizations.

The resulting search algorithm largely follows Section 2 of [How to linearize your cluster](https://delvingbitcoin.org/t/how-to-linearize-your-cluster/303#h-2-finding-high-feerate-subsets-5), though with a few changes:
* Connected component analysis is performed inside the search algorithm (creating initial work items per component for each candidate), rather than once at a higher level. This duplicates some work but is significantly simpler in implementation.
* No ancestor-set based presplitting inside the search is performed; instead, the `best` value is initialized with the best topologically valid set known to the LIMO algorithm before search starts: the better one out of the highest-feerate remaining ancestor set, and the highest-feerate prefix of remaining transactions in `old_linearization`.
* Work items are represented using an included set *inc* and an undefined set *und*, rather than included and excluded.
* Potential sets *pot* are not computed for work items with empty *inc*.

At a high level, the only missing optimization from that post is bottleneck analysis; my thinking is that it only really helps with clusters that are already relatively cheap to linearize (doing so would need to be done at a higher level, not inside the search algorithm).

---

Overview of the impact of each commit here on linearize performance:
* **[clusterlin bench: have low/high iter benchmarks instead of per-iter](21a184db63)**: no impact
* **[separate initial search entries per component (optimization)](c84c5c86ba)**: reduce iterations, increase start-up cost
* **[add reordering support for DepGraph](019ff29609)**: no impact
* **[use feerate-sorted depgraph in SearchCandidateFinder](8e27dd5a22)**: typically reduce iterations, increase start-up cost
* **[track upper bound potential set for work items](781e0fb3aa)**: reduce iterations, increase cost per iteration
* **[reduce computation of unnecessary pot sets](9fe834fa97)**: reduce cost per iteration
* **[include topological pot subsets automatically](30612710a4)**: reduce iterations, increase cost per iteration
* **[improve heuristic to decide split transaction](1880c00ab1)**: typically reduce iterations, increase cost per iteration
* **[only start/use search when enough iterations left](12760a57b3)**: just account for start-up cost as equivalent iterations

ACKs for top commit:
  sdaftuar: ACK 9ad2fe7e69
  instagibbs: reACK 9ad2fe7e69
  glozow: reACK 9ad2fe7e69, just have a question about the docs

Tree-SHA512: 108bcbb0676f36071eb83954059b5f3d6646c745015b644a2a5d7f5a8ac9424c2d01d339fa6318a3aff4cf313308e85bb80b0090899720a3fcba027b8025590a
This commit is contained in:
@@ -165,6 +165,23 @@ std::pair<std::vector<ClusterIndex>, bool> SimpleLinearize(const DepGraph<SetTyp
|
||||
return {std::move(linearization), optimal};
|
||||
}
|
||||
|
||||
/** Stitch connected components together in a DepGraph, guaranteeing its corresponding cluster is connected. */
|
||||
template<typename BS>
|
||||
void MakeConnected(DepGraph<BS>& depgraph)
|
||||
{
|
||||
auto todo = BS::Fill(depgraph.TxCount());
|
||||
auto comp = depgraph.FindConnectedComponent(todo);
|
||||
Assume(depgraph.IsConnected(comp));
|
||||
todo -= comp;
|
||||
while (todo.Any()) {
|
||||
auto nextcomp = depgraph.FindConnectedComponent(todo);
|
||||
Assume(depgraph.IsConnected(nextcomp));
|
||||
depgraph.AddDependency(comp.Last(), nextcomp.First());
|
||||
todo -= nextcomp;
|
||||
comp = nextcomp;
|
||||
}
|
||||
}
|
||||
|
||||
/** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */
|
||||
template<typename SetType>
|
||||
SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& todo, SpanReader& reader)
|
||||
@@ -369,6 +386,20 @@ FUZZ_TARGET(clusterlin_components)
|
||||
assert(depgraph.FindConnectedComponent(todo).None());
|
||||
}
|
||||
|
||||
FUZZ_TARGET(clusterlin_make_connected)
{
    // Check the MakeConnected test helper itself: whatever graph the fuzz input decodes to,
    // the result must pass SanityCheck and be reported as connected.

    // Decode a dependency graph from the fuzz input. A stream failure (e.g. the input running
    // out) is deliberately ignored; the test proceeds with whatever depgraph holds afterwards.
    DepGraph<TestBitSet> depgraph;
    SpanReader reader(buffer);
    try {
        reader >> Using<DepGraphFormatter>(depgraph);
    } catch (const std::ios_base::failure&) {
    }

    // Stitch the components together, then verify the outcome.
    MakeConnected(depgraph);
    SanityCheck(depgraph);
    assert(depgraph.IsConnected());
}
|
||||
|
||||
FUZZ_TARGET(clusterlin_chunking)
|
||||
{
|
||||
// Verify the correctness of the ChunkLinearization function.
|
||||
@@ -398,7 +429,7 @@ FUZZ_TARGET(clusterlin_chunking)
|
||||
SetInfo<TestBitSet> accumulator, best;
|
||||
for (ClusterIndex idx : linearization) {
|
||||
if (todo[idx]) {
|
||||
accumulator |= SetInfo(depgraph, idx);
|
||||
accumulator.Set(depgraph, idx);
|
||||
if (best.feerate.IsEmpty() || accumulator.feerate >> best.feerate) {
|
||||
best = accumulator;
|
||||
}
|
||||
@@ -427,6 +458,7 @@ FUZZ_TARGET(clusterlin_ancestor_finder)
|
||||
while (todo.Any()) {
|
||||
// Call the ancestor finder's FindCandidateSet for what remains of the graph.
|
||||
assert(!anc_finder.AllDone());
|
||||
assert(todo.Count() == anc_finder.NumRemaining());
|
||||
auto best_anc = anc_finder.FindCandidateSet();
|
||||
// Sanity check the result.
|
||||
assert(best_anc.transactions.Any());
|
||||
@@ -458,6 +490,7 @@ FUZZ_TARGET(clusterlin_ancestor_finder)
|
||||
anc_finder.MarkDone(del_set);
|
||||
}
|
||||
assert(anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == 0);
|
||||
}
|
||||
|
||||
static constexpr auto MAX_SIMPLE_ITERATIONS = 300000;
|
||||
@@ -468,13 +501,17 @@ FUZZ_TARGET(clusterlin_search_finder)
|
||||
// and comparing with the results from SimpleCandidateFinder, ExhaustiveCandidateFinder, and
|
||||
// AncestorCandidateFinder.
|
||||
|
||||
// Retrieve an RNG seed and a depgraph from the fuzz input.
|
||||
// Retrieve an RNG seed, a depgraph, and whether to make it connected, from the fuzz input.
|
||||
SpanReader reader(buffer);
|
||||
DepGraph<TestBitSet> depgraph;
|
||||
uint64_t rng_seed{0};
|
||||
uint8_t make_connected{1};
|
||||
try {
|
||||
reader >> Using<DepGraphFormatter>(depgraph) >> rng_seed;
|
||||
reader >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
// The most complicated graphs are connected ones (other ones just split up). Optionally force
|
||||
// the graph to be connected.
|
||||
if (make_connected) MakeConnected(depgraph);
|
||||
|
||||
// Instantiate ALL the candidate finders.
|
||||
SearchCandidateFinder src_finder(depgraph, rng_seed);
|
||||
@@ -488,6 +525,7 @@ FUZZ_TARGET(clusterlin_search_finder)
|
||||
assert(!smp_finder.AllDone());
|
||||
assert(!exh_finder.AllDone());
|
||||
assert(!anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == todo.Count());
|
||||
|
||||
// For each iteration, read an iteration count limit from the fuzz input.
|
||||
uint64_t max_iterations = 1;
|
||||
@@ -513,9 +551,17 @@ FUZZ_TARGET(clusterlin_search_finder)
|
||||
assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo));
|
||||
}
|
||||
|
||||
// At most 2^N-1 iterations can be required: the number of non-empty subsets a graph with N
|
||||
// transactions has.
|
||||
assert(iterations_done <= ((uint64_t{1} << todo.Count()) - 1));
|
||||
// At most 2^(N-1) iterations can be required: the maximum number of non-empty topological
|
||||
// subsets a (connected) cluster with N transactions can have. Even when the cluster is no
|
||||
// longer connected after removing certain transactions, this holds, because the connected
|
||||
// components are searched separately.
|
||||
assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1)));
|
||||
// Additionally, test that no more than sqrt(2^N)+1 iterations are required. This is just
|
||||
// an empirical bound that seems to hold, without proof. Still, add a test for it so we
|
||||
// can learn about counterexamples if they exist.
|
||||
if (iterations_done >= 1 && todo.Count() <= 63) {
|
||||
Assume((iterations_done - 1) * (iterations_done - 1) <= uint64_t{1} << todo.Count());
|
||||
}
|
||||
|
||||
// Perform quality checks only if SearchCandidateFinder claims an optimal result.
|
||||
if (iterations_done < max_iterations) {
|
||||
@@ -562,6 +608,7 @@ FUZZ_TARGET(clusterlin_search_finder)
|
||||
assert(smp_finder.AllDone());
|
||||
assert(exh_finder.AllDone());
|
||||
assert(anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == 0);
|
||||
}
|
||||
|
||||
FUZZ_TARGET(clusterlin_linearization_chunking)
|
||||
@@ -621,7 +668,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking)
|
||||
SetInfo<TestBitSet> accumulator, best;
|
||||
for (auto j : linearization) {
|
||||
if (todo[j] && !combined[j]) {
|
||||
accumulator |= SetInfo(depgraph, j);
|
||||
accumulator.Set(depgraph, j);
|
||||
if (best.feerate.IsEmpty() || accumulator.feerate > best.feerate) {
|
||||
best = accumulator;
|
||||
}
|
||||
@@ -685,14 +732,19 @@ FUZZ_TARGET(clusterlin_linearize)
|
||||
{
|
||||
// Verify the behavior of Linearize().
|
||||
|
||||
// Retrieve an RNG seed, an iteration count, and a depgraph from the fuzz input.
|
||||
// Retrieve an RNG seed, an iteration count, a depgraph, and whether to make it connected from
|
||||
// the fuzz input.
|
||||
SpanReader reader(buffer);
|
||||
DepGraph<TestBitSet> depgraph;
|
||||
uint64_t rng_seed{0};
|
||||
uint64_t iter_count{0};
|
||||
uint8_t make_connected{1};
|
||||
try {
|
||||
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed;
|
||||
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
// The most complicated graphs are connected ones (other ones just split up). Optionally force
|
||||
// the graph to be connected.
|
||||
if (make_connected) MakeConnected(depgraph);
|
||||
|
||||
// Optionally construct an old linearization for it.
|
||||
std::vector<ClusterIndex> old_linearization;
|
||||
@@ -721,12 +773,24 @@ FUZZ_TARGET(clusterlin_linearize)
|
||||
}
|
||||
|
||||
// If the iteration count is sufficiently high, an optimal linearization must be found.
|
||||
// Each linearization step can use up to 2^k iterations, with steps k=1..n. That sum is
|
||||
// 2 * (2^n - 1)
|
||||
// Each linearization step can use up to 2^(k-1) iterations, with steps k=1..n. That sum is
|
||||
// 2^n - 1.
|
||||
const uint64_t n = depgraph.TxCount();
|
||||
if (n <= 18 && iter_count > 2U * ((uint64_t{1} << n) - 1U)) {
|
||||
if (n <= 19 && iter_count > (uint64_t{1} << n)) {
|
||||
assert(optimal);
|
||||
}
|
||||
// Additionally, if the assumption of sqrt(2^k)+1 iterations per step holds, plus ceil(k/4)
|
||||
// start-up cost per step, plus ceil(n^2/64) start-up cost overall, we can compute the upper
|
||||
// bound for a whole linearization (summing for k=1..n) using the Python expression
|
||||
// [sum((k+3)//4 + int(math.sqrt(2**k)) + 1 for k in range(1, n + 1)) + (n**2 + 63) // 64 for n in range(0, 35)]:
|
||||
static constexpr uint64_t MAX_OPTIMAL_ITERS[] = {
|
||||
0, 4, 8, 12, 18, 26, 37, 51, 70, 97, 133, 182, 251, 346, 480, 666, 927, 1296, 1815, 2545,
|
||||
3576, 5031, 7087, 9991, 14094, 19895, 28096, 39690, 56083, 79263, 112041, 158391, 223936,
|
||||
316629, 447712
|
||||
};
|
||||
if (n < std::size(MAX_OPTIMAL_ITERS) && iter_count >= MAX_OPTIMAL_ITERS[n]) {
|
||||
Assume(optimal);
|
||||
}
|
||||
|
||||
// If Linearize claims optimal result, run quality tests.
|
||||
if (optimal) {
|
||||
|
||||
@@ -102,7 +102,7 @@ bool IsAcyclic(const DepGraph<SetType>& depgraph) noexcept
|
||||
struct DepGraphFormatter
|
||||
{
|
||||
/** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */
|
||||
static uint64_t SignedToUnsigned(int64_t x) noexcept
|
||||
[[maybe_unused]] static uint64_t SignedToUnsigned(int64_t x) noexcept
|
||||
{
|
||||
if (x < 0) {
|
||||
return 2 * uint64_t(-(x + 1)) + 1;
|
||||
@@ -112,7 +112,7 @@ struct DepGraphFormatter
|
||||
}
|
||||
|
||||
/** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */
|
||||
static int64_t UnsignedToSigned(uint64_t x) noexcept
|
||||
[[maybe_unused]] static int64_t UnsignedToSigned(uint64_t x) noexcept
|
||||
{
|
||||
if (x & 1) {
|
||||
return -int64_t(x / 2) - 1;
|
||||
@@ -186,7 +186,7 @@ struct DepGraphFormatter
|
||||
/** The dependency graph which we deserialize into first, with transactions in
|
||||
* topological serialization order, not original cluster order. */
|
||||
DepGraph<SetType> topo_depgraph;
|
||||
/** Mapping from cluster order to serialization order, used later to reconstruct the
|
||||
/** Mapping from serialization order to cluster order, used later to reconstruct the
|
||||
* cluster order. */
|
||||
std::vector<ClusterIndex> reordering;
|
||||
|
||||
@@ -205,9 +205,9 @@ struct DepGraphFormatter
|
||||
coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC.
|
||||
static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000);
|
||||
auto fee = UnsignedToSigned(coded_fee);
|
||||
// Extend topo_depgraph with the new transaction (at the end).
|
||||
// Extend topo_depgraph with the new transaction (preliminarily at the end).
|
||||
auto topo_idx = topo_depgraph.AddTransaction({fee, size});
|
||||
reordering.push_back(topo_idx);
|
||||
reordering.push_back(reordering.size());
|
||||
// Read dependency information.
|
||||
uint64_t diff = 0; //!< How many potential parents we have to skip.
|
||||
s >> VARINT(diff);
|
||||
@@ -226,31 +226,23 @@ struct DepGraphFormatter
|
||||
--diff;
|
||||
}
|
||||
}
|
||||
// If we reach this point, we can interpret the remaining skip value as how far from the
|
||||
// end of reordering topo_idx should be placed (wrapping around), so move it to its
|
||||
// correct location. The preliminary reordering.push_back(topo_idx) above was to make
|
||||
// sure that if a deserialization exception occurs, topo_idx still appears somewhere.
|
||||
// If we reach this point, we can interpret the remaining skip value as how far
|
||||
// from the end of reordering the new transaction should be placed (wrapping
|
||||
// around), so remove the preliminary position it was put in above (which was to
|
||||
// make sure that if a deserialization exception occurs, the new transaction still
|
||||
// has some entry in reordering).
|
||||
reordering.pop_back();
|
||||
reordering.insert(reordering.end() - (diff % (reordering.size() + 1)), topo_idx);
|
||||
ClusterIndex insert_distance = diff % (reordering.size() + 1);
|
||||
// And then update reordering to reflect this new transaction's insertion.
|
||||
for (auto& pos : reordering) {
|
||||
pos += (pos >= reordering.size() - insert_distance);
|
||||
}
|
||||
reordering.push_back(reordering.size() - insert_distance);
|
||||
}
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
|
||||
// Construct the original cluster order depgraph.
|
||||
depgraph = {};
|
||||
// Add transactions to depgraph in the original cluster order.
|
||||
for (auto topo_idx : reordering) {
|
||||
depgraph.AddTransaction(topo_depgraph.FeeRate(topo_idx));
|
||||
}
|
||||
// Translate dependencies from topological to cluster order.
|
||||
for (ClusterIndex idx = 0; idx < reordering.size(); ++idx) {
|
||||
ClusterIndex topo_idx = reordering[idx];
|
||||
for (ClusterIndex dep_idx = 0; dep_idx < reordering.size(); ++dep_idx) {
|
||||
ClusterIndex dep_topo_idx = reordering[dep_idx];
|
||||
if (topo_depgraph.Ancestors(topo_idx)[dep_topo_idx]) {
|
||||
depgraph.AddDependency(dep_idx, idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
depgraph = DepGraph(topo_depgraph, reordering);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user