From da56ef239b12786e3a177afda14352dda4a70bc6 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 14 Oct 2025 16:15:19 -0400 Subject: [PATCH] clusterlin: minimize chunks (feature) After the normal optimization process finishes, and finds an optimal spanning forest, run a second process (while computation budget remains) to split chunks into minimal equal-feerate chunks. --- src/cluster_linearize.h | 207 +++++++++++++++++++++++++-- src/test/cluster_linearize_tests.cpp | 6 +- src/test/fuzz/cluster_linearize.cpp | 32 ++++- src/test/util/cluster_linearize.h | 16 +-- src/txgraph.cpp | 3 +- 5 files changed, 235 insertions(+), 29 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index c397e879de9..8be16625afc 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -479,7 +479,7 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * feerate, each individually sorted in an arbitrary but topological (= no child before parent) * way. * - * We define three quality properties the state can have, each being stronger than the previous: + * We define four quality properties the state can have: * * - acyclic: The state is acyclic whenever no cycle of active dependencies exists within the * graph, ignoring the parent/child direction. This is equivalent to saying that within @@ -529,7 +529,19 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * satisfied. Thus, the state being optimal is more a "the eventual output is *known* * to be optimal". * - * The algorithm terminates whenever an optimal state is reached. + * - minimal: We say the state is minimal when it is: + * - acyclic + * - topological, except that inactive dependencies between equal-feerate chunks are + * allowed as long as they do not form a loop. + * - like optimal, no active dependencies whose top feerate is strictly higher than + * the bottom feerate are allowed. + * - no chunk contains a proper non-empty subset which includes all its own in-chunk + * dependencies of the same feerate as the chunk itself. + * + * A minimal state effectively corresponds to an optimal state, where every chunk has + * been split into its minimal equal-feerate components. + * + * The algorithm terminates whenever a minimal state is reached. * * * This leads to the following high-level algorithm: @@ -539,6 +551,10 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * - Loop until optimal (no dependencies with higher-feerate top than bottom), or time runs out: * - Deactivate a violating dependency, potentially making the state non-topological. * - Activate other dependencies to make the state topological again. + * - If there is time left and the state is optimal: + * - Attempt to split chunks into equal-feerate parts without mutual dependencies between them. + * When this succeeds, recurse into them. + * - If no such chunks can be found, the state is minimal. * - Output the chunks from high to low feerate, each internally sorted topologically. * * When merging, we always either: @@ -558,6 +574,7 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * - Deactivate D, causing the chunk it is in to split into top T and bottom B. * - Do an upwards merge of T, if possible. If so, repeat the same with the merged result. * - Do a downwards merge of B, if possible. If so, repeat the same with the merged result. + * - Split chunks further to obtain a minimal state, see below. * - Output the chunks from high to low feerate, each internally sorted topologically. 
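To make the new "minimal" property concrete, here is a self-contained sketch (using made-up fees and sizes, not the DepGraph/FeeFrac machinery from this file) of a chunk of three equal-feerate transactions A, B, C whose only dependency is A->B: chunking them as one chunk {A,B,C} or as the split {A,B} + {C} yields the same feerate diagram, but the split has more segments, and it is the split that a minimal state produces.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Illustrative stand-in for a chunking: each entry is one chunk's total (fee, size).
    struct Chunk { int64_t fee; int32_t size; };

    int main()
    {
        std::vector<Chunk> merged{{3000, 300}};             // {A,B,C} as a single chunk
        std::vector<Chunk> split{{2000, 200}, {1000, 100}}; // minimal split: {A,B} then {C}
        // Every cumulative point of the split lies on the merged chunk's single segment
        // (feerate 10 throughout), so the two diagrams compare as equal ...
        int64_t fee = 0; int32_t size = 0;
        for (const auto& c : split) {
            fee += c.fee;
            size += c.size;
            assert(fee == int64_t{10} * size);
        }
        assert(fee == 3000 && size == 300);
        // ... but the split has more segments; a minimal state splits chunks as far as possible.
        assert(split.size() > merged.size());
    }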
* * Instead of performing merges arbitrarily to make the initial state topological, it is possible @@ -569,6 +586,21 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * - Do an upwards merge of C, if possible. If so, repeat the same with the merged result. * No downwards merges are needed in this case. * + * After reaching an optimal state, it can be transformed into a minimal state by attempting to + * split chunks further into equal-feerate parts. To do so, pick a specific transaction in each + * chunk (the pivot), and rerun the above split-then-merge procedure again: + * - first, while pretending the pivot transaction has an infinitesimally higher (or lower) fee + * than it really has. If a split exists with the pivot in the top part (or bottom part), this + * will find it. + * - if that fails to split, repeat while pretending the pivot transaction has an infinitesimally + * lower (or higher) fee. If a split exists with the pivot in the bottom part (or top part), this + * will find it. + * - if either succeeds, repeat the procedure for the newly found chunks to split them further. + * If not, the chunk is already minimal. + * If the chunk can be split into equal-feerate parts, then the pivot must exist in either the top + * or bottom part of that potential split. By trying both with the same pivot, if a split exists, + * it will be found. + * * What remains to be specified are a number of heuristics: * * - How to decide which chunks to merge: @@ -644,10 +676,30 @@ private: std::vector m_dep_data; /** A FIFO of chunk representatives of chunks that may be improved still. */ VecDeque m_suboptimal_chunks; + /** A FIFO of chunk representatives with a pivot transaction in them, and a flag to indicate + * their status: + * - bit 1: currently attempting to move the pivot down, rather than up. + * - bit 2: this is the second stage, so we have already tried moving the pivot in the other + * direction. + */ + VecDeque> m_nonminimal_chunks; /** The number of updated transactions in activations/deactivations. */ uint64_t m_cost{0}; + /** Pick a random transaction within a set (which must be non-empty). */ + TxIdx PickRandomTx(const SetType& tx_idxs) noexcept + { + Assume(tx_idxs.Any()); + unsigned pos = m_rng.randrange(tx_idxs.Count()); + for (auto tx_idx : tx_idxs) { + if (pos == 0) return tx_idx; + --pos; + } + Assume(false); + return TxIdx(-1); + } + /** Update a chunk: * - All transactions have their chunk representative set to `chunk_rep`. * - All dependencies which have `query` in their top_setinfo get `dep_change` added to it @@ -769,8 +821,9 @@ private: /*chunk_rep=*/bottom_rep, /*dep_change=*/top_part); } - /** Activate a dependency from the chunk represented by bottom_rep to the chunk represented by - * top_rep, which must exist. Return the representative of the merged chunk. */ + /** Activate a dependency from the chunk represented by bottom_idx to the chunk represented by + * top_idx. Return the representative of the merged chunk, or TxIdx(-1) if no merge is + * possible. */ TxIdx MergeChunks(TxIdx top_rep, TxIdx bottom_rep) noexcept { auto& top_chunk = m_tx_data[top_rep]; @@ -783,7 +836,7 @@ private: auto& tx_data = m_tx_data[tx]; num_deps += (tx_data.children & bottom_chunk.chunk_setinfo.transactions).Count(); } - Assume(num_deps > 0); + if (num_deps == 0) return TxIdx(-1); // Uniformly randomly pick one of them and activate it. 
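// The pick below is drawn uniformly from [0, num_deps); the loop that follows walks the top
// chunk's transactions and their child dependencies into the bottom chunk in iteration order,
// counting down until the pick-th such dependency is reached, and that one is activated.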
TxIdx pick = m_rng.randrange(num_deps); for (auto tx : top_chunk.chunk_setinfo.transactions) { @@ -1068,6 +1121,119 @@ public: return false; } + /** Initialize data structure for minimizing the chunks. Can only be called if state is known + * to be optimal. OptimizeStep() cannot be called anymore afterwards. */ + void StartMinimizing() noexcept + { + m_nonminimal_chunks.clear(); + m_nonminimal_chunks.reserve(m_transaction_idxs.Count()); + // Gather all chunks, and for each, add it with a random pivot in it, and a random initial + // direction, to m_nonminimal_chunks. + for (auto tx : m_transaction_idxs) { + auto& tx_data = m_tx_data[tx]; + if (tx_data.chunk_rep == tx) { + TxIdx pivot_idx = PickRandomTx(tx_data.chunk_setinfo.transactions); + m_nonminimal_chunks.emplace_back(tx, pivot_idx, m_rng.randbits<1>()); + // Randomize the initial order of nonminimal chunks in the queue. + TxIdx j = m_rng.randrange(m_nonminimal_chunks.size()); + if (j != m_nonminimal_chunks.size() - 1) { + std::swap(m_nonminimal_chunks.back(), m_nonminimal_chunks[j]); + } + } + } + } + + /** Try to reduce a chunk's size. Returns false if all chunks are minimal, true otherwise. */ + bool MinimizeStep() noexcept + { + // If the queue of potentially-non-minimal chunks is empty, we are done. + if (m_nonminimal_chunks.empty()) return false; + // Pop an entry from the potentially-non-minimal chunk queue. + auto [chunk_rep, pivot_idx, flags] = m_nonminimal_chunks.front(); + m_nonminimal_chunks.pop_front(); + auto& chunk_data = m_tx_data[chunk_rep]; + Assume(chunk_data.chunk_rep == chunk_rep); + /** Whether to move the pivot down rather than up. */ + bool move_pivot_down = flags & 1; + /** Whether this is already the second stage. */ + bool second_stage = flags & 2; + + // Find a random dependency whose top and bottom set feerates are equal, and which has + // pivot in bottom set (if move_pivot_down) or in top set (if !move_pivot_down). + DepIdx candidate_dep = DepIdx(-1); + uint64_t candidate_tiebreak{0}; + bool have_any = false; + // Iterate over all transactions. + for (auto tx_idx : chunk_data.chunk_setinfo.transactions) { + const auto& tx_data = m_tx_data[tx_idx]; + // Iterate over all active child dependencies of the transaction. + for (auto dep_idx : tx_data.child_deps) { + auto& dep_data = m_dep_data[dep_idx]; + // Skip inactive child dependencies. + if (!dep_data.active) continue; + // Skip if this dependency does not have equal top and bottom set feerates. Note + // that the top cannot have higher feerate than the bottom, or OptimizeSteps would + // have dealt with it. + if (dep_data.top_setinfo.feerate << chunk_data.chunk_setinfo.feerate) continue; + have_any = true; + // Skip if this dependency does not have pivot in the right place. + if (move_pivot_down == dep_data.top_setinfo.transactions[pivot_idx]) continue; + // Remember this as our chosen dependency if it has a better tiebreak. + uint64_t tiebreak = m_rng.rand64() | 1; + if (tiebreak > candidate_tiebreak) { + candidate_tiebreak = tiebreak; + candidate_dep = dep_idx; + } + } + } + // If no dependencies have equal top and bottom set feerate, this chunk is minimal. + if (!have_any) return true; + // If all found dependencies have the pivot in the wrong place, try moving it in the other + // direction. If this was the second stage already, we are done. + if (candidate_tiebreak == 0) { + // Switch to other direction, and to second phase. 
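// Flag bit semantics at this point: flags & 1 selects the pivot's direction of movement
// (set = move the pivot down), and flags & 2 marks the second stage. XOR-ing with 3 therefore
// flips the direction and simultaneously records that the other direction has already been
// tried without finding a split.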
+ flags ^= 3; + if (!second_stage) m_nonminimal_chunks.emplace_back(chunk_rep, pivot_idx, flags); + return true; + } + + // Otherwise, deactivate the dependency that was found. + Deactivate(candidate_dep); + auto& dep_data = m_dep_data[candidate_dep]; + auto parent_chunk_rep = m_tx_data[dep_data.parent].chunk_rep; + auto child_chunk_rep = m_tx_data[dep_data.child].chunk_rep; + // Try to activate a dependency between the new bottom and the new top (opposite from the + // dependency that was just deactivated). + auto merged_chunk_rep = MergeChunks(child_chunk_rep, parent_chunk_rep); + if (merged_chunk_rep != TxIdx(-1)) { + // A self-merge happened. + // Re-insert the chunk into the queue, in the same direction. Note that the chunk_rep + // will have changed. + m_nonminimal_chunks.emplace_back(merged_chunk_rep, pivot_idx, flags); + } else { + // No self-merge happens, and thus we have found a way to split the chunk. Create two + // smaller chunks, and add them to the queue. The one that contains the current pivot + // gets to continue with it in the same direction, to minimize the number of times we + // alternate direction. If we were in the second phase already, the newly created chunk + // inherits that too, because we know no split with the pivot on the other side is + // possible already. The new chunk without the current pivot gets a new randomly-chosen + // one. + if (move_pivot_down) { + auto parent_pivot_idx = PickRandomTx(m_tx_data[parent_chunk_rep].chunk_setinfo.transactions); + m_nonminimal_chunks.emplace_back(parent_chunk_rep, parent_pivot_idx, m_rng.randbits<1>()); + m_nonminimal_chunks.emplace_back(child_chunk_rep, pivot_idx, flags); + } else { + auto child_pivot_idx = PickRandomTx(m_tx_data[child_chunk_rep].chunk_setinfo.transactions); + m_nonminimal_chunks.emplace_back(parent_chunk_rep, pivot_idx, flags); + m_nonminimal_chunks.emplace_back(child_chunk_rep, child_pivot_idx, m_rng.randbits<1>()); + } + if (m_rng.randbool()) { + std::swap(m_nonminimal_chunks.back(), m_nonminimal_chunks[m_nonminimal_chunks.size() - 2]); + } + } + return true; + } + /** Construct a topologically-valid linearization from the current forest state. Must be * topological. */ std::vector GetLinearization() noexcept @@ -1182,6 +1348,10 @@ public: * After an OptimizeStep(), the diagram will always be at least as good as before. Once * OptimizeStep() returns false, the diagram will be equivalent to that produced by * GetLinearization(), and optimal. + * + * After a MinimizeStep(), the diagram cannot change anymore (in the CompareChunks() sense), + * but its number of segments can increase still. Once MinimizeStep() returns false, the number + * of chunks of the produced linearization will match the number of segments in the diagram. */ std::vector GetDiagram() const noexcept { @@ -1328,6 +1498,19 @@ public: auto tx_idx = m_suboptimal_chunks[i]; assert(m_transaction_idxs[tx_idx]); } + + // + // Verify m_nonminimal_chunks. + // + SetType nonminimal_reps; + for (size_t i = 0; i < m_nonminimal_chunks.size(); ++i) { + auto [chunk_rep, pivot, flags] = m_nonminimal_chunks[i]; + assert(m_tx_data[chunk_rep].chunk_rep == chunk_rep); + assert(m_tx_data[pivot].chunk_rep == chunk_rep); + assert(!nonminimal_reps[chunk_rep]); + nonminimal_reps.Set(chunk_rep); + } + assert(nonminimal_reps.IsSubsetOf(m_transaction_idxs)); } }; @@ -1345,7 +1528,7 @@ public: * - The resulting linearization. It is guaranteed to be at least as * good (in the feerate diagram sense) as old_linearization. 
* - A boolean indicating whether the result is guaranteed to be - * optimal. + * optimal with minimal chunks. * - How many optimization steps were actually performed. */ template @@ -1361,11 +1544,19 @@ std::tuple, bool, uint64_t> Linearize(const DepGraph< } // Make improvement steps to it until we hit the max_iterations limit, or an optimal result // is found. - bool optimal = false; if (forest.GetCost() < max_iterations) { forest.StartOptimizing(); do { - if (!forest.OptimizeStep()) { + if (!forest.OptimizeStep()) break; + } while (forest.GetCost() < max_iterations); + } + // Make chunk minimization steps until we hit the max_iterations limit, or all chunks are + // minimal. + bool optimal = false; + if (forest.GetCost() < max_iterations) { + forest.StartMinimizing(); + do { + if (!forest.MinimizeStep()) { optimal = true; break; } diff --git a/src/test/cluster_linearize_tests.cpp b/src/test/cluster_linearize_tests.cpp index a2be29776cb..92e80f7cda3 100644 --- a/src/test/cluster_linearize_tests.cpp +++ b/src/test/cluster_linearize_tests.cpp @@ -94,10 +94,8 @@ void TestOptimalLinearization(const std::vector& enc, const std::vector SanityCheck(depgraph, lin); auto chunking = ChunkLinearization(depgraph, lin); BOOST_CHECK(std::is_eq(CompareChunks(chunking, optimal_diagram))); - // TODO: temporarily disabled; SFL does not guarantee minimal chunks. This will be - // reinstated in a future commit. - // // Verify that the chunks are minimal. - // BOOST_CHECK(chunking.size() == optimal_diagram.size()); + // Verify that the chunks are minimal. + BOOST_CHECK_EQUAL(chunking.size(), optimal_diagram.size()); } tx_count = depgraph.PositionRange(); }; diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index cb5267a4ff0..6e7e73e1649 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -914,7 +914,8 @@ FUZZ_TARGET(clusterlin_sfl) // Function to test the state. std::vector last_diagram; - auto test_fn = [&](bool is_optimal = false) { + bool was_optimal{false}; + auto test_fn = [&](bool is_optimal = false, bool is_minimal = false) { if (rng.randbits(4) == 0) { // Perform sanity checks from time to time (too computationally expensive to do after // every step). @@ -930,13 +931,21 @@ FUZZ_TARGET(clusterlin_sfl) assert(cmp_lin >= 0); // If we're in an allegedly optimal state, they must match. if (is_optimal) assert(cmp_lin == 0); + // If we're in an allegedly minimal state, they must also have the same number of + // segments. + if (is_minimal) assert(diagram.size() == lin_diagram.size()); } // Verify that subsequent calls to GetDiagram() never get worse/incomparable. if (!last_diagram.empty()) { auto cmp = CompareChunks(diagram, last_diagram); assert(cmp >= 0); + // If the last diagram was already optimal, the new one cannot be better. + if (was_optimal) assert(cmp == 0); + // Also, if the diagram was already optimal, the number of segments can only increase. + if (was_optimal) assert(diagram.size() >= last_diagram.size()); } last_diagram = std::move(diagram); + was_optimal = is_optimal; }; if (load_linearization) { @@ -963,7 +972,15 @@ FUZZ_TARGET(clusterlin_sfl) test_fn(); if (!sfl.OptimizeStep()) break; } + + // Loop until minimal. test_fn(/*is_optimal=*/true); + sfl.StartMinimizing(); + while (true) { + test_fn(/*is_optimal=*/true); + if (!sfl.MinimizeStep()) break; + } + test_fn(/*is_optimal=*/true, /*is_minimal=*/true); // Verify that optimality is reached within an expected amount of work. 
This protects against // hypothetical bugs that hugely increase the amount of work needed to reach optimality. @@ -975,6 +992,9 @@ FUZZ_TARGET(clusterlin_sfl) auto simple_cmp = CompareChunks(last_diagram, simple_diagram); assert(simple_cmp >= 0); if (simple_optimal) assert(simple_cmp == 0); + // If the diagram matches, we must also have at least as many segments (because the SFL state + // and its produced diagram are minimal); + if (simple_cmp == 0) assert(last_diagram.size() >= simple_diagram.size()); // We can compare with any arbitrary linearization, and the diagram must be at least as good as // each. @@ -983,6 +1003,7 @@ FUZZ_TARGET(clusterlin_sfl) auto read_diagram = ChunkLinearization(depgraph, read_lin); auto cmp = CompareChunks(last_diagram, read_diagram); assert(cmp >= 0); + if (cmp == 0) assert(last_diagram.size() >= read_diagram.size()); } } @@ -1052,12 +1073,9 @@ FUZZ_TARGET(clusterlin_linearize) // SimpleLinearize is broken). if (simple_optimal) assert(cmp == 0); - // Temporarily disabled, as Linearize() currently does not guarantee minimal chunks, even - // when it reports an optimal result. This will be re-introduced in a later commit. - // - // // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as - // // chunking is claimed to be optimal, which implies minimal chunks). - // if (cmp == 0) assert(chunking.size() >= simple_chunking.size()); + // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as + // chunking is claimed to be optimal, which implies minimal chunks). + if (cmp == 0) assert(chunking.size() >= simple_chunking.size()); // Compare with a linearization read from the fuzz input. auto read = ReadLinearization(depgraph, reader); diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index f57c486bd97..40ce7f62723 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -402,14 +402,14 @@ inline uint64_t MaxOptimalLinearizationIters(DepGraphIndex cluster_count) // *some* reasonable cost bound, optimal linearizations are always found. static constexpr uint64_t ITERS[65] = { 0, - 0, 2, 8, 21, 51, 96, 162, 200, - 273, 323, 413, 506, 602, 788, 883, 948, - 1153, 1187, 1367, 1619, 1854, 2271, 2257, 2707, - 2904, 3275, 3342, 4209, 4648, 4146, 4273, 4905, - 5358, 5767, 5977, 6777, 7812, 7689, 8478, 8425, - 9561, 11765, 10743, 11806, 12812, 12838, 15421, 16778, - 16661, 19393, 17995, 23947, 23314, 24564, 26209, 29267, - 24719, 31065, 31794, 29185, 32465, 35432, 39986, 36865 + 0, 4, 10, 34, 76, 118, 184, 225, + 320, 376, 464, 573, 830, 868, 1019, 1468, + 1375, 1785, 1880, 1854, 2551, 2559, 4336, 4784, + 5547, 5807, 6157, 6075, 6961, 7403, 7756, 8001, + 8041, 7579, 8483, 10077, 9015, 9388, 9626, 12371, + 12847, 12102, 15173, 15800, 20319, 22190, 23183, 24361, + 24909, 19225, 27419, 23789, 25909, 21993, 25596, 24130, + 26349, 31823, 31855, 31250, 32688, 34825, 41710, 45478 }; assert(cluster_count < std::size(ITERS)); // Multiply the table number by two, to account for the fact that they are not absolutes. 
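For context, the surrounding comment states that the tabulated numbers are doubled when used as a bound, since they are empirical maxima rather than hard guarantees. Below is a hedged sketch of that lookup, reproducing only the first few entries of the table above and assuming the doubling is the only adjustment (the real MaxOptimalLinearizationIters may do more):

    #include <cassert>
    #include <cstdint>
    #include <iterator>

    // Hypothetical, simplified mirror of MaxOptimalLinearizationIters(): per-cluster-size cost
    // ceiling taken from the (truncated) table above, with a factor-two headroom per the comment.
    uint64_t MaxOptimalLinearizationItersSketch(unsigned cluster_count)
    {
        static constexpr uint64_t ITERS[]{0, 0, 4, 10, 34, 76, 118, 184, 225};
        assert(cluster_count < std::size(ITERS));
        return 2 * ITERS[cluster_count];
    }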
diff --git a/src/txgraph.cpp b/src/txgraph.cpp index d76ee60e032..f60d8ca2a66 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -2063,8 +2063,7 @@ std::pair GenericClusterImpl::Relinearize(TxGraphImpl& graph, in uint64_t rng_seed = graph.m_rng.rand64(); auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization, /*is_topological=*/IsTopological()); // Postlinearize to undo some of the non-determinism caused by randomizing the linearization. - // This also guarantees that all chunks are connected (which is not guaranteed by SFL - // currently, even when optimal). + // This also guarantees that all chunks are connected (even when non-optimal). PostLinearize(m_depgraph, linearization); // Update the linearization. m_linearization = std::move(linearization);
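Tying the pieces together, the control flow that Linearize() now implements (and whose result Relinearize() post-processes above) can be restated as the following simplified sketch. It assumes only the forest interface introduced in this patch (GetCost, StartOptimizing/OptimizeStep, StartMinimizing/MinimizeStep); construction of the forest, the returned linearization, and the exact cost accounting are omitted, and the wrapper name is illustrative.

    // Simplified restatement of the two-phase loop in Linearize(); Forest stands in for the
    // spanning-forest linearizer type used in cluster_linearize.h.
    template<typename Forest>
    bool OptimizeThenMinimize(Forest& forest, uint64_t max_iterations)
    {
        // Phase 1: split-and-merge steps until an optimal state is reached or the budget runs out.
        if (forest.GetCost() < max_iterations) {
            forest.StartOptimizing();
            do {
                if (!forest.OptimizeStep()) break;       // optimal state reached
            } while (forest.GetCost() < max_iterations);
        }
        // Phase 2: only entered if phase 1 finished within budget (so the state is known optimal);
        // split chunks into minimal equal-feerate components until MinimizeStep() reports done.
        bool optimal_and_minimal = false;
        if (forest.GetCost() < max_iterations) {
            forest.StartMinimizing();
            do {
                if (!forest.MinimizeStep()) {            // all chunks are minimal
                    optimal_and_minimal = true;
                    break;
                }
            } while (forest.GetCost() < max_iterations);
        }
        return optimal_and_minimal;
    }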