From 75bdb925f404f41874adf0fcefca0f1641fcb4e6 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 12 Oct 2025 09:48:19 -0400 Subject: [PATCH] clusterlin: drop support for improvable chunking (simplification) With MergeLinearizations() gone and the LIMO-based Linearize() replaced by SFL, we do not need a class (LinearizationChunking) that can maintain an incrementally-improving chunk set anymore. Replace it with a function (ChunkLinearizationInfo) that just computes the chunks as SetInfos once, and returns them as a vector. This simplifies several call sites too. --- src/cluster_linearize.h | 150 ++++------------------------ src/test/fuzz/cluster_linearize.cpp | 138 +++---------------------- src/test/fuzz/txgraph.cpp | 5 +- src/txgraph.cpp | 26 ++--- 4 files changed, 51 insertions(+), 268 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 120bc527f3e..d65e61d4458 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -422,7 +422,27 @@ struct SetInfo friend bool operator==(const SetInfo&, const SetInfo&) noexcept = default; }; -/** Compute the feerates of the chunks of linearization. */ +/** Compute the chunks of linearization as SetInfos. */ +template +std::vector> ChunkLinearizationInfo(const DepGraph& depgraph, std::span linearization) noexcept +{ + std::vector> ret; + for (DepGraphIndex i : linearization) { + /** The new chunk to be added, initially a singleton. */ + SetInfo new_chunk(depgraph, i); + // As long as the new chunk has a higher feerate than the last chunk so far, absorb it. + while (!ret.empty() && new_chunk.feerate >> ret.back().feerate) { + new_chunk |= ret.back(); + ret.pop_back(); + } + // Actually move that new chunk into the chunking. + ret.emplace_back(std::move(new_chunk)); + } + return ret; +} + +/** Compute the feerates of the chunks of linearization. Identical to ChunkLinearizationInfo, but + * only returns the chunk feerates, not the corresponding transaction sets. */ template std::vector ChunkLinearization(const DepGraph& depgraph, std::span linearization) noexcept { @@ -441,134 +461,6 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: return ret; } -/** Data structure encapsulating the chunking of a linearization, permitting removal of subsets. */ -template -class LinearizationChunking -{ - /** The depgraph this linearization is for. */ - const DepGraph& m_depgraph; - - /** The linearization we started from, possibly with removed prefix stripped. */ - std::span m_linearization; - - /** Chunk sets and their feerates, of what remains of the linearization. */ - std::vector> m_chunks; - - /** How large a prefix of m_chunks corresponds to removed transactions. */ - DepGraphIndex m_chunks_skip{0}; - - /** Which transactions remain in the linearization. */ - SetType m_todo; - - /** Fill the m_chunks variable, and remove the done prefix of m_linearization. */ - void BuildChunks() noexcept - { - // Caller must clear m_chunks. - Assume(m_chunks.empty()); - - // Chop off the initial part of m_linearization that is already done. - while (!m_linearization.empty() && !m_todo[m_linearization.front()]) { - m_linearization = m_linearization.subspan(1); - } - - // Iterate over the remaining entries in m_linearization. This is effectively the same - // algorithm as ChunkLinearization, but supports skipping parts of the linearization and - // keeps track of the sets themselves instead of just their feerates. - for (auto idx : m_linearization) { - if (!m_todo[idx]) continue; - // Start with an initial chunk containing just element idx. - SetInfo add(m_depgraph, idx); - // Absorb existing final chunks into add while they have lower feerate. - while (!m_chunks.empty() && add.feerate >> m_chunks.back().feerate) { - add |= m_chunks.back(); - m_chunks.pop_back(); - } - // Remember new chunk. - m_chunks.push_back(std::move(add)); - } - } - -public: - /** Initialize a LinearizationSubset object for a given length of linearization. */ - explicit LinearizationChunking(const DepGraph& depgraph LIFETIMEBOUND, std::span lin LIFETIMEBOUND) noexcept : - m_depgraph(depgraph), m_linearization(lin) - { - // Mark everything in lin as todo still. - for (auto i : m_linearization) m_todo.Set(i); - // Compute the initial chunking. - m_chunks.reserve(depgraph.TxCount()); - BuildChunks(); - } - - /** Determine how many chunks remain in the linearization. */ - DepGraphIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } - - /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. */ - const SetInfo& GetChunk(DepGraphIndex n) const noexcept - { - Assume(n + m_chunks_skip < m_chunks.size()); - return m_chunks[n + m_chunks_skip]; - } - - /** Remove some subset of transactions from the linearization. */ - void MarkDone(SetType subset) noexcept - { - Assume(subset.Any()); - Assume(subset.IsSubsetOf(m_todo)); - m_todo -= subset; - if (GetChunk(0).transactions == subset) { - // If the newly done transactions exactly match the first chunk of the remainder of - // the linearization, we do not need to rechunk; just remember to skip one - // additional chunk. - ++m_chunks_skip; - // With subset marked done, some prefix of m_linearization will be done now. How long - // that prefix is depends on how many done elements were interspersed with subset, - // but at least as many transactions as there are in subset. - m_linearization = m_linearization.subspan(subset.Count()); - } else { - // Otherwise rechunk what remains of m_linearization. - m_chunks.clear(); - m_chunks_skip = 0; - BuildChunks(); - } - } - - /** Find the shortest intersection between subset and the prefixes of remaining chunks - * of the linearization that has a feerate not below subset's. - * - * This is a crucial operation in guaranteeing improvements to linearizations. If subset has - * a feerate not below GetChunk(0)'s, then moving IntersectPrefixes(subset) to the front of - * (what remains of) the linearization is guaranteed not to make it worse at any point. - * - * See https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 for background. - */ - SetInfo IntersectPrefixes(const SetInfo& subset) const noexcept - { - Assume(subset.transactions.IsSubsetOf(m_todo)); - SetInfo accumulator; - // Iterate over all chunks of the remaining linearization. - for (DepGraphIndex i = 0; i < NumChunksLeft(); ++i) { - // Find what (if any) intersection the chunk has with subset. - const SetType to_add = GetChunk(i).transactions & subset.transactions; - if (to_add.Any()) { - // If adding that to accumulator makes us hit all of subset, we are done as no - // shorter intersection with higher/equal feerate exists. - accumulator.transactions |= to_add; - if (accumulator.transactions == subset.transactions) break; - // Otherwise update the accumulator feerate. - accumulator.feerate += m_depgraph.FeeRate(to_add); - // If that does result in something better, or something with the same feerate but - // smaller, return that. Even if a longer, higher-feerate intersection exists, it - // does not hurt to return the shorter one (the remainder of the longer intersection - // will generally be found in the next call to Intersect, but even if not, it is not - // required for the improvement guarantee this function makes). - if (!(accumulator.feerate << subset.feerate)) return accumulator; - } - } - return subset; - } -}; - /** Class to represent the internal state of the spanning-forest linearization (SFL) algorithm. * * At all times, each dependency is marked as either "active" or "inactive". The subset of active diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 32848179331..ccf1be68768 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -52,9 +52,8 @@ * - clusterlin_depgraph_sim * - clusterlin_depgraph_serialization * - clusterlin_components - * - ChunkLinearization and LinearizationChunking tests: + * - ChunkLinearization and ChunkLinearizationInfo tests: * - clusterlin_chunking - * - clusterlin_linearization_chunking * - PostLinearize tests: * - clusterlin_postlinearize * - clusterlin_postlinearize_tree @@ -727,8 +726,16 @@ FUZZ_TARGET(clusterlin_chunking) // Read a valid linearization for depgraph. auto linearization = ReadLinearization(depgraph, reader); - // Invoke the chunking function. + // Invoke the chunking functions. auto chunking = ChunkLinearization(depgraph, linearization); + auto chunking_info = ChunkLinearizationInfo(depgraph, linearization); + + // Verify consistency between the two functions. + assert(chunking.size() == chunking_info.size()); + for (size_t i = 0; i < chunking.size(); ++i) { + assert(chunking[i] == chunking_info[i].feerate); + assert(SetInfo(depgraph, chunking_info[i].transactions) == chunking_info[i]); + } // Verify that chunk feerates are monotonically non-increasing. for (size_t i = 1; i < chunking.size(); ++i) { @@ -737,7 +744,7 @@ FUZZ_TARGET(clusterlin_chunking) // Naively recompute the chunks (each is the highest-feerate prefix of what remains). auto todo = depgraph.Positions(); - for (const auto& chunk_feerate : chunking) { + for (const auto& [chunk_set, chunk_feerate] : chunking_info) { assert(todo.Any()); SetInfo accumulator, best; for (DepGraphIndex idx : linearization) { @@ -749,6 +756,7 @@ FUZZ_TARGET(clusterlin_chunking) } } assert(chunk_feerate == best.feerate); + assert(chunk_set == best.transactions); assert(best.transactions.IsSubsetOf(todo)); todo -= best.transactions; } @@ -835,121 +843,6 @@ FUZZ_TARGET(clusterlin_simple_finder) assert(exh_finder.AllDone()); } -FUZZ_TARGET(clusterlin_linearization_chunking) -{ - // Verify the behavior of LinearizationChunking. - - // Retrieve a depgraph from the fuzz input. - SpanReader reader(buffer); - DepGraph depgraph; - try { - reader >> Using(depgraph); - } catch (const std::ios_base::failure&) {} - - // Retrieve a topologically-valid subset of depgraph (allowed to be empty, because the argument - // to LinearizationChunking::Intersect is allowed to be empty). - auto todo = depgraph.Positions(); - auto subset = SetInfo(depgraph, ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/false)); - - // Retrieve a valid linearization for depgraph. - auto linearization = ReadLinearization(depgraph, reader); - - // Construct a LinearizationChunking object, initially for the whole linearization. - LinearizationChunking chunking(depgraph, linearization); - - // Incrementally remove transactions from the chunking object, and check various properties at - // every step. - while (todo.Any()) { - assert(chunking.NumChunksLeft() > 0); - - // Construct linearization with just todo. - std::vector linearization_left; - for (auto i : linearization) { - if (todo[i]) linearization_left.push_back(i); - } - - // Compute the chunking for linearization_left. - auto chunking_left = ChunkLinearization(depgraph, linearization_left); - - // Verify that it matches the feerates of the chunks of chunking. - assert(chunking.NumChunksLeft() == chunking_left.size()); - for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { - assert(chunking.GetChunk(i).feerate == chunking_left[i]); - } - - // Check consistency of chunking. - TestBitSet combined; - for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { - const auto& chunk_info = chunking.GetChunk(i); - // Chunks must be non-empty. - assert(chunk_info.transactions.Any()); - // Chunk feerates must be monotonically non-increasing. - if (i > 0) assert(!(chunk_info.feerate >> chunking.GetChunk(i - 1).feerate)); - // Chunks must be a subset of what is left of the linearization. - assert(chunk_info.transactions.IsSubsetOf(todo)); - // Chunks' claimed feerates must match their transactions' aggregate feerate. - assert(depgraph.FeeRate(chunk_info.transactions) == chunk_info.feerate); - // Chunks must be the highest-feerate remaining prefix. - SetInfo accumulator, best; - for (auto j : linearization) { - if (todo[j] && !combined[j]) { - accumulator.Set(depgraph, j); - if (best.feerate.IsEmpty() || accumulator.feerate > best.feerate) { - best = accumulator; - } - } - } - assert(best.transactions == chunk_info.transactions); - assert(best.feerate == chunk_info.feerate); - // Chunks cannot overlap. - assert(!chunk_info.transactions.Overlaps(combined)); - combined |= chunk_info.transactions; - // Chunks must be topological. - for (auto idx : chunk_info.transactions) { - assert((depgraph.Ancestors(idx) & todo).IsSubsetOf(combined)); - } - } - assert(combined == todo); - - // Verify the expected properties of LinearizationChunking::IntersectPrefixes: - auto intersect = chunking.IntersectPrefixes(subset); - // - Intersecting again doesn't change the result. - assert(chunking.IntersectPrefixes(intersect) == intersect); - // - The intersection is topological. - TestBitSet intersect_anc; - for (auto idx : intersect.transactions) { - intersect_anc |= (depgraph.Ancestors(idx) & todo); - } - assert(intersect.transactions == intersect_anc); - // - The claimed intersection feerate matches its transactions. - assert(intersect.feerate == depgraph.FeeRate(intersect.transactions)); - // - The intersection may only be empty if its input is empty. - assert(intersect.transactions.Any() == subset.transactions.Any()); - // - The intersection feerate must be as high as the input. - assert(intersect.feerate >= subset.feerate); - // - No non-empty intersection between the intersection and a prefix of the chunks of the - // remainder of the linearization may be better than the intersection. - TestBitSet prefix; - for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { - prefix |= chunking.GetChunk(i).transactions; - auto reintersect = SetInfo(depgraph, prefix & intersect.transactions); - if (!reintersect.feerate.IsEmpty()) { - assert(reintersect.feerate <= intersect.feerate); - } - } - - // Find a non-empty topologically valid subset of transactions to remove from the graph. - // Using an empty set would mean the next iteration is identical to the current one, and - // could cause an infinite loop. - auto done = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true); - todo -= done; - chunking.MarkDone(done); - subset = SetInfo(depgraph, subset.transactions - done); - } - - assert(chunking.NumChunksLeft() == 0); -} - FUZZ_TARGET(clusterlin_simple_linearize) { // Verify the behavior of SimpleLinearize(). Note that SimpleLinearize is only used in tests; @@ -1207,10 +1100,9 @@ FUZZ_TARGET(clusterlin_postlinearize) assert(cmp >= 0); // The chunks that come out of postlinearizing are always connected. - LinearizationChunking linchunking(depgraph, post_linearization); - while (linchunking.NumChunksLeft()) { - assert(depgraph.IsConnected(linchunking.GetChunk(0).transactions)); - linchunking.MarkDone(linchunking.GetChunk(0).transactions); + auto linchunking = ChunkLinearizationInfo(depgraph, post_linearization); + for (const auto& [chunk_set, _chunk_feerate] : linchunking) { + assert(depgraph.IsConnected(chunk_set)); } } diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 79ee0a16e44..d44cf559e0c 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -1230,10 +1230,9 @@ FUZZ_TARGET(txgraph) // Construct a chunking object for the simulated graph, using the reported cluster // linearization as ordering, and compare it against the reported chunk feerates. if (sims.size() == 1 || level == TxGraph::Level::MAIN) { - cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + auto simlinchunk = ChunkLinearizationInfo(sim.graph, simlin); DepGraphIndex idx{0}; - for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { - auto chunk = simlinchunk.GetChunk(chunknum); + for (auto& chunk : simlinchunk) { // Require that the chunks of cluster linearizations are connected (this must // be the case as all linearizations inside are PostLinearized). assert(sim.graph.IsConnected(chunk.transactions)); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 3bc5cae6740..8a4fc28cf36 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -1013,11 +1013,11 @@ void GenericClusterImpl::Updated(TxGraphImpl& graph, int level) noexcept // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level // yet. if (level == 0 && IsAcceptable()) { - const LinearizationChunking chunking(m_depgraph, m_linearization); + auto chunking = ChunkLinearizationInfo(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; // Iterate over the chunks. - for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { - auto chunk = chunking.GetChunk(chunk_idx); + for (unsigned chunk_idx = 0; chunk_idx < chunking.size(); ++chunk_idx) { + auto& chunk = chunking[chunk_idx]; auto chunk_count = chunk.transactions.Count(); Assume(chunk_count > 0); // Iterate over the transactions in the linearization, which must match those in chunk. @@ -1031,7 +1031,7 @@ void GenericClusterImpl::Updated(TxGraphImpl& graph, int level) noexcept chunk.transactions.Reset(idx); if (chunk.transactions.None()) { // Last transaction in the chunk. - if (chunk_count == 1 && chunk_idx + 1 == chunking.NumChunksLeft()) { + if (chunk_count == 1 && chunk_idx + 1 == chunking.size()) { // If this is the final chunk of the cluster, and it contains just a single // transaction (which will always be true for the very common singleton // clusters), store the special value -1 as chunk count. @@ -1311,13 +1311,12 @@ void SingletonClusterImpl::AppendChunkFeerates(std::vector& ret) const uint64_t GenericClusterImpl::AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept { - const LinearizationChunking linchunking(m_depgraph, m_linearization); + auto linchunking = ChunkLinearizationInfo(m_depgraph, m_linearization); LinearizationIndex pos{0}; uint64_t size{0}; auto prev_index = GraphIndex(-1); // Iterate over the chunks of this cluster's linearization. - for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) { - const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i); + for (const auto& [chunk, chunk_feerate] : linchunking) { // Iterate over the transactions of that chunk, in linearization order. auto chunk_tx_count = chunk.Count(); for (unsigned j = 0; j < chunk_tx_count; ++j) { @@ -2759,7 +2758,8 @@ void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const } // Compute the chunking of m_linearization. - LinearizationChunking linchunking(m_depgraph, m_linearization); + auto linchunking = ChunkLinearizationInfo(m_depgraph, m_linearization); + unsigned chunk_num{0}; // Verify m_linearization. SetType m_done; @@ -2779,14 +2779,14 @@ void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const if (level == 0 && IsAcceptable()) { assert(entry.m_main_lin_index == linindex); ++linindex; - if (!linchunking.GetChunk(0).transactions[lin_pos]) { - linchunking.MarkDone(linchunking.GetChunk(0).transactions); + if (!linchunking[chunk_num].transactions[lin_pos]) { + ++chunk_num; chunk_pos = 0; } - assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); + assert(entry.m_main_chunk_feerate == linchunking[chunk_num].feerate); // Verify that an entry in the chunk index exists for every chunk-ending transaction. ++chunk_pos; - bool is_chunk_end = (chunk_pos == linchunking.GetChunk(0).transactions.Count()); + bool is_chunk_end = (chunk_pos == linchunking[chunk_num].transactions.Count()); assert((entry.m_main_chunkindex_iterator != graph.m_main_chunkindex.end()) == is_chunk_end); if (is_chunk_end) { auto& chunk_data = *entry.m_main_chunkindex_iterator; @@ -2797,7 +2797,7 @@ void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const } } // If this Cluster has an acceptable quality level, its chunks must be connected. - assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); + assert(m_depgraph.IsConnected(linchunking[chunk_num].transactions)); } } // Verify that each element of m_depgraph occurred in m_linearization.