clusterlin: drop support for improvable chunking (simplification)

With MergeLinearizations() gone and the LIMO-based Linearize() replaced by SFL, we no longer need a class (LinearizationChunking) that can maintain an incrementally-improving chunk set. Replace it with a function (ChunkLinearizationInfo) that computes the chunks as SetInfos just once and returns them as a vector. This simplifies several call sites too.
2026-03-12 08:35:31 +01:00 · 2025-10-12 09:48:19 -04:00
parent 91399a7912
commit 75bdb925f4
4 changed files with 51 additions and 268 deletions
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -422,7 +422,27 @@ struct SetInfo
    friend bool operator==(const SetInfo&, const SetInfo&) noexcept = default;
 };

-/** Compute the feerates of the chunks of linearization. */
+/** Compute the chunks of linearization as SetInfos (one per chunk, in linearization order). */
+template<typename SetType>
+std::vector<SetInfo<SetType>> ChunkLinearizationInfo(const DepGraph<SetType>& depgraph, std::span<const DepGraphIndex> linearization) noexcept
+{
+    std::vector<SetInfo<SetType>> ret;
+    for (DepGraphIndex i : linearization) {
+        /** The new chunk to be added, initially a singleton holding just transaction i. */
+        SetInfo<SetType> new_chunk(depgraph, i);
+        // As long as the new chunk has a higher feerate than the last chunk so far, absorb that last chunk into it.
+        while (!ret.empty() && new_chunk.feerate >> ret.back().feerate) {
+            new_chunk |= ret.back();
+            ret.pop_back();
+        }
+        // Append the (possibly merged) new chunk as the last chunk of the chunking.
+        ret.emplace_back(std::move(new_chunk));
+    }
+    return ret;
+}
+
+/** Compute the feerates of the chunks of linearization. Identical to ChunkLinearizationInfo, but
+ *  only returns the chunk feerates, not the corresponding transaction sets. */
 template<typename SetType>
 std::vector<FeeFrac> ChunkLinearization(const DepGraph<SetType>& depgraph, std::span<const DepGraphIndex> linearization) noexcept
 {
@@ -441,134 +461,6 @@ std::vector<FeeFrac> ChunkLinearization(const DepGraph<SetType>& depgraph, std::
    return ret;
 }

-/** Data structure encapsulating the chunking of a linearization, permitting removal of subsets. */
-template<typename SetType>
-class LinearizationChunking
-{
-    /** The depgraph this linearization is for. */
-    const DepGraph<SetType>& m_depgraph;
-
-    /** The linearization we started from, possibly with removed prefix stripped. */
-    std::span<const DepGraphIndex> m_linearization;
-
-    /** Chunk sets and their feerates, of what remains of the linearization. */
-    std::vector<SetInfo<SetType>> m_chunks;
-
-    /** How large a prefix of m_chunks corresponds to removed transactions. */
-    DepGraphIndex m_chunks_skip{0};
-
-    /** Which transactions remain in the linearization. */
-    SetType m_todo;
-
-    /** Fill the m_chunks variable, and remove the done prefix of m_linearization. */
-    void BuildChunks() noexcept
-    {
-        // Caller must clear m_chunks.
-        Assume(m_chunks.empty());
-
-        // Chop off the initial part of m_linearization that is already done.
-        while (!m_linearization.empty() && !m_todo[m_linearization.front()]) {
-            m_linearization = m_linearization.subspan(1);
-        }
-
-        // Iterate over the remaining entries in m_linearization. This is effectively the same
-        // algorithm as ChunkLinearization, but supports skipping parts of the linearization and
-        // keeps track of the sets themselves instead of just their feerates.
-        for (auto idx : m_linearization) {
-            if (!m_todo[idx]) continue;
-            // Start with an initial chunk containing just element idx.
-            SetInfo add(m_depgraph, idx);
-            // Absorb existing final chunks into add while they have lower feerate.
-            while (!m_chunks.empty() && add.feerate >> m_chunks.back().feerate) {
-                add |= m_chunks.back();
-                m_chunks.pop_back();
-            }
-            // Remember new chunk.
-            m_chunks.push_back(std::move(add));
-        }
-    }
-
-public:
-    /** Initialize a LinearizationSubset object for a given length of linearization. */
-    explicit LinearizationChunking(const DepGraph<SetType>& depgraph LIFETIMEBOUND, std::span<const DepGraphIndex> lin LIFETIMEBOUND) noexcept :
-        m_depgraph(depgraph), m_linearization(lin)
-    {
-        // Mark everything in lin as todo still.
-        for (auto i : m_linearization) m_todo.Set(i);
-        // Compute the initial chunking.
-        m_chunks.reserve(depgraph.TxCount());
-        BuildChunks();
-    }
-
-    /** Determine how many chunks remain in the linearization. */
-    DepGraphIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; }
-
-    /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. */
-    const SetInfo<SetType>& GetChunk(DepGraphIndex n) const noexcept
-    {
-        Assume(n + m_chunks_skip < m_chunks.size());
-        return m_chunks[n + m_chunks_skip];
-    }
-
-    /** Remove some subset of transactions from the linearization. */
-    void MarkDone(SetType subset) noexcept
-    {
-        Assume(subset.Any());
-        Assume(subset.IsSubsetOf(m_todo));
-        m_todo -= subset;
-        if (GetChunk(0).transactions == subset) {
-            // If the newly done transactions exactly match the first chunk of the remainder of
-            // the linearization, we do not need to rechunk; just remember to skip one
-            // additional chunk.
-            ++m_chunks_skip;
-            // With subset marked done, some prefix of m_linearization will be done now. How long
-            // that prefix is depends on how many done elements were interspersed with subset,
-            // but at least as many transactions as there are in subset.
-            m_linearization = m_linearization.subspan(subset.Count());
-        } else {
-            // Otherwise rechunk what remains of m_linearization.
-            m_chunks.clear();
-            m_chunks_skip = 0;
-            BuildChunks();
-        }
-    }
-
-    /** Find the shortest intersection between subset and the prefixes of remaining chunks
-     *  of the linearization that has a feerate not below subset's.
-     *
-     * This is a crucial operation in guaranteeing improvements to linearizations. If subset has
-     * a feerate not below GetChunk(0)'s, then moving IntersectPrefixes(subset) to the front of
-     * (what remains of) the linearization is guaranteed not to make it worse at any point.
-     *
-     * See https://delvingbitcoin.org/t/introduction-to-cluster-linearization/1032 for background.
-     */
-    SetInfo<SetType> IntersectPrefixes(const SetInfo<SetType>& subset) const noexcept
-    {
-        Assume(subset.transactions.IsSubsetOf(m_todo));
-        SetInfo<SetType> accumulator;
-        // Iterate over all chunks of the remaining linearization.
-        for (DepGraphIndex i = 0; i < NumChunksLeft(); ++i) {
-            // Find what (if any) intersection the chunk has with subset.
-            const SetType to_add = GetChunk(i).transactions & subset.transactions;
-            if (to_add.Any()) {
-                // If adding that to accumulator makes us hit all of subset, we are done as no
-                // shorter intersection with higher/equal feerate exists.
-                accumulator.transactions |= to_add;
-                if (accumulator.transactions == subset.transactions) break;
-                // Otherwise update the accumulator feerate.
-                accumulator.feerate += m_depgraph.FeeRate(to_add);
-                // If that does result in something better, or something with the same feerate but
-                // smaller, return that. Even if a longer, higher-feerate intersection exists, it
-                // does not hurt to return the shorter one (the remainder of the longer intersection
-                // will generally be found in the next call to Intersect, but even if not, it is not
-                // required for the improvement guarantee this function makes).
-                if (!(accumulator.feerate << subset.feerate)) return accumulator;
-            }
-        }
-        return subset;
-    }
-};
-
 /** Class to represent the internal state of the spanning-forest linearization (SFL) algorithm.
 *
 * At all times, each dependency is marked as either "active" or "inactive". The subset of active
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -52,9 +52,8 @@
 *   - clusterlin_depgraph_sim
 *   - clusterlin_depgraph_serialization
 *   - clusterlin_components
- * - ChunkLinearization and LinearizationChunking tests:
+ * - ChunkLinearization and ChunkLinearizationInfo tests:
 *   - clusterlin_chunking
- *   - clusterlin_linearization_chunking
 * - PostLinearize tests:
 *   - clusterlin_postlinearize
 *   - clusterlin_postlinearize_tree
@@ -727,8 +726,16 @@ FUZZ_TARGET(clusterlin_chunking)
    // Read a valid linearization for depgraph.
    auto linearization = ReadLinearization(depgraph, reader);

-    // Invoke the chunking function.
+    // Invoke the chunking functions.
    auto chunking = ChunkLinearization(depgraph, linearization);
+    auto chunking_info = ChunkLinearizationInfo(depgraph, linearization);
+
+    // Verify consistency between the two functions.
+    assert(chunking.size() == chunking_info.size());
+    for (size_t i = 0; i < chunking.size(); ++i) {
+        assert(chunking[i] == chunking_info[i].feerate);
+        assert(SetInfo(depgraph, chunking_info[i].transactions) == chunking_info[i]);
+    }

    // Verify that chunk feerates are monotonically non-increasing.
    for (size_t i = 1; i < chunking.size(); ++i) {
@@ -737,7 +744,7 @@ FUZZ_TARGET(clusterlin_chunking)

    // Naively recompute the chunks (each is the highest-feerate prefix of what remains).
    auto todo = depgraph.Positions();
-    for (const auto& chunk_feerate : chunking) {
+    for (const auto& [chunk_set, chunk_feerate] : chunking_info) {
        assert(todo.Any());
        SetInfo<TestBitSet> accumulator, best;
        for (DepGraphIndex idx : linearization) {
@@ -749,6 +756,7 @@ FUZZ_TARGET(clusterlin_chunking)
            }
        }
        assert(chunk_feerate == best.feerate);
+        assert(chunk_set == best.transactions);
        assert(best.transactions.IsSubsetOf(todo));
        todo -= best.transactions;
    }
@@ -835,121 +843,6 @@ FUZZ_TARGET(clusterlin_simple_finder)
    assert(exh_finder.AllDone());
 }

-FUZZ_TARGET(clusterlin_linearization_chunking)
-{
-    // Verify the behavior of LinearizationChunking.
-
-    // Retrieve a depgraph from the fuzz input.
-    SpanReader reader(buffer);
-    DepGraph<TestBitSet> depgraph;
-    try {
-        reader >> Using<DepGraphFormatter>(depgraph);
-    } catch (const std::ios_base::failure&) {}
-
-    // Retrieve a topologically-valid subset of depgraph (allowed to be empty, because the argument
-    // to LinearizationChunking::Intersect is allowed to be empty).
-    auto todo = depgraph.Positions();
-    auto subset = SetInfo(depgraph, ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/false));
-
-    // Retrieve a valid linearization for depgraph.
-    auto linearization = ReadLinearization(depgraph, reader);
-
-    // Construct a LinearizationChunking object, initially for the whole linearization.
-    LinearizationChunking chunking(depgraph, linearization);
-
-    // Incrementally remove transactions from the chunking object, and check various properties at
-    // every step.
-    while (todo.Any()) {
-        assert(chunking.NumChunksLeft() > 0);
-
-        // Construct linearization with just todo.
-        std::vector<DepGraphIndex> linearization_left;
-        for (auto i : linearization) {
-            if (todo[i]) linearization_left.push_back(i);
-        }
-
-        // Compute the chunking for linearization_left.
-        auto chunking_left = ChunkLinearization(depgraph, linearization_left);
-
-        // Verify that it matches the feerates of the chunks of chunking.
-        assert(chunking.NumChunksLeft() == chunking_left.size());
-        for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) {
-            assert(chunking.GetChunk(i).feerate == chunking_left[i]);
-        }
-
-        // Check consistency of chunking.
-        TestBitSet combined;
-        for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) {
-            const auto& chunk_info = chunking.GetChunk(i);
-            // Chunks must be non-empty.
-            assert(chunk_info.transactions.Any());
-            // Chunk feerates must be monotonically non-increasing.
-            if (i > 0) assert(!(chunk_info.feerate >> chunking.GetChunk(i - 1).feerate));
-            // Chunks must be a subset of what is left of the linearization.
-            assert(chunk_info.transactions.IsSubsetOf(todo));
-            // Chunks' claimed feerates must match their transactions' aggregate feerate.
-            assert(depgraph.FeeRate(chunk_info.transactions) == chunk_info.feerate);
-            // Chunks must be the highest-feerate remaining prefix.
-            SetInfo<TestBitSet> accumulator, best;
-            for (auto j : linearization) {
-                if (todo[j] && !combined[j]) {
-                    accumulator.Set(depgraph, j);
-                    if (best.feerate.IsEmpty() || accumulator.feerate > best.feerate) {
-                        best = accumulator;
-                    }
-                }
-            }
-            assert(best.transactions == chunk_info.transactions);
-            assert(best.feerate == chunk_info.feerate);
-            // Chunks cannot overlap.
-            assert(!chunk_info.transactions.Overlaps(combined));
-            combined |= chunk_info.transactions;
-            // Chunks must be topological.
-            for (auto idx : chunk_info.transactions) {
-                assert((depgraph.Ancestors(idx) & todo).IsSubsetOf(combined));
-            }
-        }
-        assert(combined == todo);
-
-        // Verify the expected properties of LinearizationChunking::IntersectPrefixes:
-        auto intersect = chunking.IntersectPrefixes(subset);
-        // - Intersecting again doesn't change the result.
-        assert(chunking.IntersectPrefixes(intersect) == intersect);
-        // - The intersection is topological.
-        TestBitSet intersect_anc;
-        for (auto idx : intersect.transactions) {
-            intersect_anc |= (depgraph.Ancestors(idx) & todo);
-        }
-        assert(intersect.transactions == intersect_anc);
-        // - The claimed intersection feerate matches its transactions.
-        assert(intersect.feerate == depgraph.FeeRate(intersect.transactions));
-        // - The intersection may only be empty if its input is empty.
-        assert(intersect.transactions.Any() == subset.transactions.Any());
-        // - The intersection feerate must be as high as the input.
-        assert(intersect.feerate >= subset.feerate);
-        // - No non-empty intersection between the intersection and a prefix of the chunks of the
-        //   remainder of the linearization may be better than the intersection.
-        TestBitSet prefix;
-        for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) {
-            prefix |= chunking.GetChunk(i).transactions;
-            auto reintersect = SetInfo(depgraph, prefix & intersect.transactions);
-            if (!reintersect.feerate.IsEmpty()) {
-                assert(reintersect.feerate <= intersect.feerate);
-            }
-        }
-
-        // Find a non-empty topologically valid subset of transactions to remove from the graph.
-        // Using an empty set would mean the next iteration is identical to the current one, and
-        // could cause an infinite loop.
-        auto done = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
-        todo -= done;
-        chunking.MarkDone(done);
-        subset = SetInfo(depgraph, subset.transactions - done);
-    }
-
-    assert(chunking.NumChunksLeft() == 0);
-}
-
 FUZZ_TARGET(clusterlin_simple_linearize)
 {
    // Verify the behavior of SimpleLinearize(). Note that SimpleLinearize is only used in tests;
@@ -1207,10 +1100,9 @@ FUZZ_TARGET(clusterlin_postlinearize)
    assert(cmp >= 0);

    // The chunks that come out of postlinearizing are always connected.
-    LinearizationChunking linchunking(depgraph, post_linearization);
-    while (linchunking.NumChunksLeft()) {
-        assert(depgraph.IsConnected(linchunking.GetChunk(0).transactions));
-        linchunking.MarkDone(linchunking.GetChunk(0).transactions);
+    auto linchunking = ChunkLinearizationInfo(depgraph, post_linearization);
+    for (const auto& [chunk_set, _chunk_feerate] : linchunking) {
+        assert(depgraph.IsConnected(chunk_set));
    }
 }

--- a/src/test/fuzz/txgraph.cpp
+++ b/src/test/fuzz/txgraph.cpp
@@ -1230,10 +1230,9 @@ FUZZ_TARGET(txgraph)
                    // Construct a chunking object for the simulated graph, using the reported cluster
                    // linearization as ordering, and compare it against the reported chunk feerates.
                    if (sims.size() == 1 || level == TxGraph::Level::MAIN) {
-                        cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin);
+                        auto simlinchunk = ChunkLinearizationInfo(sim.graph, simlin);
                        DepGraphIndex idx{0};
-                        for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) {
-                            auto chunk = simlinchunk.GetChunk(chunknum);
+                        for (auto& chunk : simlinchunk) {
                            // Require that the chunks of cluster linearizations are connected (this must
                            // be the case as all linearizations inside are PostLinearized).
                            assert(sim.graph.IsConnected(chunk.transactions));
--- a/src/txgraph.cpp
+++ b/src/txgraph.cpp
@@ -1013,11 +1013,11 @@ void GenericClusterImpl::Updated(TxGraphImpl& graph, int level) noexcept
    // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level
    // yet.
    if (level == 0 && IsAcceptable()) {
-        const LinearizationChunking chunking(m_depgraph, m_linearization);
+        auto chunking = ChunkLinearizationInfo(m_depgraph, m_linearization);
        LinearizationIndex lin_idx{0};
        // Iterate over the chunks.
-        for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) {
-            auto chunk = chunking.GetChunk(chunk_idx);
+        for (unsigned chunk_idx = 0; chunk_idx < chunking.size(); ++chunk_idx) {
+            auto& chunk = chunking[chunk_idx];
            auto chunk_count = chunk.transactions.Count();
            Assume(chunk_count > 0);
            // Iterate over the transactions in the linearization, which must match those in chunk.
@@ -1031,7 +1031,7 @@ void GenericClusterImpl::Updated(TxGraphImpl& graph, int level) noexcept
                chunk.transactions.Reset(idx);
                if (chunk.transactions.None()) {
                    // Last transaction in the chunk.
-                    if (chunk_count == 1 && chunk_idx + 1 == chunking.NumChunksLeft()) {
+                    if (chunk_count == 1 && chunk_idx + 1 == chunking.size()) {
                        // If this is the final chunk of the cluster, and it contains just a single
                        // transaction (which will always be true for the very common singleton
                        // clusters), store the special value -1 as chunk count.
@@ -1311,13 +1311,12 @@ void SingletonClusterImpl::AppendChunkFeerates(std::vector<FeeFrac>& ret) const

 uint64_t GenericClusterImpl::AppendTrimData(std::vector<TrimTxData>& ret, std::vector<std::pair<GraphIndex, GraphIndex>>& deps) const noexcept
 {
-    const LinearizationChunking linchunking(m_depgraph, m_linearization);
+    auto linchunking = ChunkLinearizationInfo(m_depgraph, m_linearization);
    LinearizationIndex pos{0};
    uint64_t size{0};
    auto prev_index = GraphIndex(-1);
    // Iterate over the chunks of this cluster's linearization.
-    for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) {
-        const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i);
+    for (const auto& [chunk, chunk_feerate] : linchunking) {
        // Iterate over the transactions of that chunk, in linearization order.
        auto chunk_tx_count = chunk.Count();
        for (unsigned j = 0; j < chunk_tx_count; ++j) {
@@ -2759,7 +2758,8 @@ void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const
    }

    // Compute the chunking of m_linearization.
-    LinearizationChunking linchunking(m_depgraph, m_linearization);
+    auto linchunking = ChunkLinearizationInfo(m_depgraph, m_linearization);
+    unsigned chunk_num{0};

    // Verify m_linearization.
    SetType m_done;
@@ -2779,14 +2779,14 @@ void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const
        if (level == 0 && IsAcceptable()) {
            assert(entry.m_main_lin_index == linindex);
            ++linindex;
-            if (!linchunking.GetChunk(0).transactions[lin_pos]) {
-                linchunking.MarkDone(linchunking.GetChunk(0).transactions);
+            if (!linchunking[chunk_num].transactions[lin_pos]) {
+                ++chunk_num;
                chunk_pos = 0;
            }
-            assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate);
+            assert(entry.m_main_chunk_feerate == linchunking[chunk_num].feerate);
            // Verify that an entry in the chunk index exists for every chunk-ending transaction.
            ++chunk_pos;
-            bool is_chunk_end = (chunk_pos == linchunking.GetChunk(0).transactions.Count());
+            bool is_chunk_end = (chunk_pos == linchunking[chunk_num].transactions.Count());
            assert((entry.m_main_chunkindex_iterator != graph.m_main_chunkindex.end()) == is_chunk_end);
            if (is_chunk_end) {
                auto& chunk_data = *entry.m_main_chunkindex_iterator;
@@ -2797,7 +2797,7 @@ void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const
                }
            }
            // If this Cluster has an acceptable quality level, its chunks must be connected.
-            assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions));
+            assert(m_depgraph.IsConnected(linchunking[chunk_num].transactions));
        }
    }
    // Verify that each element of m_depgraph occurred in m_linearization.