From da56ef239b12786e3a177afda14352dda4a70bc6 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 14 Oct 2025 16:15:19 -0400 Subject: [PATCH] clusterlin: minimize chunks (feature) After the normal optimization process finishes, and finds an optimal spanning forest, run a second process (while computation budget remains) to split chunks into minimal equal-feerate chunks. --- src/cluster_linearize.h | 207 +++++++++++++++++++++++++-- src/test/cluster_linearize_tests.cpp | 6 +- src/test/fuzz/cluster_linearize.cpp | 32 ++++- src/test/util/cluster_linearize.h | 16 +-- src/txgraph.cpp | 3 +- 5 files changed, 235 insertions(+), 29 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index c397e879de9..8be16625afc 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -479,7 +479,7 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * feerate, each individually sorted in an arbitrary but topological (= no child before parent) * way. * - * We define three quality properties the state can have, each being stronger than the previous: + * We define four quality properties the state can have: * * - acyclic: The state is acyclic whenever no cycle of active dependencies exists within the * graph, ignoring the parent/child direction. This is equivalent to saying that within @@ -529,7 +529,19 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * satisfied. Thus, the state being optimal is more a "the eventual output is *known* * to be optimal". * - * The algorithm terminates whenever an optimal state is reached. + * - minimal: We say the state is minimal when it is: + * - acyclic + * - topological, except that inactive dependencies between equal-feerate chunks are + * allowed as long as they do not form a loop. + * - like optimal, no active dependencies whose top feerate is strictly higher than + * the bottom feerate are allowed. + * - no chunk contains a proper non-empty subset which includes all its own in-chunk + * dependencies of the same feerate as the chunk itself. + * + * A minimal state effectively corresponds to an optimal state, where every chunk has + * been split into its minimal equal-feerate components. + * + * The algorithm terminates whenever a minimal state is reached. * * * This leads to the following high-level algorithm: @@ -539,6 +551,10 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * - Loop until optimal (no dependencies with higher-feerate top than bottom), or time runs out: * - Deactivate a violating dependency, potentially making the state non-topological. * - Activate other dependencies to make the state topological again. + * - If there is time left and the state is optimal: + * - Attempt to split chunks into equal-feerate parts without mutual dependencies between them. + * When this succeeds, recurse into them. + * - If no such chunks can be found, the state is minimal. * - Output the chunks from high to low feerate, each internally sorted topologically. * * When merging, we always either: @@ -558,6 +574,7 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * - Deactivate D, causing the chunk it is in to split into top T and bottom B. * - Do an upwards merge of T, if possible. If so, repeat the same with the merged result. * - Do a downwards merge of B, if possible. If so, repeat the same with the merged result. + * - Split chunks further to obtain a minimal state, see below. * - Output the chunks from high to low feerate, each internally sorted topologically. 
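To make the new "minimal" property concrete, here is a self-contained sketch (using made-up fees and sizes, not the DepGraph/FeeFrac machinery from this file) of a chunk of three equal-feerate transactions A, B, C whose only dependency is A->B: chunking them as one chunk {A,B,C} or as the split {A,B} + {C} yields the same feerate diagram, but the split has more segments, and it is the split that a minimal state produces.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Illustrative stand-in for a chunking: each entry is one chunk's total (fee, size).
    struct Chunk { int64_t fee; int32_t size; };

    int main()
    {
        std::vector<Chunk> merged{{3000, 300}};             // {A,B,C} as a single chunk
        std::vector<Chunk> split{{2000, 200}, {1000, 100}}; // minimal split: {A,B} then {C}
        // Every cumulative point of the split lies on the merged chunk's single segment
        // (feerate 10 throughout), so the two diagrams compare as equal ...
        int64_t fee = 0; int32_t size = 0;
        for (const auto& c : split) {
            fee += c.fee;
            size += c.size;
            assert(fee == int64_t{10} * size);
        }
        assert(fee == 3000 && size == 300);
        // ... but the split has more segments; a minimal state splits chunks as far as possible.
        assert(split.size() > merged.size());
    }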
* * Instead of performing merges arbitrarily to make the initial state topological, it is possible @@ -569,6 +586,21 @@ std::vector ChunkLinearization(const DepGraph& depgraph, std:: * - Do an upwards merge of C, if possible. If so, repeat the same with the merged result. * No downwards merges are needed in this case. * + * After reaching an optimal state, it can be transformed into a minimal state by attempting to + * split chunks further into equal-feerate parts. To do so, pick a specific transaction in each + * chunk (the pivot), and rerun the above split-then-merge procedure again: + * - first, while pretending the pivot transaction has an infinitesimally higher (or lower) fee + * than it really has. If a split exists with the pivot in the top part (or bottom part), this + * will find it. + * - if that fails to split, repeat while pretending the pivot transaction has an infinitesimally + * lower (or higher) fee. If a split exists with the pivot in the bottom part (or top part), this + * will find it. + * - if either succeeds, repeat the procedure for the newly found chunks to split them further. + * If not, the chunk is already minimal. + * If the chunk can be split into equal-feerate parts, then the pivot must exist in either the top + * or bottom part of that potential split. By trying both with the same pivot, if a split exists, + * it will be found. + * * What remains to be specified are a number of heuristics: * * - How to decide which chunks to merge: @@ -644,10 +676,30 @@ private: std::vector m_dep_data; /** A FIFO of chunk representatives of chunks that may be improved still. */ VecDeque m_suboptimal_chunks; + /** A FIFO of chunk representatives with a pivot transaction in them, and a flag to indicate + * their status: + * - bit 1: currently attempting to move the pivot down, rather than up. + * - bit 2: this is the second stage, so we have already tried moving the pivot in the other + * direction. + */ + VecDeque> m_nonminimal_chunks; /** The number of updated transactions in activations/deactivations. */ uint64_t m_cost{0}; + /** Pick a random transaction within a set (which must be non-empty). */ + TxIdx PickRandomTx(const SetType& tx_idxs) noexcept + { + Assume(tx_idxs.Any()); + unsigned pos = m_rng.randrange(tx_idxs.Count()); + for (auto tx_idx : tx_idxs) { + if (pos == 0) return tx_idx; + --pos; + } + Assume(false); + return TxIdx(-1); + } + /** Update a chunk: * - All transactions have their chunk representative set to `chunk_rep`. * - All dependencies which have `query` in their top_setinfo get `dep_change` added to it @@ -769,8 +821,9 @@ private: /*chunk_rep=*/bottom_rep, /*dep_change=*/top_part); } - /** Activate a dependency from the chunk represented by bottom_rep to the chunk represented by - * top_rep, which must exist. Return the representative of the merged chunk. */ + /** Activate a dependency from the chunk represented by bottom_idx to the chunk represented by + * top_idx. Return the representative of the merged chunk, or TxIdx(-1) if no merge is + * possible. */ TxIdx MergeChunks(TxIdx top_rep, TxIdx bottom_rep) noexcept { auto& top_chunk = m_tx_data[top_rep]; @@ -783,7 +836,7 @@ private: auto& tx_data = m_tx_data[tx]; num_deps += (tx_data.children & bottom_chunk.chunk_setinfo.transactions).Count(); } - Assume(num_deps > 0); + if (num_deps == 0) return TxIdx(-1); // Uniformly randomly pick one of them and activate it. 
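// The pick below is drawn uniformly from [0, num_deps); the loop that follows walks the top
// chunk's transactions and their child dependencies into the bottom chunk in iteration order,
// counting down until the pick-th such dependency is reached, and that one is activated.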
TxIdx pick = m_rng.randrange(num_deps); for (auto tx : top_chunk.chunk_setinfo.transactions) { @@ -1068,6 +1121,119 @@ public: return false; } + /** Initialize data structure for minimizing the chunks. Can only be called if state is known + * to be optimal. OptimizeStep() cannot be called anymore afterwards. */ + void StartMinimizing() noexcept + { + m_nonminimal_chunks.clear(); + m_nonminimal_chunks.reserve(m_transaction_idxs.Count()); + // Gather all chunks, and for each, add it with a random pivot in it, and a random initial + // direction, to m_nonminimal_chunks. + for (auto tx : m_transaction_idxs) { + auto& tx_data = m_tx_data[tx]; + if (tx_data.chunk_rep == tx) { + TxIdx pivot_idx = PickRandomTx(tx_data.chunk_setinfo.transactions); + m_nonminimal_chunks.emplace_back(tx, pivot_idx, m_rng.randbits<1>()); + // Randomize the initial order of nonminimal chunks in the queue. + TxIdx j = m_rng.randrange(m_nonminimal_chunks.size()); + if (j != m_nonminimal_chunks.size() - 1) { + std::swap(m_nonminimal_chunks.back(), m_nonminimal_chunks[j]); + } + } + } + } + + /** Try to reduce a chunk's size. Returns false if all chunks are minimal, true otherwise. */ + bool MinimizeStep() noexcept + { + // If the queue of potentially-non-minimal chunks is empty, we are done. + if (m_nonminimal_chunks.empty()) return false; + // Pop an entry from the potentially-non-minimal chunk queue. + auto [chunk_rep, pivot_idx, flags] = m_nonminimal_chunks.front(); + m_nonminimal_chunks.pop_front(); + auto& chunk_data = m_tx_data[chunk_rep]; + Assume(chunk_data.chunk_rep == chunk_rep); + /** Whether to move the pivot down rather than up. */ + bool move_pivot_down = flags & 1; + /** Whether this is already the second stage. */ + bool second_stage = flags & 2; + + // Find a random dependency whose top and bottom set feerates are equal, and which has + // pivot in bottom set (if move_pivot_down) or in top set (if !move_pivot_down). + DepIdx candidate_dep = DepIdx(-1); + uint64_t candidate_tiebreak{0}; + bool have_any = false; + // Iterate over all transactions. + for (auto tx_idx : chunk_data.chunk_setinfo.transactions) { + const auto& tx_data = m_tx_data[tx_idx]; + // Iterate over all active child dependencies of the transaction. + for (auto dep_idx : tx_data.child_deps) { + auto& dep_data = m_dep_data[dep_idx]; + // Skip inactive child dependencies. + if (!dep_data.active) continue; + // Skip if this dependency does not have equal top and bottom set feerates. Note + // that the top cannot have higher feerate than the bottom, or OptimizeSteps would + // have dealt with it. + if (dep_data.top_setinfo.feerate << chunk_data.chunk_setinfo.feerate) continue; + have_any = true; + // Skip if this dependency does not have pivot in the right place. + if (move_pivot_down == dep_data.top_setinfo.transactions[pivot_idx]) continue; + // Remember this as our chosen dependency if it has a better tiebreak. + uint64_t tiebreak = m_rng.rand64() | 1; + if (tiebreak > candidate_tiebreak) { + candidate_tiebreak = tiebreak; + candidate_dep = dep_idx; + } + } + } + // If no dependencies have equal top and bottom set feerate, this chunk is minimal. + if (!have_any) return true; + // If all found dependencies have the pivot in the wrong place, try moving it in the other + // direction. If this was the second stage already, we are done. + if (candidate_tiebreak == 0) { + // Switch to other direction, and to second phase. 
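// Flag bit semantics at this point: flags & 1 selects the pivot's direction of movement
// (set = move the pivot down), and flags & 2 marks the second stage. XOR-ing with 3 therefore
// flips the direction and simultaneously records that the other direction has already been
// tried without finding a split.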
+ flags ^= 3; + if (!second_stage) m_nonminimal_chunks.emplace_back(chunk_rep, pivot_idx, flags); + return true; + } + + // Otherwise, deactivate the dependency that was found. + Deactivate(candidate_dep); + auto& dep_data = m_dep_data[candidate_dep]; + auto parent_chunk_rep = m_tx_data[dep_data.parent].chunk_rep; + auto child_chunk_rep = m_tx_data[dep_data.child].chunk_rep; + // Try to activate a dependency between the new bottom and the new top (opposite from the + // dependency that was just deactivated). + auto merged_chunk_rep = MergeChunks(child_chunk_rep, parent_chunk_rep); + if (merged_chunk_rep != TxIdx(-1)) { + // A self-merge happened. + // Re-insert the chunk into the queue, in the same direction. Note that the chunk_rep + // will have changed. + m_nonminimal_chunks.emplace_back(merged_chunk_rep, pivot_idx, flags); + } else { + // No self-merge happens, and thus we have found a way to split the chunk. Create two + // smaller chunks, and add them to the queue. The one that contains the current pivot + // gets to continue with it in the same direction, to minimize the number of times we + // alternate direction. If we were in the second phase already, the newly created chunk + // inherits that too, because we know no split with the pivot on the other side is + // possible already. The new chunk without the current pivot gets a new randomly-chosen + // one. + if (move_pivot_down) { + auto parent_pivot_idx = PickRandomTx(m_tx_data[parent_chunk_rep].chunk_setinfo.transactions); + m_nonminimal_chunks.emplace_back(parent_chunk_rep, parent_pivot_idx, m_rng.randbits<1>()); + m_nonminimal_chunks.emplace_back(child_chunk_rep, pivot_idx, flags); + } else { + auto child_pivot_idx = PickRandomTx(m_tx_data[child_chunk_rep].chunk_setinfo.transactions); + m_nonminimal_chunks.emplace_back(parent_chunk_rep, pivot_idx, flags); + m_nonminimal_chunks.emplace_back(child_chunk_rep, child_pivot_idx, m_rng.randbits<1>()); + } + if (m_rng.randbool()) { + std::swap(m_nonminimal_chunks.back(), m_nonminimal_chunks[m_nonminimal_chunks.size() - 2]); + } + } + return true; + } + /** Construct a topologically-valid linearization from the current forest state. Must be * topological. */ std::vector GetLinearization() noexcept @@ -1182,6 +1348,10 @@ public: * After an OptimizeStep(), the diagram will always be at least as good as before. Once * OptimizeStep() returns false, the diagram will be equivalent to that produced by * GetLinearization(), and optimal. + * + * After a MinimizeStep(), the diagram cannot change anymore (in the CompareChunks() sense), + * but its number of segments can increase still. Once MinimizeStep() returns false, the number + * of chunks of the produced linearization will match the number of segments in the diagram. */ std::vector GetDiagram() const noexcept { @@ -1328,6 +1498,19 @@ public: auto tx_idx = m_suboptimal_chunks[i]; assert(m_transaction_idxs[tx_idx]); } + + // + // Verify m_nonminimal_chunks. + // + SetType nonminimal_reps; + for (size_t i = 0; i < m_nonminimal_chunks.size(); ++i) { + auto [chunk_rep, pivot, flags] = m_nonminimal_chunks[i]; + assert(m_tx_data[chunk_rep].chunk_rep == chunk_rep); + assert(m_tx_data[pivot].chunk_rep == chunk_rep); + assert(!nonminimal_reps[chunk_rep]); + nonminimal_reps.Set(chunk_rep); + } + assert(nonminimal_reps.IsSubsetOf(m_transaction_idxs)); } }; @@ -1345,7 +1528,7 @@ public: * - The resulting linearization. It is guaranteed to be at least as * good (in the feerate diagram sense) as old_linearization. 
* - A boolean indicating whether the result is guaranteed to be - * optimal. + * optimal with minimal chunks. * - How many optimization steps were actually performed. */ template @@ -1361,11 +1544,19 @@ std::tuple, bool, uint64_t> Linearize(const DepGraph< } // Make improvement steps to it until we hit the max_iterations limit, or an optimal result // is found. - bool optimal = false; if (forest.GetCost() < max_iterations) { forest.StartOptimizing(); do { - if (!forest.OptimizeStep()) { + if (!forest.OptimizeStep()) break; + } while (forest.GetCost() < max_iterations); + } + // Make chunk minimization steps until we hit the max_iterations limit, or all chunks are + // minimal. + bool optimal = false; + if (forest.GetCost() < max_iterations) { + forest.StartMinimizing(); + do { + if (!forest.MinimizeStep()) { optimal = true; break; } diff --git a/src/test/cluster_linearize_tests.cpp b/src/test/cluster_linearize_tests.cpp index a2be29776cb..92e80f7cda3 100644 --- a/src/test/cluster_linearize_tests.cpp +++ b/src/test/cluster_linearize_tests.cpp @@ -94,10 +94,8 @@ void TestOptimalLinearization(const std::vector& enc, const std::vector SanityCheck(depgraph, lin); auto chunking = ChunkLinearization(depgraph, lin); BOOST_CHECK(std::is_eq(CompareChunks(chunking, optimal_diagram))); - // TODO: temporarily disabled; SFL does not guarantee minimal chunks. This will be - // reinstated in a future commit. - // // Verify that the chunks are minimal. - // BOOST_CHECK(chunking.size() == optimal_diagram.size()); + // Verify that the chunks are minimal. + BOOST_CHECK_EQUAL(chunking.size(), optimal_diagram.size()); } tx_count = depgraph.PositionRange(); }; diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index cb5267a4ff0..6e7e73e1649 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -914,7 +914,8 @@ FUZZ_TARGET(clusterlin_sfl) // Function to test the state. std::vector last_diagram; - auto test_fn = [&](bool is_optimal = false) { + bool was_optimal{false}; + auto test_fn = [&](bool is_optimal = false, bool is_minimal = false) { if (rng.randbits(4) == 0) { // Perform sanity checks from time to time (too computationally expensive to do after // every step). @@ -930,13 +931,21 @@ FUZZ_TARGET(clusterlin_sfl) assert(cmp_lin >= 0); // If we're in an allegedly optimal state, they must match. if (is_optimal) assert(cmp_lin == 0); + // If we're in an allegedly minimal state, they must also have the same number of + // segments. + if (is_minimal) assert(diagram.size() == lin_diagram.size()); } // Verify that subsequent calls to GetDiagram() never get worse/incomparable. if (!last_diagram.empty()) { auto cmp = CompareChunks(diagram, last_diagram); assert(cmp >= 0); + // If the last diagram was already optimal, the new one cannot be better. + if (was_optimal) assert(cmp == 0); + // Also, if the diagram was already optimal, the number of segments can only increase. + if (was_optimal) assert(diagram.size() >= last_diagram.size()); } last_diagram = std::move(diagram); + was_optimal = is_optimal; }; if (load_linearization) { @@ -963,7 +972,15 @@ FUZZ_TARGET(clusterlin_sfl) test_fn(); if (!sfl.OptimizeStep()) break; } + + // Loop until minimal. test_fn(/*is_optimal=*/true); + sfl.StartMinimizing(); + while (true) { + test_fn(/*is_optimal=*/true); + if (!sfl.MinimizeStep()) break; + } + test_fn(/*is_optimal=*/true, /*is_minimal=*/true); // Verify that optimality is reached within an expected amount of work. 
This protects against // hypothetical bugs that hugely increase the amount of work needed to reach optimality. @@ -975,6 +992,9 @@ FUZZ_TARGET(clusterlin_sfl) auto simple_cmp = CompareChunks(last_diagram, simple_diagram); assert(simple_cmp >= 0); if (simple_optimal) assert(simple_cmp == 0); + // If the diagram matches, we must also have at least as many segments (because the SFL state + // and its produced diagram are minimal); + if (simple_cmp == 0) assert(last_diagram.size() >= simple_diagram.size()); // We can compare with any arbitrary linearization, and the diagram must be at least as good as // each. @@ -983,6 +1003,7 @@ FUZZ_TARGET(clusterlin_sfl) auto read_diagram = ChunkLinearization(depgraph, read_lin); auto cmp = CompareChunks(last_diagram, read_diagram); assert(cmp >= 0); + if (cmp == 0) assert(last_diagram.size() >= read_diagram.size()); } } @@ -1052,12 +1073,9 @@ FUZZ_TARGET(clusterlin_linearize) // SimpleLinearize is broken). if (simple_optimal) assert(cmp == 0); - // Temporarily disabled, as Linearize() currently does not guarantee minimal chunks, even - // when it reports an optimal result. This will be re-introduced in a later commit. - // - // // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as - // // chunking is claimed to be optimal, which implies minimal chunks). - // if (cmp == 0) assert(chunking.size() >= simple_chunking.size()); + // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as + // chunking is claimed to be optimal, which implies minimal chunks). + if (cmp == 0) assert(chunking.size() >= simple_chunking.size()); // Compare with a linearization read from the fuzz input. auto read = ReadLinearization(depgraph, reader); diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index f57c486bd97..40ce7f62723 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -402,14 +402,14 @@ inline uint64_t MaxOptimalLinearizationIters(DepGraphIndex cluster_count) // *some* reasonable cost bound, optimal linearizations are always found. static constexpr uint64_t ITERS[65] = { 0, - 0, 2, 8, 21, 51, 96, 162, 200, - 273, 323, 413, 506, 602, 788, 883, 948, - 1153, 1187, 1367, 1619, 1854, 2271, 2257, 2707, - 2904, 3275, 3342, 4209, 4648, 4146, 4273, 4905, - 5358, 5767, 5977, 6777, 7812, 7689, 8478, 8425, - 9561, 11765, 10743, 11806, 12812, 12838, 15421, 16778, - 16661, 19393, 17995, 23947, 23314, 24564, 26209, 29267, - 24719, 31065, 31794, 29185, 32465, 35432, 39986, 36865 + 0, 4, 10, 34, 76, 118, 184, 225, + 320, 376, 464, 573, 830, 868, 1019, 1468, + 1375, 1785, 1880, 1854, 2551, 2559, 4336, 4784, + 5547, 5807, 6157, 6075, 6961, 7403, 7756, 8001, + 8041, 7579, 8483, 10077, 9015, 9388, 9626, 12371, + 12847, 12102, 15173, 15800, 20319, 22190, 23183, 24361, + 24909, 19225, 27419, 23789, 25909, 21993, 25596, 24130, + 26349, 31823, 31855, 31250, 32688, 34825, 41710, 45478 }; assert(cluster_count < std::size(ITERS)); // Multiply the table number by two, to account for the fact that they are not absolutes. 
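For context, the surrounding comment states that the tabulated numbers are doubled when used as a bound, since they are empirical maxima rather than hard guarantees. Below is a hedged sketch of that lookup, reproducing only the first few entries of the table above and assuming the doubling is the only adjustment (the real MaxOptimalLinearizationIters may do more):

    #include <cassert>
    #include <cstdint>
    #include <iterator>

    // Hypothetical, simplified mirror of MaxOptimalLinearizationIters(): per-cluster-size cost
    // ceiling taken from the (truncated) table above, with a factor-two headroom per the comment.
    uint64_t MaxOptimalLinearizationItersSketch(unsigned cluster_count)
    {
        static constexpr uint64_t ITERS[]{0, 0, 4, 10, 34, 76, 118, 184, 225};
        assert(cluster_count < std::size(ITERS));
        return 2 * ITERS[cluster_count];
    }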
diff --git a/src/txgraph.cpp b/src/txgraph.cpp index d76ee60e032..f60d8ca2a66 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -2063,8 +2063,7 @@ std::pair GenericClusterImpl::Relinearize(TxGraphImpl& graph, in uint64_t rng_seed = graph.m_rng.rand64(); auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization, /*is_topological=*/IsTopological()); // Postlinearize to undo some of the non-determinism caused by randomizing the linearization. - // This also guarantees that all chunks are connected (which is not guaranteed by SFL - // currently, even when optimal). + // This also guarantees that all chunks are connected (even when non-optimal). PostLinearize(m_depgraph, linearization); // Update the linearization. m_linearization = std::move(linearization);
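Tying the pieces together, the control flow that Linearize() now implements (and whose result Relinearize() post-processes above) can be restated as the following simplified sketch. It assumes only the forest interface introduced in this patch (GetCost, StartOptimizing/OptimizeStep, StartMinimizing/MinimizeStep); construction of the forest, the returned linearization, and the exact cost accounting are omitted, and the wrapper name is illustrative.

    // Simplified restatement of the two-phase loop in Linearize(); Forest stands in for the
    // spanning-forest linearizer type used in cluster_linearize.h.
    template<typename Forest>
    bool OptimizeThenMinimize(Forest& forest, uint64_t max_iterations)
    {
        // Phase 1: split-and-merge steps until an optimal state is reached or the budget runs out.
        if (forest.GetCost() < max_iterations) {
            forest.StartOptimizing();
            do {
                if (!forest.OptimizeStep()) break;       // optimal state reached
            } while (forest.GetCost() < max_iterations);
        }
        // Phase 2: only entered if phase 1 finished within budget (so the state is known optimal);
        // split chunks into minimal equal-feerate components until MinimizeStep() reports done.
        bool optimal_and_minimal = false;
        if (forest.GetCost() < max_iterations) {
            forest.StartMinimizing();
            do {
                if (!forest.MinimizeStep()) {            // all chunks are minimal
                    optimal_and_minimal = true;
                    break;
                }
            } while (forest.GetCost() < max_iterations);
        }
        return optimal_and_minimal;
    }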