clusterlin: replace cluster linearization with SFL (feature)

This replaces the existing LIMO linearization algorithm (which internally uses ancestor-set finding and candidate-set finding) with the much more performant spanning-forest linearization (SFL) algorithm. It also removes the old candidate-set search algorithm, along with several of its tests and benchmarks and the utility code they needed. The worst-case time per unit of cost is similar to that of the previous algorithm, so ACCEPTABLE_ITERS is unchanged.
2026-06-10 06:39:15 +02:00 · 2025-10-23 19:15:21 -04:00
parent 6a8fa821b8
commit 3efc94d656
7 changed files with 81 additions and 994 deletions
--- a/src/bench/cluster_linearize.cpp
+++ b/src/bench/cluster_linearize.cpp
@@ -18,21 +18,6 @@ using namespace util::hex_literals;

 namespace {

-/** Construct a linear graph. These are pessimal for AncestorCandidateFinder, as they maximize
- *  the number of ancestor set feerate updates. The best ancestor set is always the topmost
- *  remaining transaction, whose removal requires updating all remaining transactions' ancestor
- *  set feerates. */
-template<typename SetType>
-DepGraph<SetType> MakeLinearGraph(DepGraphIndex ntx)
-{
-    DepGraph<SetType> depgraph;
-    for (DepGraphIndex i = 0; i < ntx; ++i) {
-        depgraph.AddTransaction({-int32_t(i), 1});
-        if (i > 0) depgraph.AddDependencies(SetType::Singleton(i - 1), i);
-    }
-    return depgraph;
-}
-
 /** Construct a wide graph (one root, with N-1 children that are otherwise unrelated, with
 *  increasing feerates). These graphs are pessimal for the LIMO step in Linearize, because
 *  rechunking is needed after every candidate (the last transaction gets picked every time).
@@ -48,136 +33,6 @@ DepGraph<SetType> MakeWideGraph(DepGraphIndex ntx)
    return depgraph;
 }

-// Construct a difficult graph. These need at least sqrt(2^(n-1)) iterations in the implemented
-// algorithm (purely empirically determined).
-template<typename SetType>
-DepGraph<SetType> MakeHardGraph(DepGraphIndex ntx)
-{
-    DepGraph<SetType> depgraph;
-    for (DepGraphIndex i = 0; i < ntx; ++i) {
-        if (ntx & 1) {
-            // Odd cluster size.
-            //
-            // Mermaid diagram code for the resulting cluster for 11 transactions:
-            // ```mermaid
-            // graph BT
-            // T0["T0: 1/2"];T1["T1: 14/2"];T2["T2: 6/1"];T3["T3: 5/1"];T4["T4: 7/1"];
-            // T5["T5: 5/1"];T6["T6: 7/1"];T7["T7: 5/1"];T8["T8: 7/1"];T9["T9: 5/1"];
-            // T10["T10: 7/1"];
-            // T1-->T0;T1-->T2;T3-->T2;T4-->T3;T4-->T5;T6-->T5;T4-->T7;T8-->T7;T4-->T9;T10-->T9;
-            // ```
-            if (i == 0) {
-                depgraph.AddTransaction({1, 2});
-            } else if (i == 1) {
-                depgraph.AddTransaction({14, 2});
-                depgraph.AddDependencies(SetType::Singleton(0), 1);
-            } else if (i == 2) {
-                depgraph.AddTransaction({6, 1});
-                depgraph.AddDependencies(SetType::Singleton(2), 1);
-            } else if (i == 3) {
-                depgraph.AddTransaction({5, 1});
-                depgraph.AddDependencies(SetType::Singleton(2), 3);
-            } else if ((i & 1) == 0) {
-                depgraph.AddTransaction({7, 1});
-                depgraph.AddDependencies(SetType::Singleton(i - 1), i);
-            } else {
-                depgraph.AddTransaction({5, 1});
-                depgraph.AddDependencies(SetType::Singleton(i), 4);
-            }
-        } else {
-            // Even cluster size.
-            //
-            // Mermaid diagram code for the resulting cluster for 10 transactions:
-            // ```mermaid
-            // graph BT
-            // T0["T0: 1"];T1["T1: 3"];T2["T2: 1"];T3["T3: 4"];T4["T4: 0"];T5["T5: 4"];T6["T6: 0"];
-            // T7["T7: 4"];T8["T8: 0"];T9["T9: 4"];
-            // T1-->T0;T2-->T0;T3-->T2;T3-->T4;T5-->T4;T3-->T6;T7-->T6;T3-->T8;T9-->T8;
-            // ```
-            if (i == 0) {
-                depgraph.AddTransaction({1, 1});
-            } else if (i == 1) {
-                depgraph.AddTransaction({3, 1});
-                depgraph.AddDependencies(SetType::Singleton(0), 1);
-            } else if (i == 2) {
-                depgraph.AddTransaction({1, 1});
-                depgraph.AddDependencies(SetType::Singleton(0), 2);
-            } else if (i & 1) {
-                depgraph.AddTransaction({4, 1});
-                depgraph.AddDependencies(SetType::Singleton(i - 1), i);
-            } else {
-                depgraph.AddTransaction({0, 1});
-                depgraph.AddDependencies(SetType::Singleton(i), 3);
-            }
-        }
-    }
-    return depgraph;
-}
-
-/** Benchmark that does search-based candidate finding with a specified number of iterations.
- *
- * Its goal is measuring how much time every additional search iteration in linearization costs,
- * by running with a low and a high count, subtracting the results, and divided by the number
- * iterations difference.
- */
-template<typename SetType>
-void BenchLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench, uint64_t iter_limit)
-{
-    const auto depgraph = MakeHardGraph<SetType>(ntx);
-    uint64_t rng_seed = 0;
-    bench.run([&] {
-        SearchCandidateFinder finder(depgraph, rng_seed++);
-        auto [candidate, iters_performed] = finder.FindCandidateSet(iter_limit, {});
-        assert(iters_performed == iter_limit);
-    });
-}
-
-/** Benchmark for linearization improvement of a trivial linear graph using just ancestor sort.
- *
- * Its goal is measuring how much time linearization may take without any search iterations.
- *
- * If P is the benchmarked per-iteration count (obtained by running BenchLinearizeWorstCase for a
- * high and a low iteration count, subtracting them, and dividing by the difference in count), and
- * N is the resulting time of BenchLinearizeNoItersWorstCase*, then an invocation of Linearize with
- * max_iterations=m should take no more than roughly N+m*P time. This may however be an
- * overestimate, as the worst cases do not coincide (the ones that are worst for linearization
- * without any search happen to be ones that do not need many search iterations).
- *
- * This benchmark exercises a worst case for AncestorCandidateFinder, but for which improvement is
- * cheap.
- */
-template<typename SetType>
-void BenchLinearizeNoItersWorstCaseAnc(DepGraphIndex ntx, benchmark::Bench& bench)
-{
-    const auto depgraph = MakeLinearGraph<SetType>(ntx);
-    uint64_t rng_seed = 0;
-    std::vector<DepGraphIndex> old_lin(ntx);
-    for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i;
-    bench.run([&] {
-        Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin);
-    });
-}
-
-/** Benchmark for linearization improvement of a trivial wide graph using just ancestor sort.
- *
- * Its goal is measuring how much time improving a linearization may take without any search
- * iterations, similar to the previous function.
- *
- * This benchmark exercises a worst case for improving an existing linearization, but for which
- * AncestorCandidateFinder is cheap.
- */
-template<typename SetType>
-void BenchLinearizeNoItersWorstCaseLIMO(DepGraphIndex ntx, benchmark::Bench& bench)
-{
-    const auto depgraph = MakeWideGraph<SetType>(ntx);
-    uint64_t rng_seed = 0;
-    std::vector<DepGraphIndex> old_lin(ntx);
-    for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i;
-    bench.run([&] {
-        Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin);
-    });
-}
-
 template<typename SetType>
 void BenchPostLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench)
 {
@@ -257,33 +112,6 @@ void BenchLinearizeOptimallyPerCost(benchmark::Bench& bench, const std::string&

 } // namespace

-static void Linearize16TxWorstCase20Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<16>>(16, bench, 20); }
-static void Linearize16TxWorstCase120Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<16>>(16, bench, 120); }
-static void Linearize32TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<32>>(32, bench, 5000); }
-static void Linearize32TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<32>>(32, bench, 15000); }
-static void Linearize48TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<48>>(48, bench, 5000); }
-static void Linearize48TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<48>>(48, bench, 15000); }
-static void Linearize64TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<64>>(64, bench, 5000); }
-static void Linearize64TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<64>>(64, bench, 15000); }
-static void Linearize75TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<75>>(75, bench, 5000); }
-static void Linearize75TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<75>>(75, bench, 15000); }
-static void Linearize99TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<99>>(99, bench, 5000); }
-static void Linearize99TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase<BitSet<99>>(99, bench, 15000); }
-
-static void LinearizeNoIters16TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc<BitSet<16>>(16, bench); }
-static void LinearizeNoIters32TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc<BitSet<32>>(32, bench); }
-static void LinearizeNoIters48TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc<BitSet<48>>(48, bench); }
-static void LinearizeNoIters64TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc<BitSet<64>>(64, bench); }
-static void LinearizeNoIters75TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc<BitSet<75>>(75, bench); }
-static void LinearizeNoIters99TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc<BitSet<99>>(99, bench); }
-
-static void LinearizeNoIters16TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<16>>(16, bench); }
-static void LinearizeNoIters32TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<32>>(32, bench); }
-static void LinearizeNoIters48TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<48>>(48, bench); }
-static void LinearizeNoIters64TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<64>>(64, bench); }
-static void LinearizeNoIters75TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<75>>(75, bench); }
-static void LinearizeNoIters99TxWorstCaseLIMO(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseLIMO<BitSet<99>>(99, bench); }
-
 static void PostLinearize16TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<16>>(16, bench); }
 static void PostLinearize32TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<32>>(32, bench); }
 static void PostLinearize48TxWorstCase(benchmark::Bench& bench) { BenchPostLinearizeWorstCase<BitSet<48>>(48, bench); }
@@ -350,33 +178,6 @@ static void LinearizeOptimallyPerCost(benchmark::Bench& bench)
    BenchLinearizeOptimallyPerCost(bench, "LinearizeOptimallySyntheticPerCost", CLUSTERS_SYNTHETIC);
 }

-BENCHMARK(Linearize16TxWorstCase20Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize16TxWorstCase120Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize32TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize32TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize48TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize48TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize64TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize64TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize75TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize75TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize99TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH);
-BENCHMARK(Linearize99TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH);
-
-BENCHMARK(LinearizeNoIters16TxWorstCaseAnc, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters32TxWorstCaseAnc, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters48TxWorstCaseAnc, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters64TxWorstCaseAnc, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters75TxWorstCaseAnc, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters99TxWorstCaseAnc, benchmark::PriorityLevel::HIGH);
-
-BENCHMARK(LinearizeNoIters16TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters32TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters48TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters64TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters75TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
-BENCHMARK(LinearizeNoIters99TxWorstCaseLIMO, benchmark::PriorityLevel::HIGH);
-
 BENCHMARK(PostLinearize16TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(PostLinearize32TxWorstCase, benchmark::PriorityLevel::HIGH);
 BENCHMARK(PostLinearize48TxWorstCase, benchmark::PriorityLevel::HIGH);
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -411,13 +411,6 @@ struct SetInfo
        return {transactions - other.transactions, feerate - other.feerate};
    }

-    /** Construct a new SetInfo equal to this, with more transactions added (which may overlap
-     *  with the existing transactions in the SetInfo). */
-    [[nodiscard]] SetInfo Add(const DepGraph<SetType>& depgraph, const SetType& txn) const noexcept
-    {
-        return {transactions | txn, feerate + depgraph.FeeRate(txn - transactions)};
-    }
-
    /** Swap two SetInfo objects. */
    friend void swap(SetInfo& a, SetInfo& b) noexcept
    {
@@ -576,108 +569,6 @@ public:
    }
 };

-/** Class encapsulating the state needed to find the best remaining ancestor set.
- *
- * It is initialized for an entire DepGraph, and parts of the graph can be dropped by calling
- * MarkDone.
- *
- * As long as any part of the graph remains, FindCandidateSet() can be called which will return a
- * SetInfo with the highest-feerate ancestor set that remains (an ancestor set is a single
- * transaction together with all its remaining ancestors).
- */
-template<typename SetType>
-class AncestorCandidateFinder
-{
-    /** Internal dependency graph. */
-    const DepGraph<SetType>& m_depgraph;
-    /** Which transaction are left to include. */
-    SetType m_todo;
-    /** Precomputed ancestor-set feerates (only kept up-to-date for indices in m_todo). */
-    std::vector<FeeFrac> m_ancestor_set_feerates;
-
-public:
-    /** Construct an AncestorCandidateFinder for a given cluster.
-     *
-     * Complexity: O(N^2) where N=depgraph.TxCount().
-     */
-    AncestorCandidateFinder(const DepGraph<SetType>& depgraph LIFETIMEBOUND) noexcept :
-        m_depgraph(depgraph),
-        m_todo{depgraph.Positions()},
-        m_ancestor_set_feerates(depgraph.PositionRange())
-    {
-        // Precompute ancestor-set feerates.
-        for (DepGraphIndex i : m_depgraph.Positions()) {
-            /** The remaining ancestors for transaction i. */
-            SetType anc_to_add = m_depgraph.Ancestors(i);
-            FeeFrac anc_feerate;
-            // Reuse accumulated feerate from first ancestor, if usable.
-            Assume(anc_to_add.Any());
-            DepGraphIndex first = anc_to_add.First();
-            if (first < i) {
-                anc_feerate = m_ancestor_set_feerates[first];
-                Assume(!anc_feerate.IsEmpty());
-                anc_to_add -= m_depgraph.Ancestors(first);
-            }
-            // Add in other ancestors (which necessarily include i itself).
-            Assume(anc_to_add[i]);
-            anc_feerate += m_depgraph.FeeRate(anc_to_add);
-            // Store the result.
-            m_ancestor_set_feerates[i] = anc_feerate;
-        }
-    }
-
-    /** Remove a set of transactions from the set of to-be-linearized ones.
-     *
-     * The same transaction may not be MarkDone()'d twice.
-     *
-     * Complexity: O(N*M) where N=depgraph.TxCount(), M=select.Count().
-     */
-    void MarkDone(SetType select) noexcept
-    {
-        Assume(select.Any());
-        Assume(select.IsSubsetOf(m_todo));
-        m_todo -= select;
-        for (auto i : select) {
-            auto feerate = m_depgraph.FeeRate(i);
-            for (auto j : m_depgraph.Descendants(i) & m_todo) {
-                m_ancestor_set_feerates[j] -= feerate;
-            }
-        }
-    }
-
-    /** Check whether any unlinearized transactions remain. */
-    bool AllDone() const noexcept
-    {
-        return m_todo.None();
-    }
-
-    /** Count the number of remaining unlinearized transactions. */
-    DepGraphIndex NumRemaining() const noexcept
-    {
-        return m_todo.Count();
-    }
-
-    /** Find the best (highest-feerate, smallest among those in case of a tie) ancestor set
-     *  among the remaining transactions. Requires !AllDone().
-     *
-     * Complexity: O(N) where N=depgraph.TxCount();
-     */
-    SetInfo<SetType> FindCandidateSet() const noexcept
-    {
-        Assume(!AllDone());
-        std::optional<DepGraphIndex> best;
-        for (auto i : m_todo) {
-            if (best.has_value()) {
-                Assume(!m_ancestor_set_feerates[i].IsEmpty());
-                if (!(m_ancestor_set_feerates[i] > m_ancestor_set_feerates[*best])) continue;
-            }
-            best = i;
-        }
-        Assume(best.has_value());
-        return {m_depgraph.Ancestors(*best) & m_todo, m_ancestor_set_feerates[*best]};
-    }
-};
-
 /** Class to represent the internal state of the spanning-forest linearization (SFL) algorithm.
 *
 * At all times, each dependency is marked as either "active" or "inactive". The subset of active
@@ -1456,391 +1347,10 @@ public:
    }
 };

-
-/** Class encapsulating the state needed to perform search for good candidate sets.
- *
- * It is initialized for an entire DepGraph, and parts of the graph can be dropped by calling
- * MarkDone().
- *
- * As long as any part of the graph remains, FindCandidateSet() can be called to perform a search
- * over the set of topologically-valid subsets of that remainder, with a limit on how many
- * combinations are tried.
- */
-template<typename SetType>
-class SearchCandidateFinder
-{
-    /** Internal RNG. */
-    InsecureRandomContext m_rng;
-    /** m_sorted_to_original[i] is the original position that sorted transaction position i had. */
-    std::vector<DepGraphIndex> m_sorted_to_original;
-    /** m_original_to_sorted[i] is the sorted position original transaction position i has. */
-    std::vector<DepGraphIndex> m_original_to_sorted;
-    /** Internal dependency graph for the cluster (with transactions in decreasing individual
-     *  feerate order). */
-    DepGraph<SetType> m_sorted_depgraph;
-    /** Which transactions are left to do (indices in m_sorted_depgraph's order). */
-    SetType m_todo;
-
-    /** Given a set of transactions with sorted indices, get their original indices. */
-    SetType SortedToOriginal(const SetType& arg) const noexcept
-    {
-        SetType ret;
-        for (auto pos : arg) ret.Set(m_sorted_to_original[pos]);
-        return ret;
-    }
-
-    /** Given a set of transactions with original indices, get their sorted indices. */
-    SetType OriginalToSorted(const SetType& arg) const noexcept
-    {
-        SetType ret;
-        for (auto pos : arg) ret.Set(m_original_to_sorted[pos]);
-        return ret;
-    }
-
-public:
-    /** Construct a candidate finder for a graph.
-     *
-     * @param[in] depgraph   Dependency graph for the to-be-linearized cluster.
-     * @param[in] rng_seed   A random seed to control the search order.
-     *
-     * Complexity: O(N^2) where N=depgraph.Count().
-     */
-    SearchCandidateFinder(const DepGraph<SetType>& depgraph, uint64_t rng_seed) noexcept :
-        m_rng(rng_seed),
-        m_sorted_to_original(depgraph.TxCount()),
-        m_original_to_sorted(depgraph.PositionRange())
-    {
-        // Determine reordering mapping, by sorting by decreasing feerate. Unused positions are
-        // not included, as they will never be looked up anyway.
-        DepGraphIndex sorted_pos{0};
-        for (auto i : depgraph.Positions()) {
-            m_sorted_to_original[sorted_pos++] = i;
-        }
-        std::sort(m_sorted_to_original.begin(), m_sorted_to_original.end(), [&](auto a, auto b) {
-            auto feerate_cmp = depgraph.FeeRate(a) <=> depgraph.FeeRate(b);
-            if (feerate_cmp == 0) return a < b;
-            return feerate_cmp > 0;
-        });
-        // Compute reverse mapping.
-        for (DepGraphIndex i = 0; i < m_sorted_to_original.size(); ++i) {
-            m_original_to_sorted[m_sorted_to_original[i]] = i;
-        }
-        // Compute reordered dependency graph.
-        m_sorted_depgraph = DepGraph(depgraph, m_original_to_sorted, m_sorted_to_original.size());
-        m_todo = m_sorted_depgraph.Positions();
-    }
-
-    /** Check whether any unlinearized transactions remain. */
-    bool AllDone() const noexcept
-    {
-        return m_todo.None();
-    }
-
-    /** Find a high-feerate topologically-valid subset of what remains of the cluster.
-     *  Requires !AllDone().
-     *
-     * @param[in] max_iterations  The maximum number of optimization steps that will be performed.
-     * @param[in] best            A set/feerate pair with an already-known good candidate. This may
-     *                            be empty.
-     * @return                    A pair of:
-     *                            - The best (highest feerate, smallest size as tiebreaker)
-     *                              topologically valid subset (and its feerate) that was
-     *                              encountered during search. It will be at least as good as the
-     *                              best passed in (if not empty).
-     *                            - The number of optimization steps that were performed. This will
-     *                              be <= max_iterations. If strictly < max_iterations, the
-     *                              returned subset is optimal.
-     *
-     * Complexity: possibly O(N * min(max_iterations, sqrt(2^N))) where N=depgraph.TxCount().
-     */
-    std::pair<SetInfo<SetType>, uint64_t> FindCandidateSet(uint64_t max_iterations, SetInfo<SetType> best) noexcept
-    {
-        Assume(!AllDone());
-
-        // Convert the provided best to internal sorted indices.
-        best.transactions = OriginalToSorted(best.transactions);
-
-        /** Type for work queue items. */
-        struct WorkItem
-        {
-            /** Set of transactions definitely included (and its feerate). This must be a subset
-             *  of m_todo, and be topologically valid (includes all in-m_todo ancestors of
-             *  itself). */
-            SetInfo<SetType> inc;
-            /** Set of undecided transactions. This must be a subset of m_todo, and have no overlap
-             *  with inc. The set (inc | und) must be topologically valid. */
-            SetType und;
-            /** (Only when inc is not empty) The best feerate of any superset of inc that is also a
-             *  subset of (inc | und), without requiring it to be topologically valid. It forms a
-             *  conservative upper bound on how good a set this work item can give rise to.
-             *  Transactions whose feerate is below best's are ignored when determining this value,
-             *  which means it may technically be an underestimate, but if so, this work item
-             *  cannot result in something that beats best anyway. */
-            FeeFrac pot_feerate;
-
-            /** Construct a new work item. */
-            WorkItem(SetInfo<SetType>&& i, SetType&& u, FeeFrac&& p_f) noexcept :
-                inc(std::move(i)), und(std::move(u)), pot_feerate(std::move(p_f))
-            {
-                Assume(pot_feerate.IsEmpty() == inc.feerate.IsEmpty());
-            }
-
-            /** Swap two WorkItems. */
-            void Swap(WorkItem& other) noexcept
-            {
-                swap(inc, other.inc);
-                swap(und, other.und);
-                swap(pot_feerate, other.pot_feerate);
-            }
-        };
-
-        /** The queue of work items. */
-        VecDeque<WorkItem> queue;
-        queue.reserve(std::max<size_t>(256, 2 * m_todo.Count()));
-
-        // Create initial entries per connected component of m_todo. While clusters themselves are
-        // generally connected, this is not necessarily true after some parts have already been
-        // removed from m_todo. Without this, effort can be wasted on searching "inc" sets that
-        // span multiple components.
-        auto to_cover = m_todo;
-        do {
-            auto component = m_sorted_depgraph.FindConnectedComponent(to_cover);
-            to_cover -= component;
-            // If best is not provided, set it to the first component, so that during the work
-            // processing loop below, and during the add_fn/split_fn calls, we do not need to deal
-            // with the best=empty case.
-            if (best.feerate.IsEmpty()) best = SetInfo(m_sorted_depgraph, component);
-            queue.emplace_back(/*inc=*/SetInfo<SetType>{},
-                               /*und=*/std::move(component),
-                               /*pot_feerate=*/FeeFrac{});
-        } while (to_cover.Any());
-
-        /** Local copy of the iteration limit. */
-        uint64_t iterations_left = max_iterations;
-
-        /** The set of transactions in m_todo which have feerate > best's. */
-        SetType imp = m_todo;
-        while (imp.Any()) {
-            DepGraphIndex check = imp.Last();
-            if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break;
-            imp.Reset(check);
-        }
-
-        /** Internal function to add an item to the queue of elements to explore if there are any
-         *  transactions left to split on, possibly improving it before doing so, and to update
-         *  best/imp.
-         *
-         * - inc: the "inc" value for the new work item (must be topological).
-         * - und: the "und" value for the new work item ((inc | und) must be topological).
-         */
-        auto add_fn = [&](SetInfo<SetType> inc, SetType und) noexcept {
-            /** SetInfo object with the set whose feerate will become the new work item's
-             *  pot_feerate. It starts off equal to inc. */
-            auto pot = inc;
-            if (!inc.feerate.IsEmpty()) {
-                // Add entries to pot. We iterate over all undecided transactions whose feerate is
-                // higher than best. While undecided transactions of lower feerate may improve pot,
-                // the resulting pot feerate cannot possibly exceed best's (and this item will be
-                // skipped in split_fn anyway).
-                for (auto pos : imp & und) {
-                    // Determine if adding transaction pos to pot (ignoring topology) would improve
-                    // it. If not, we're done updating pot. This relies on the fact that
-                    // m_sorted_depgraph, and thus the transactions iterated over, are in decreasing
-                    // individual feerate order.
-                    if (!(m_sorted_depgraph.FeeRate(pos) >> pot.feerate)) break;
-                    pot.Set(m_sorted_depgraph, pos);
-                }
-
-                // The "jump ahead" optimization: whenever pot has a topologically-valid subset,
-                // that subset can be added to inc. Any subset of (pot - inc) has the property that
-                // its feerate exceeds that of any set compatible with this work item (superset of
-                // inc, subset of (inc | und)). Thus, if T is a topological subset of pot, and B is
-                // the best topologically-valid set compatible with this work item, and (T - B) is
-                // non-empty, then (T | B) is better than B and also topological. This is in
-                // contradiction with the assumption that B is best. Thus, (T - B) must be empty,
-                // or T must be a subset of B.
-                //
-                // See https://delvingbitcoin.org/t/how-to-linearize-your-cluster/303 section 2.4.
-                const auto init_inc = inc.transactions;
-                for (auto pos : pot.transactions - inc.transactions) {
-                    // If the transaction's ancestors are a subset of pot, we can add it together
-                    // with its ancestors to inc. Just update the transactions here; the feerate
-                    // update happens below.
-                    auto anc_todo = m_sorted_depgraph.Ancestors(pos) & m_todo;
-                    if (anc_todo.IsSubsetOf(pot.transactions)) inc.transactions |= anc_todo;
-                }
-                // Finally update und and inc's feerate to account for the added transactions.
-                und -= inc.transactions;
-                inc.feerate += m_sorted_depgraph.FeeRate(inc.transactions - init_inc);
-
-                // If inc's feerate is better than best's, remember it as our new best.
-                if (inc.feerate > best.feerate) {
-                    best = inc;
-                    // See if we can remove any entries from imp now.
-                    while (imp.Any()) {
-                        DepGraphIndex check = imp.Last();
-                        if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break;
-                        imp.Reset(check);
-                    }
-                }
-
-                // If no potential transactions exist beyond the already included ones, no
-                // improvement is possible anymore.
-                if (pot.feerate.size == inc.feerate.size) return;
-                // At this point und must be non-empty. If it were empty then pot would equal inc.
-                Assume(und.Any());
-            } else {
-                Assume(inc.transactions.None());
-                // If inc is empty, we just make sure there are undecided transactions left to
-                // split on.
-                if (und.None()) return;
-            }
-
-            // Actually construct a new work item on the queue. Due to the switch to DFS when queue
-            // space runs out (see below), we know that no reallocation of the queue should ever
-            // occur.
-            Assume(queue.size() < queue.capacity());
-            queue.emplace_back(/*inc=*/std::move(inc),
-                               /*und=*/std::move(und),
-                               /*pot_feerate=*/std::move(pot.feerate));
-        };
-
-        /** Internal process function. It takes an existing work item, and splits it in two: one
-         *  with a particular transaction (and its ancestors) included, and one with that
-         *  transaction (and its descendants) excluded. */
-        auto split_fn = [&](WorkItem&& elem) noexcept {
-            // Any queue element must have undecided transactions left, otherwise there is nothing
-            // to explore anymore.
-            Assume(elem.und.Any());
-            // The included and undecided set are all subsets of m_todo.
-            Assume(elem.inc.transactions.IsSubsetOf(m_todo) && elem.und.IsSubsetOf(m_todo));
-            // Included transactions cannot be undecided.
-            Assume(!elem.inc.transactions.Overlaps(elem.und));
-            // If pot is empty, then so is inc.
-            Assume(elem.inc.feerate.IsEmpty() == elem.pot_feerate.IsEmpty());
-
-            const DepGraphIndex first = elem.und.First();
-            if (!elem.inc.feerate.IsEmpty()) {
-                // If no undecided transactions remain with feerate higher than best, this entry
-                // cannot be improved beyond best.
-                if (!elem.und.Overlaps(imp)) return;
-                // We can ignore any queue item whose potential feerate isn't better than the best
-                // seen so far.
-                if (elem.pot_feerate <= best.feerate) return;
-            } else {
-                // In case inc is empty use a simpler alternative check.
-                if (m_sorted_depgraph.FeeRate(first) <= best.feerate) return;
-            }
-
-            // Decide which transaction to split on. Splitting is how new work items are added, and
-            // how progress is made. One split transaction is chosen among the queue item's
-            // undecided ones, and:
-            // - A work item is (potentially) added with that transaction plus its remaining
-            //   descendants excluded (removed from the und set).
-            // - A work item is (potentially) added with that transaction plus its remaining
-            //   ancestors included (added to the inc set).
-            //
-            // To decide what to split on, consider the undecided ancestors of the highest
-            // individual feerate undecided transaction. Pick the one which reduces the search space
-            // most. Let I(t) be the size of the undecided set after including t, and E(t) the size
-            // of the undecided set after excluding t. Then choose the split transaction t such
-            // that 2^I(t) + 2^E(t) is minimal, tie-breaking by highest individual feerate for t.
-            DepGraphIndex split = 0;
-            const auto select = elem.und & m_sorted_depgraph.Ancestors(first);
-            Assume(select.Any());
-            std::optional<std::pair<DepGraphIndex, DepGraphIndex>> split_counts;
-            for (auto t : select) {
-                // Call max = max(I(t), E(t)) and min = min(I(t), E(t)). Let counts = {max,min}.
-                // Sorting by the tuple counts is equivalent to sorting by 2^I(t) + 2^E(t). This
-                // expression is equal to 2^max + 2^min = 2^max * (1 + 1/2^(max - min)). The second
-                // factor (1 + 1/2^(max - min)) there is in (1,2]. Thus increasing max will always
-                // increase it, even when min decreases. Because of this, we can first sort by max.
-                std::pair<DepGraphIndex, DepGraphIndex> counts{
-                    (elem.und - m_sorted_depgraph.Ancestors(t)).Count(),
-                    (elem.und - m_sorted_depgraph.Descendants(t)).Count()};
-                if (counts.first < counts.second) std::swap(counts.first, counts.second);
-                // Remember the t with the lowest counts.
-                if (!split_counts.has_value() || counts < *split_counts) {
-                    split = t;
-                    split_counts = counts;
-                }
-            }
-            // Since there was at least one transaction in select, we must always find one.
-            Assume(split_counts.has_value());
-
-            // Add a work item corresponding to exclusion of the split transaction.
-            const auto& desc = m_sorted_depgraph.Descendants(split);
-            add_fn(/*inc=*/elem.inc,
-                   /*und=*/elem.und - desc);
-
-            // Add a work item corresponding to inclusion of the split transaction.
-            const auto anc = m_sorted_depgraph.Ancestors(split) & m_todo;
-            add_fn(/*inc=*/elem.inc.Add(m_sorted_depgraph, anc),
-                   /*und=*/elem.und - anc);
-
-            // Account for the performed split.
-            --iterations_left;
-        };
-
-        // Work processing loop.
-        //
-        // New work items are always added at the back of the queue, but items to process use a
-        // hybrid approach where they can be taken from the front or the back.
-        //
-        // Depth-first search (DFS) corresponds to always taking from the back of the queue. This
-        // is very memory-efficient (linear in the number of transactions). Breadth-first search
-        // (BFS) corresponds to always taking from the front, which potentially uses more memory
-        // (up to exponential in the transaction count), but seems to work better in practice.
-        //
-        // The approach here combines the two: use BFS (plus random swapping) until the queue grows
-        // too large, at which point we temporarily switch to DFS until the size shrinks again.
-        while (!queue.empty()) {
-            // Randomly swap the first two items to randomize the search order.
-            if (queue.size() > 1 && m_rng.randbool()) {
-                queue[0].Swap(queue[1]);
-            }
-
-            // Processing the first queue item, and then using DFS for everything it gives rise to,
-            // may increase the queue size by the number of undecided elements in there, minus 1
-            // for the first queue item being removed. Thus, only when that pushes the queue over
-            // its capacity can we not process from the front (BFS), and should we use DFS.
-            while (queue.size() - 1 + queue.front().und.Count() > queue.capacity()) {
-                if (!iterations_left) break;
-                auto elem = queue.back();
-                queue.pop_back();
-                split_fn(std::move(elem));
-            }
-
-            // Process one entry from the front of the queue (BFS exploration)
-            if (!iterations_left) break;
-            auto elem = queue.front();
-            queue.pop_front();
-            split_fn(std::move(elem));
-        }
-
-        // Return the found best set (converted to the original transaction indices), and the
-        // number of iterations performed.
-        best.transactions = SortedToOriginal(best.transactions);
-        return {std::move(best), max_iterations - iterations_left};
-    }
-
-    /** Remove a subset of transactions from the cluster being linearized.
-     *
-     * Complexity: O(N) where N=done.Count().
-     */
-    void MarkDone(const SetType& done) noexcept
-    {
-        const auto done_sorted = OriginalToSorted(done);
-        Assume(done_sorted.Any());
-        Assume(done_sorted.IsSubsetOf(m_todo));
-        m_todo -= done_sorted;
-    }
-};
-
 /** Find or improve a linearization for a cluster.
 *
 * @param[in] depgraph            Dependency graph of the cluster to be linearized.
- * @param[in] max_iterations      Upper bound on the number of optimization steps that will be done.
+ * @param[in] max_iterations      Cost limit; no optimization step is started once it is reached.
 * @param[in] rng_seed            A random number seed to control search order. This prevents peers
 *                                from predicting exactly which clusters would be hard for us to
 *                                linearize.
@@ -1852,85 +1362,28 @@ public:
 *                                - A boolean indicating whether the result is guaranteed to be
 *                                  optimal.
 *                                - How many optimization steps were actually performed.
- *
- * Complexity: possibly O(N * min(max_iterations + N, sqrt(2^N))) where N=depgraph.TxCount().
 */
 template<typename SetType>
 std::tuple<std::vector<DepGraphIndex>, bool, uint64_t> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span<const DepGraphIndex> old_linearization = {}) noexcept
 {
-    Assume(old_linearization.empty() || old_linearization.size() == depgraph.TxCount());
-    if (depgraph.TxCount() == 0) return {{}, true, 0};
-
-    uint64_t iterations_left = max_iterations;
-    std::vector<DepGraphIndex> linearization;
-
-    AncestorCandidateFinder anc_finder(depgraph);
-    std::optional<SearchCandidateFinder<SetType>> src_finder;
-    linearization.reserve(depgraph.TxCount());
-    bool optimal = true;
-
-    // Treat the initialization of SearchCandidateFinder as taking N^2/64 (rounded up) iterations
-    // (largely due to the cost of constructing the internal sorted-by-feerate DepGraph inside
-    // SearchCandidateFinder), a rough approximation based on benchmark. If we don't have that
-    // many, don't start it.
-    uint64_t start_iterations = (uint64_t{depgraph.TxCount()} * depgraph.TxCount() + 63) / 64;
-    if (iterations_left > start_iterations) {
-        iterations_left -= start_iterations;
-        src_finder.emplace(depgraph, rng_seed);
+    (void)rng_seed; // Unused for now.
+    /** Initialize a spanning forest data structure for this cluster. */
+    SpanningForestState forest(depgraph);
+    if (!old_linearization.empty()) {
+        forest.LoadLinearization(old_linearization);
+    } else {
+        forest.MakeTopological();
    }
-
-    /** Chunking of what remains of the old linearization. */
-    LinearizationChunking old_chunking(depgraph, old_linearization);
-
-    while (true) {
-        // Find the highest-feerate prefix of the remainder of old_linearization.
-        SetInfo<SetType> best_prefix;
-        if (old_chunking.NumChunksLeft()) best_prefix = old_chunking.GetChunk(0);
-
-        // Then initialize best to be either the best remaining ancestor set, or the first chunk.
-        auto best = anc_finder.FindCandidateSet();
-        if (!best_prefix.feerate.IsEmpty() && best_prefix.feerate >= best.feerate) best = best_prefix;
-
-        uint64_t iterations_done_now = 0;
-        uint64_t max_iterations_now = 0;
-        if (src_finder) {
-            // Treat the invocation of SearchCandidateFinder::FindCandidateSet() as costing N/4
-            // up-front (rounded up) iterations (largely due to the cost of connected-component
-            // splitting), a rough approximation based on benchmarks.
-            uint64_t base_iterations = (anc_finder.NumRemaining() + 3) / 4;
-            if (iterations_left > base_iterations) {
-                // Invoke bounded search to update best, with up to half of our remaining
-                // iterations as limit.
-                iterations_left -= base_iterations;
-                max_iterations_now = (iterations_left + 1) / 2;
-                std::tie(best, iterations_done_now) = src_finder->FindCandidateSet(max_iterations_now, best);
-                iterations_left -= iterations_done_now;
-            }
-        }
-
-        if (iterations_done_now == max_iterations_now) {
-            optimal = false;
-            // If the search result is not (guaranteed to be) optimal, run intersections to make
-            // sure we don't pick something that makes us unable to reach further diagram points
-            // of the old linearization.
-            if (old_chunking.NumChunksLeft() > 0) {
-                best = old_chunking.IntersectPrefixes(best);
-            }
-        }
-
-        // Add to output in topological order.
-        depgraph.AppendTopo(linearization, best.transactions);
-
-        // Update state to reflect best is no longer to be linearized.
-        anc_finder.MarkDone(best.transactions);
-        if (anc_finder.AllDone()) break;
-        if (src_finder) src_finder->MarkDone(best.transactions);
-        if (old_chunking.NumChunksLeft() > 0) {
-            old_chunking.MarkDone(best.transactions);
+    // Make improvement steps to it until we hit the max_iterations limit, or an optimal result
+    // is found.
+    bool optimal = false;
+    while (forest.GetCost() < max_iterations) {
+        if (!forest.OptimizeStep()) {
+            optimal = true;
+            break;
        }
    }
-
-    return {std::move(linearization), optimal, max_iterations - iterations_left};
+    return {forest.GetLinearization(), optimal, forest.GetCost()};
 }

 /** Improve a given linearization.
--- a/src/test/cluster_linearize_tests.cpp
+++ b/src/test/cluster_linearize_tests.cpp
@@ -88,8 +88,10 @@ void TestOptimalLinearization(const std::vector<uint8_t>& enc, const std::vector
            SanityCheck(depgraph, lin);
            auto chunking = ChunkLinearization(depgraph, lin);
            BOOST_CHECK(std::is_eq(CompareChunks(chunking, optimal_diagram)));
-            // Verify that the chunks are minimal.
-            BOOST_CHECK(chunking.size() == optimal_diagram.size());
+            // TODO: temporarily disabled; SFL does not guarantee minimal chunks. This will be
+            // reinstated in a future commit.
+            // // Verify that the chunks are minimal.
+            // BOOST_CHECK(chunking.size() == optimal_diagram.size());
        }
        tx_count = depgraph.PositionRange();
    };
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -24,29 +24,22 @@
 *          possibly by comparison with other implementations (at the end of the line ->).
 *   <<---: The right side is implemented using the left side.
 *
- *   +-----------------------+
- *   | SearchCandidateFinder | <<---------------------\
- *   +-----------------------+                        |
- *     |                                            +-----------+       +---------------------+
- *     |                                            | Linearize |       | SpanningForestState |
- *     |                                            +-----------+       +---------------------+
- *     |        +-------------------------+           |  |                              |
- *     |        | AncestorCandidateFinder | <<--------/  |                              |
- *     |        +-------------------------+              |                              |
- *     |          |                     ^                |        ^^  PRODUCTION CODE   |
- *     |          |                     |                |        ||                    |
+ *   +---------------------+                        +-----------+
+ *   | SpanningForestState | <<-------------------- | Linearize |
+ *   +---------------------+                        +-----------+
+ *               |                                       |
+ *               |                                       |        ^^  PRODUCTION CODE
+ *               |                                       |        ||
 *  ==============================================================================================
- *     |          |                     |                |        ||                    |
- *     | clusterlin_ancestor_finder*    |                |        vv  TEST CODE         |
- *     |                                |                |                              |
- *     |-clusterlin_search_finder*      |                |-clusterlin_linearize*        |
- *     |                                |                |                              |
- *     v                                |                v              clusterlin_sfl--|
- *   +-----------------------+          |           +-----------------+                 |
- *   | SimpleCandidateFinder | <<-------------------| SimpleLinearize |<----------------/
- *   +-----------------------+          |           +-----------------+
- *                  |                   |                |
- *                  +-------------------/                |
+ *               |                                       |        ||
+ *               |-clusterlin_sfl*                       |        vv  TEST CODE
+ *               |                                       |
+ *               \------------------------------------\  |-clusterlin_linearize*
+ *                                                    |  |
+ *                                                    v  v
+ *   +-----------------------+                      +-----------------+
+ *   | SimpleCandidateFinder | <<-------------------| SimpleLinearize |
+ *   +-----------------------+                      +-----------------+
 *                  |                                    |
 *                  |-clusterlin_simple_finder*          |-clusterlin_simple_linearize*
 *                  v                                    v
@@ -78,11 +71,8 @@ using namespace cluster_linearize;

 namespace {

-/** A simple finder class for candidate sets.
- *
- * This class matches SearchCandidateFinder in interface and behavior, though with fewer
- * optimizations.
- */
+/** A simple finder class for candidate sets (topologically-valid subsets with high feerate), only
+ *  used by SimpleLinearize below. */
 template<typename SetType>
 class SimpleCandidateFinder
 {
@@ -153,7 +143,8 @@ public:
 /** A very simple finder class for optimal candidate sets, which tries every subset.
 *
 * It is even simpler than SimpleCandidateFinder, and exists just to help test the correctness of
- * SimpleCandidateFinder, which is then used to test the correctness of SearchCandidateFinder.
+ * SimpleCandidateFinder, so that it can be used in SimpleLinearize, which is then used to test the
+ * correctness of Linearize.
 */
 template<typename SetType>
 class ExhaustiveCandidateFinder
@@ -204,8 +195,8 @@ public:
 /** A simple linearization algorithm.
 *
 * This matches Linearize() in interface and behavior, though with fewer optimizations, lacking
- * the ability to pass in an existing linearization, and using just SimpleCandidateFinder rather
- * than AncestorCandidateFinder and SearchCandidateFinder.
+ * the ability to pass in an existing linearization, and linearizing by repeatedly finding the
+ * highest-feerate topological subset of what remains, using SimpleCandidateFinder.
 */
 template<typename SetType>
 std::pair<std::vector<DepGraphIndex>, bool> SimpleLinearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations)
@@ -766,68 +757,17 @@ FUZZ_TARGET(clusterlin_chunking)
    assert(todo.None());
 }

-FUZZ_TARGET(clusterlin_ancestor_finder)
-{
-    // Verify that AncestorCandidateFinder works as expected.
-
-    // Retrieve a depgraph from the fuzz input.
-    SpanReader reader(buffer);
-    DepGraph<TestBitSet> depgraph;
-    try {
-        reader >> Using<DepGraphFormatter>(depgraph);
-    } catch (const std::ios_base::failure&) {}
-
-    AncestorCandidateFinder anc_finder(depgraph);
-    auto todo = depgraph.Positions();
-    while (todo.Any()) {
-        // Call the ancestor finder's FindCandidateSet for what remains of the graph.
-        assert(!anc_finder.AllDone());
-        assert(todo.Count() == anc_finder.NumRemaining());
-        auto best_anc = anc_finder.FindCandidateSet();
-        // Sanity check the result.
-        assert(best_anc.transactions.Any());
-        assert(best_anc.transactions.IsSubsetOf(todo));
-        assert(depgraph.FeeRate(best_anc.transactions) == best_anc.feerate);
-        assert(depgraph.IsConnected(best_anc.transactions));
-        // Check that it is topologically valid.
-        for (auto i : best_anc.transactions) {
-            assert((depgraph.Ancestors(i) & todo).IsSubsetOf(best_anc.transactions));
-        }
-
-        // Compute all remaining ancestor sets.
-        std::optional<SetInfo<TestBitSet>> real_best_anc;
-        for (auto i : todo) {
-            SetInfo info(depgraph, todo & depgraph.Ancestors(i));
-            if (!real_best_anc.has_value() || info.feerate > real_best_anc->feerate) {
-                real_best_anc = info;
-            }
-        }
-        // The set returned by anc_finder must equal the real best ancestor sets.
-        assert(real_best_anc.has_value());
-        assert(*real_best_anc == best_anc);
-
-        // Find a non-empty topologically valid subset of transactions to remove from the graph.
-        // Using an empty set would mean the next iteration is identical to the current one, and
-        // could cause an infinite loop.
-        auto del_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
-        todo -= del_set;
-        anc_finder.MarkDone(del_set);
-    }
-    assert(anc_finder.AllDone());
-    assert(anc_finder.NumRemaining() == 0);
-}
-
 static constexpr auto MAX_SIMPLE_ITERATIONS = 300000;

 FUZZ_TARGET(clusterlin_simple_finder)
 {
    // Verify that SimpleCandidateFinder works as expected by sanity checking the results
    // and comparing them (if claimed to be optimal) against the sets found by
-    // ExhaustiveCandidateFinder and AncestorCandidateFinder.
+    // ExhaustiveCandidateFinder.
    //
    // Note that SimpleCandidateFinder is only used in tests; the purpose of this fuzz test is to
-    // establish confidence in SimpleCandidateFinder, so that it can be used to test
-    // SearchCandidateFinder below.
+    // establish confidence in SimpleCandidateFinder, so that it can be used in SimpleLinearize,
+    // which is then used to test Linearize below.

    // Retrieve a depgraph from the fuzz input.
    SpanReader reader(buffer);
@@ -836,18 +776,15 @@ FUZZ_TARGET(clusterlin_simple_finder)
        reader >> Using<DepGraphFormatter>(depgraph);
    } catch (const std::ios_base::failure&) {}

-    // Instantiate the SimpleCandidateFinder to be tested, and the ExhaustiveCandidateFinder and
-    // AncestorCandidateFinder it is being tested against.
+    // Instantiate the SimpleCandidateFinder to be tested, and the ExhaustiveCandidateFinder it is
+    // being tested against.
    SimpleCandidateFinder smp_finder(depgraph);
    ExhaustiveCandidateFinder exh_finder(depgraph);
-    AncestorCandidateFinder anc_finder(depgraph);

    auto todo = depgraph.Positions();
    while (todo.Any()) {
        assert(!smp_finder.AllDone());
        assert(!exh_finder.AllDone());
-        assert(!anc_finder.AllDone());
-        assert(anc_finder.NumRemaining() == todo.Count());

        // Call SimpleCandidateFinder.
        auto [found, iterations_done] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS);
@@ -874,10 +811,6 @@ FUZZ_TARGET(clusterlin_simple_finder)

        // Perform further quality checks only if SimpleCandidateFinder claims an optimal result.
        if (optimal) {
-            // Compare with AncestorCandidateFinder.
-            auto anc = anc_finder.FindCandidateSet();
-            assert(anc.feerate <= found.feerate);
-
            if (todo.Count() <= 12) {
                // Compare with ExhaustiveCandidateFinder. This quickly gets computationally
                // expensive for large clusters (O(2^n)), so only do it for sufficiently small ones.
@@ -898,119 +831,10 @@ FUZZ_TARGET(clusterlin_simple_finder)
        todo -= del_set;
        smp_finder.MarkDone(del_set);
        exh_finder.MarkDone(del_set);
-        anc_finder.MarkDone(del_set);
    }

    assert(smp_finder.AllDone());
    assert(exh_finder.AllDone());
-    assert(anc_finder.AllDone());
-    assert(anc_finder.NumRemaining() == 0);
-}
-
-FUZZ_TARGET(clusterlin_search_finder)
-{
-    // Verify that SearchCandidateFinder works as expected by sanity checking the results
-    // and comparing with the results from SimpleCandidateFinder and AncestorCandidateFinder,
-    // if the result is claimed to be optimal.
-
-    // Retrieve an RNG seed, a depgraph, and whether to make it connected, from the fuzz input.
-    SpanReader reader(buffer);
-    DepGraph<TestBitSet> depgraph;
-    uint64_t rng_seed{0};
-    uint8_t make_connected{1};
-    try {
-        reader >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
-    } catch (const std::ios_base::failure&) {}
-    // The most complicated graphs are connected ones (other ones just split up). Optionally force
-    // the graph to be connected.
-    if (make_connected) MakeConnected(depgraph);
-
-    // Instantiate the candidate finders.
-    SearchCandidateFinder src_finder(depgraph, rng_seed);
-    SimpleCandidateFinder smp_finder(depgraph);
-    AncestorCandidateFinder anc_finder(depgraph);
-
-    auto todo = depgraph.Positions();
-    while (todo.Any()) {
-        assert(!src_finder.AllDone());
-        assert(!smp_finder.AllDone());
-        assert(!anc_finder.AllDone());
-        assert(anc_finder.NumRemaining() == todo.Count());
-
-        // For each iteration, read an iteration count limit from the fuzz input.
-        uint64_t max_iterations = 1;
-        try {
-            reader >> VARINT(max_iterations);
-        } catch (const std::ios_base::failure&) {}
-        max_iterations &= 0xfffff;
-
-        // Read an initial subset from the fuzz input (allowed to be empty).
-        auto init_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/false);
-        SetInfo init_best(depgraph, init_set);
-
-        // Call the search finder's FindCandidateSet for what remains of the graph.
-        auto [found, iterations_done] = src_finder.FindCandidateSet(max_iterations, init_best);
-        bool optimal = iterations_done < max_iterations;
-
-        // Sanity check the result.
-        assert(iterations_done <= max_iterations);
-        assert(found.transactions.Any());
-        assert(found.transactions.IsSubsetOf(todo));
-        assert(depgraph.FeeRate(found.transactions) == found.feerate);
-        if (!init_best.feerate.IsEmpty()) assert(found.feerate >= init_best.feerate);
-        // Check that it is topologically valid.
-        for (auto i : found.transactions) {
-            assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo));
-        }
-
-        // At most 2^(N-1) iterations can be required: the maximum number of non-empty topological
-        // subsets a (connected) cluster with N transactions can have. Even when the cluster is no
-        // longer connected after removing certain transactions, this holds, because the connected
-        // components are searched separately.
-        assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1)));
-        // Additionally, test that no more than sqrt(2^N)+1 iterations are required. This is just
-        // an empirical bound that seems to hold, without proof. Still, add a test for it so we
-        // can learn about counterexamples if they exist.
-        if (iterations_done >= 1 && todo.Count() <= 63) {
-            Assume((iterations_done - 1) * (iterations_done - 1) <= uint64_t{1} << todo.Count());
-        }
-
-        // Perform quality checks only if SearchCandidateFinder claims an optimal result.
-        if (optimal) {
-            // Optimal sets are always connected.
-            assert(depgraph.IsConnected(found.transactions));
-
-            // Compare with SimpleCandidateFinder.
-            auto [simple, simple_iters] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS);
-            assert(found.feerate >= simple.feerate);
-            if (simple_iters < MAX_SIMPLE_ITERATIONS) {
-                assert(found.feerate == simple.feerate);
-            }
-
-            // Compare with AncestorCandidateFinder;
-            auto anc = anc_finder.FindCandidateSet();
-            assert(found.feerate >= anc.feerate);
-
-            // Compare with a non-empty topological set read from the fuzz input (comparing with an
-            // empty set is not interesting).
-            auto read_topo = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
-            assert(found.feerate >= depgraph.FeeRate(read_topo));
-        }
-
-        // Find a non-empty topologically valid subset of transactions to remove from the graph.
-        // Using an empty set would mean the next iteration is identical to the current one, and
-        // could cause an infinite loop.
-        auto del_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
-        todo -= del_set;
-        src_finder.MarkDone(del_set);
-        smp_finder.MarkDone(del_set);
-        anc_finder.MarkDone(del_set);
-    }
-
-    assert(src_finder.AllDone());
-    assert(smp_finder.AllDone());
-    assert(anc_finder.AllDone());
-    assert(anc_finder.NumRemaining() == 0);
 }

 FUZZ_TARGET(clusterlin_linearization_chunking)
@@ -1250,6 +1074,10 @@ FUZZ_TARGET(clusterlin_sfl)
    }
    test_fn(/*is_optimal=*/true);

+    // Verify that optimality is reached within an expected amount of work. This protects against
+    // hypothetical bugs that hugely increase the amount of work needed to reach optimality.
+    assert(sfl.GetCost() <= MaxOptimalLinearizationIters(depgraph.TxCount()));
+
    // The result must be as good as SimpleLinearize.
    auto [simple_linearization, simple_optimal] = SimpleLinearize(depgraph, MAX_SIMPLE_ITERATIONS / 10);
    auto simple_diagram = ChunkLinearization(depgraph, simple_linearization);
@@ -1301,7 +1129,6 @@ FUZZ_TARGET(clusterlin_linearize)
    // Invoke Linearize().
    iter_count &= 0x7ffff;
    auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization);
-    assert(cost <= iter_count);
    SanityCheck(depgraph, linearization);
    auto chunking = ChunkLinearization(depgraph, linearization);

@@ -1313,7 +1140,7 @@ FUZZ_TARGET(clusterlin_linearize)
    }

    // If the iteration count is sufficiently high, an optimal linearization must be found.
-    if (iter_count >= MaxOptimalLinearizationIters(depgraph.TxCount())) {
+    if (iter_count > MaxOptimalLinearizationIters(depgraph.TxCount())) {
        assert(optimal);
    }

@@ -1328,9 +1155,13 @@ FUZZ_TARGET(clusterlin_linearize)
        // If SimpleLinearize finds the optimal result too, they must be equal (if not,
        // SimpleLinearize is broken).
        if (simple_optimal) assert(cmp == 0);
-        // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as
-        // chunking is claimed to be optimal, which implies minimal chunks).
-        if (cmp == 0) assert(chunking.size() >= simple_chunking.size());
+
+        // Temporarily disabled, as Linearize() currently does not guarantee minimal chunks, even
+        // when it reports an optimal result. This will be re-introduced in a later commit.
+        //
+        // // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as
+        // // chunking is claimed to be optimal, which implies minimal chunks).
+        // if (cmp == 0) assert(chunking.size() >= simple_chunking.size());

        // Compare with a linearization read from the fuzz input.
        auto read = ReadLinearization(depgraph, reader);
--- a/src/test/util/cluster_linearize.h
+++ b/src/test/util/cluster_linearize.h
@@ -396,25 +396,24 @@ void SanityCheck(const DepGraph<SetType>& depgraph, std::span<const DepGraphInde

 inline uint64_t MaxOptimalLinearizationIters(DepGraphIndex cluster_count)
 {
-    // We assume sqrt(2^k)+1 candidate-finding iterations per candidate to be found, plus ceil(k/4)
-    // startup cost when up to k unlinearization transactions remain, plus ceil(n^2/64) overall
-    // startup cost in Linearize. Thus, we can compute the upper bound for a whole linearization
-    // (summing for k=1..n) using the Python expression:
-    //
-    //   [sum((k+3)//4 + math.isqrt(2**k) + 1 for k in range(1, n + 1)) + (n**2 + 63) // 64 for n in range(0, 65)]
-    //
-    // Note that these are just assumptions, as the proven upper bound grows with 2^k, not
-    // sqrt(2^k).
-    static constexpr uint64_t MAX_OPTIMAL_ITERS[65] = {
-        0, 4, 8, 12, 18, 26, 37, 51, 70, 97, 133, 182, 251, 346, 480, 666, 927, 1296, 1815, 2545,
-        3576, 5031, 7087, 9991, 14094, 19895, 28096, 39690, 56083, 79263, 112041, 158391, 223936,
-        316629, 447712, 633086, 895241, 1265980, 1790280, 2531747, 3580335, 5063259, 7160424,
-        10126257, 14320575, 20252230, 28640853, 40504150, 57281380, 81007962, 114562410, 162015557,
-        229124437, 324030718, 458248463, 648061011, 916496483, 1296121563, 1832992493, 2592242635,
-        3665984477, 5184484745, 7331968412, 10368968930, 14663936244
+    // These are the largest numbers seen returned as cost by Linearize(), in a large randomized
+    // trial. There almost certainly exist far worse cases, but they are unlikely to be
+    // encountered in randomized tests. The purpose of these numbers is guaranteeing that for
+    // *some* reasonable cost bound, optimal linearizations are always found.
+    static constexpr uint64_t ITERS[65] = {
+        0,
+        0, 2, 8, 21, 51, 99, 162, 208,
+        300, 349, 489, 627, 776, 867, 982, 1204,
+        1414, 1473, 1770, 2045, 2391, 2417, 3669, 3953,
+        3816, 5717, 4096, 5933, 5225, 5684, 6205, 6407,
+        7671, 12044, 11799, 9577, 9631, 10819, 12277, 15250,
+        18609, 14439, 22283, 16461, 22887, 20641, 22009, 22053,
+        27068, 22173, 31066, 30848, 31841, 37174, 39701, 35666,
+        42728, 43679, 45719, 40217, 51395, 57796, 72739, 60079
    };
-    assert(cluster_count < sizeof(MAX_OPTIMAL_ITERS) / sizeof(MAX_OPTIMAL_ITERS[0]));
-    return MAX_OPTIMAL_ITERS[cluster_count];
+    assert(cluster_count < std::size(ITERS));
+    // Double the table value, since these are observed maxima rather than proven upper bounds.
+    return ITERS[cluster_count] * 2;
 }

 } // namespace
--- a/src/txgraph.cpp
+++ b/src/txgraph.cpp
@@ -2091,9 +2091,9 @@ std::pair<uint64_t, bool> GenericClusterImpl::Relinearize(TxGraphImpl& graph, in
    // Invoke the actual linearization algorithm (passing in the existing one).
    uint64_t rng_seed = graph.m_rng.rand64();
    auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization);
-    // Postlinearize if the result isn't optimal already. This guarantees (among other things)
-    // that the chunks of the resulting linearization are all connected.
-    if (!optimal) PostLinearize(m_depgraph, linearization);
+    // Postlinearize to guarantee that the chunks of the resulting linearization are all connected.
+    // (SFL currently does not guarantee connected chunks, even when optimal.)
+    PostLinearize(m_depgraph, linearization);
    // Update the linearization.
    m_linearization = std::move(linearization);
    // Update the Cluster's quality.
--- a/test/functional/mempool_packages.py
+++ b/test/functional/mempool_packages.py
@@ -239,8 +239,9 @@ class MempoolPackagesTest(BitcoinTestFramework):
        self.generate(self.nodes[0], 1)
        self.trigger_reorg(fork_blocks, self.nodes[0])

-        # Check if the txs are returned to the mempool
-        assert_equal(self.nodes[0].getrawmempool(), mempool0)
+        # Check if the txs are returned to the mempool (though the transaction ordering may
+        # change as it is non-deterministic).
+        assert_equal(set(self.nodes[0].getrawmempool()), set(mempool0))

        # Clean-up the mempool
        self.generate(self.nodes[0], 1)