From 77a432ee704b4a83d56135ed10cf2adf4b3c18af Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 25 Sep 2024 19:34:47 -0400
Subject: [PATCH 01/11] clusterlin tests: count SimpleCandidateFinder
 iterations better

Only count the number of actual new subsets added. If the queue contains
a work item that completely covers a component, no transaction can be added
to it without creating a disconnected component. In this case, also don't
count it as an iteration.

With this, the number of iterations performed by SimpleCandidateFinder is
bounded by the number of distinct connected topologically-valid subsets of
the cluster.
---
 src/test/fuzz/cluster_linearize.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 6a1ea40aef3..8d0249a5d73 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -64,7 +64,6 @@ public:
         SetInfo best(m_depgraph, m_todo);
         // Process the queue.
         while (!queue.empty() && iterations_left) {
-            --iterations_left;
             // Pop top element of the queue.
             auto [inc, und] = queue.back();
             queue.pop_back();
@@ -75,6 +74,7 @@ public:
                 // transactions that share ancestry with inc so far (which means only connected
                 // sets will be considered).
                 if (inc_none || inc.Overlaps(m_depgraph.Ancestors(split))) {
+                    --iterations_left;
                     // Add a queue entry with split included.
                     SetInfo new_inc(m_depgraph, inc | (m_todo & m_depgraph.Ancestors(split)));
                     queue.emplace_back(new_inc.transactions, und - new_inc.transactions);

From a38c38951e10a61af80e3bca1e1ae04de978d5c0 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 7 Aug 2024 13:40:47 -0400
Subject: [PATCH 02/11] clusterlin tests: separate testing of Search- and
 SimpleCandidateFinder

This separates the existing fuzz test into:

* clusterlin_search_finder: establishes SearchCandidateFinder's correctness using the
                            simpler SimpleCandidateFinder.
* clusterlin_simple_finder: establishes SimpleCandidateFinder's correctness using the
                            (even) simpler ExhaustiveCandidateFinder.

rather than trying to do both at once.
---
 src/test/fuzz/cluster_linearize.cpp | 114 ++++++++++++++++++++++------
 1 file changed, 90 insertions(+), 24 deletions(-)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 8d0249a5d73..22312c8c965 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -92,9 +92,8 @@ public:
 
 /** A very simple finder class for optimal candidate sets, which tries every subset.
  *
- * It is even simpler than SimpleCandidateFinder, and is primarily included here to test the
- * correctness of SimpleCandidateFinder, which is then used to test the correctness of
- * SearchCandidateFinder.
+ * It is even simpler than SimpleCandidateFinder, and exists just to help test the correctness of
+ * SimpleCandidateFinder, which is then used to test the correctness of SearchCandidateFinder.
  */
 template<typename SetType>
 class ExhaustiveCandidateFinder
@@ -640,11 +639,94 @@ FUZZ_TARGET(clusterlin_ancestor_finder)
 
 static constexpr auto MAX_SIMPLE_ITERATIONS = 300000;
 
+FUZZ_TARGET(clusterlin_simple_finder)
+{
+    // Verify that SimpleCandidateFinder works as expected by sanity checking the results
+    // and comparing them (if claimed to be optimal) against the sets found by
+    // ExhaustiveCandidateFinder and AncestorCandidateFinder.
+    //
+    // Note that SimpleCandidateFinder is only used in tests; the purpose of this fuzz test is to
+    // establish confidence in SimpleCandidateFinder, so that it can be used to test
+    // SearchCandidateFinder below.
+
+    // Retrieve a depgraph from the fuzz input.
+    SpanReader reader(buffer);
+    DepGraph<TestBitSet> depgraph;
+    try {
+        reader >> Using<DepGraphFormatter>(depgraph);
+    } catch (const std::ios_base::failure&) {}
+
+    // Instantiate the SimpleCandidateFinder to be tested, and the ExhaustiveCandidateFinder and
+    // AncestorCandidateFinder it is being tested against.
+    SimpleCandidateFinder smp_finder(depgraph);
+    ExhaustiveCandidateFinder exh_finder(depgraph);
+    AncestorCandidateFinder anc_finder(depgraph);
+
+    auto todo = depgraph.Positions();
+    while (todo.Any()) {
+        assert(!smp_finder.AllDone());
+        assert(!exh_finder.AllDone());
+        assert(!anc_finder.AllDone());
+        assert(anc_finder.NumRemaining() == todo.Count());
+
+        // Call SimpleCandidateFinder.
+        auto [found, iterations_done] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS);
+        bool optimal = (iterations_done != MAX_SIMPLE_ITERATIONS);
+
+        // Sanity check the result.
+        assert(iterations_done <= MAX_SIMPLE_ITERATIONS);
+        assert(found.transactions.Any());
+        assert(found.transactions.IsSubsetOf(todo));
+        assert(depgraph.FeeRate(found.transactions) == found.feerate);
+        // Check that it is topologically valid.
+        for (auto i : found.transactions) {
+            assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo));
+        }
+
+        // At most 2^(N-1) iterations can be required: the number of non-empty connected subsets a
+        // graph with N transactions can have. If MAX_SIMPLE_ITERATIONS exceeds this number, the
+        // result is necessarily optimal.
+        assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1)));
+        if (MAX_SIMPLE_ITERATIONS > (uint64_t{1} << (todo.Count() - 1))) assert(optimal);
+
+        // Perform quality checks only if SimpleCandidateFinder claims an optimal result.
+        if (optimal) {
+            // Optimal sets are always connected.
+            assert(depgraph.IsConnected(found.transactions));
+
+            // Compare with AncestorCandidateFinder.
+            auto anc = anc_finder.FindCandidateSet();
+            assert(anc.feerate <= found.feerate);
+
+            if (todo.Count() <= 12) {
+                // Compare with ExhaustiveCandidateFinder. This quickly gets computationally
+                // expensive for large clusters (O(2^n)), so only do it for sufficiently small ones.
+                auto exhaustive = exh_finder.FindCandidateSet();
+                assert(exhaustive.feerate == found.feerate);
+            }
+        }
+
+        // Find a topologically valid subset of transactions to remove from the graph.
+        auto del_set = ReadTopologicalSet(depgraph, todo, reader);
+        // If we did not find anything, use found itself, because we should remove something.
+        if (del_set.None()) del_set = found.transactions;
+        todo -= del_set;
+        smp_finder.MarkDone(del_set);
+        exh_finder.MarkDone(del_set);
+        anc_finder.MarkDone(del_set);
+    }
+
+    assert(smp_finder.AllDone());
+    assert(exh_finder.AllDone());
+    assert(anc_finder.AllDone());
+    assert(anc_finder.NumRemaining() == 0);
+}
+
 FUZZ_TARGET(clusterlin_search_finder)
 {
     // Verify that SearchCandidateFinder works as expected by sanity checking the results
-    // and comparing with the results from SimpleCandidateFinder, ExhaustiveCandidateFinder, and
-    // AncestorCandidateFinder.
+    // and comparing with the results from SimpleCandidateFinder and AncestorCandidateFinder,
+    // if the result is claimed to be optimal.
 
     // Retrieve an RNG seed, a depgraph, and whether to make it connected, from the fuzz input.
     SpanReader reader(buffer);
@@ -658,17 +740,15 @@ FUZZ_TARGET(clusterlin_search_finder)
     // the graph to be connected.
     if (make_connected) MakeConnected(depgraph);
 
-    // Instantiate ALL the candidate finders.
+    // Instantiate the candidate finders.
     SearchCandidateFinder src_finder(depgraph, rng_seed);
     SimpleCandidateFinder smp_finder(depgraph);
-    ExhaustiveCandidateFinder exh_finder(depgraph);
     AncestorCandidateFinder anc_finder(depgraph);
 
     auto todo = depgraph.Positions();
     while (todo.Any()) {
         assert(!src_finder.AllDone());
         assert(!smp_finder.AllDone());
-        assert(!exh_finder.AllDone());
         assert(!anc_finder.AllDone());
         assert(anc_finder.NumRemaining() == todo.Count());
 
@@ -684,6 +764,7 @@ FUZZ_TARGET(clusterlin_search_finder)
 
         // Call the search finder's FindCandidateSet for what remains of the graph.
         auto [found, iterations_done] = src_finder.FindCandidateSet(max_iterations, init_best);
+        bool optimal = iterations_done < max_iterations;
 
         // Sanity check the result.
         assert(iterations_done <= max_iterations);
@@ -709,7 +790,7 @@ FUZZ_TARGET(clusterlin_search_finder)
         }
 
         // Perform quality checks only if SearchCandidateFinder claims an optimal result.
-        if (iterations_done < max_iterations) {
+        if (optimal) {
             // Optimal sets are always connected.
             assert(depgraph.IsConnected(found.transactions));
 
@@ -723,19 +804,6 @@ FUZZ_TARGET(clusterlin_search_finder)
             // Compare with AncestorCandidateFinder;
             auto anc = anc_finder.FindCandidateSet();
             assert(found.feerate >= anc.feerate);
-
-            // Compare with ExhaustiveCandidateFinder. This quickly gets computationally expensive
-            // for large clusters (O(2^n)), so only do it for sufficiently small ones.
-            if (todo.Count() <= 12) {
-                auto exhaustive = exh_finder.FindCandidateSet();
-                assert(exhaustive.feerate == found.feerate);
-                // Also compare ExhaustiveCandidateFinder with SimpleCandidateFinder (this is
-                // primarily a test for SimpleCandidateFinder's correctness).
-                assert(exhaustive.feerate >= simple.feerate);
-                if (simple_iters < MAX_SIMPLE_ITERATIONS) {
-                    assert(exhaustive.feerate == simple.feerate);
-                }
-            }
         }
 
         // Find a topologically valid subset of transactions to remove from the graph.
@@ -745,13 +813,11 @@ FUZZ_TARGET(clusterlin_search_finder)
         todo -= del_set;
         src_finder.MarkDone(del_set);
         smp_finder.MarkDone(del_set);
-        exh_finder.MarkDone(del_set);
         anc_finder.MarkDone(del_set);
     }
 
     assert(src_finder.AllDone());
     assert(smp_finder.AllDone());
-    assert(exh_finder.AllDone());
     assert(anc_finder.AllDone());
     assert(anc_finder.NumRemaining() == 0);
 }

From 10e90f7aef9cff6fa63dc15adbe22e68f76aac58 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Tue, 10 Jun 2025 17:32:48 -0400
Subject: [PATCH 03/11] clusterlin tests: make SimpleCandidateFinder always
 find connected

Make a small change to guarantee that SimpleCandidateFinder only ever returns
connected solutions, even when non-optimal. Then test this property.
---
 src/test/fuzz/cluster_linearize.cpp | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 22312c8c965..691c1b7653c 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -48,6 +48,8 @@ public:
     /** Find a candidate set using at most max_iterations iterations, and the number of iterations
      *  actually performed. If that number is less than max_iterations, then the result is optimal.
      *
+     * Always returns a connected set of transactions.
+     *
      * Complexity: O(N * M), where M is the number of connected topological subsets of the cluster.
      *             That number is bounded by M <= 2^(N-1).
      */
@@ -60,8 +62,9 @@ public:
         std::vector<std::pair<SetType, SetType>> queue;
         // Initially we have just one queue element, with the entire graph in und.
         queue.emplace_back(SetType{}, m_todo);
-        // Best solution so far.
-        SetInfo best(m_depgraph, m_todo);
+        // Best solution so far. Initialize with the remaining ancestors of the first remaining
+        // transaction.
+        SetInfo best(m_depgraph, m_depgraph.Ancestors(m_todo.First()) & m_todo);
         // Process the queue.
         while (!queue.empty() && iterations_left) {
             // Pop top element of the queue.
@@ -689,11 +692,11 @@ FUZZ_TARGET(clusterlin_simple_finder)
         assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1)));
         if (MAX_SIMPLE_ITERATIONS > (uint64_t{1} << (todo.Count() - 1))) assert(optimal);
 
-        // Perform quality checks only if SimpleCandidateFinder claims an optimal result.
-        if (optimal) {
-            // Optimal sets are always connected.
-            assert(depgraph.IsConnected(found.transactions));
+        // SimpleCandidateFinder only finds connected sets.
+        assert(depgraph.IsConnected(found.transactions));
 
+        // Perform further quality checks only if SimpleCandidateFinder claims an optimal result.
+        if (optimal) {
             // Compare with AncestorCandidateFinder.
             auto anc = anc_finder.FindCandidateSet();
             assert(anc.feerate <= found.feerate);

From 98c1c88b6f8d928b3893e2910ce0c400a6fd1928 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 7 Aug 2024 14:27:33 -0400
Subject: [PATCH 04/11] clusterlin tests: separate testing of SimpleLinearize
 and Linearize

The separates the existing fuzz test into:

* clusterlin_linearize: establishes the correctness of Linearize() using the
                        simpler SimpleLinearize() function.
* clusterlin_simple_linearize: establishes the correctness of SimpleLinearize() by
                        comparing with all valid linearizations computed by
                        std::next_permutation.

rather than combining the first two into a single fuzz test.
---
 src/test/fuzz/cluster_linearize.cpp | 81 ++++++++++++++++++++---------
 1 file changed, 56 insertions(+), 25 deletions(-)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 691c1b7653c..9ce7dbfd7d7 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -942,6 +942,62 @@ FUZZ_TARGET(clusterlin_linearization_chunking)
     assert(chunking.NumChunksLeft() == 0);
 }
 
+FUZZ_TARGET(clusterlin_simple_linearize)
+{
+    // Verify the behavior of SimpleLinearize(). Note that SimpleLinearize is only used in tests;
+    // the purpose of this fuzz test is to establish confidence in SimpleLinearize, so that it can
+    // be used to test the real Linearize function in the fuzz test below.
+
+    // Retrieve an iteration count and a depgraph from the fuzz input.
+    SpanReader reader(buffer);
+    uint64_t iter_count{0};
+    DepGraph<TestBitSet> depgraph;
+    try {
+        reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph);
+    } catch (const std::ios_base::failure&) {}
+    iter_count %= MAX_SIMPLE_ITERATIONS;
+
+    // Invoke SimpleLinearize().
+    auto [linearization, optimal] = SimpleLinearize(depgraph, iter_count);
+    SanityCheck(depgraph, linearization);
+    auto simple_chunking = ChunkLinearization(depgraph, linearization);
+
+    // If the iteration count is sufficiently high, an optimal linearization must be found.
+    // SimpleLinearize on k transactions can take up to 2^(k-1) iterations (one per non-empty
+    // connected topologically valid subset), which sums over k=1..n to (2^n)-1.
+    const uint64_t n = depgraph.TxCount();
+    if (n <= 63 && (iter_count >> n)) {
+        assert(optimal);
+    }
+
+    // If SimpleLinearize claims optimal result, and the cluster is sufficiently small (there are
+    // n! linearizations), test that the result is as good as every valid linearization.
+    if (optimal && depgraph.TxCount() <= 7) {
+        std::vector<DepGraphIndex> perm_linearization;
+        // Initialize with the lexicographically-first linearization.
+        for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i);
+        // Iterate over all valid permutations.
+        do {
+            // Determine whether perm_linearization is topological.
+            TestBitSet perm_done;
+            bool perm_is_topo{true};
+            for (auto i : perm_linearization) {
+                perm_done.Set(i);
+                if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) {
+                    perm_is_topo = false;
+                    break;
+                }
+            }
+            // If so, verify that the obtained linearization is as good as the permutation.
+            if (perm_is_topo) {
+                auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
+                auto cmp = CompareChunks(simple_chunking, perm_chunking);
+                assert(cmp >= 0);
+            }
+        } while(std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
+    }
+}
+
 FUZZ_TARGET(clusterlin_linearize)
 {
     // Verify the behavior of Linearize().
@@ -1017,31 +1073,6 @@ FUZZ_TARGET(clusterlin_linearize)
         // If SimpleLinearize finds the optimal result too, they must be equal (if not,
         // SimpleLinearize is broken).
         if (simple_optimal) assert(cmp == 0);
-
-        // Only for very small clusters, test every topologically-valid permutation.
-        if (depgraph.TxCount() <= 7) {
-            std::vector<DepGraphIndex> perm_linearization;
-            for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i);
-            // Iterate over all valid permutations.
-            do {
-                // Determine whether perm_linearization is topological.
-                TestBitSet perm_done;
-                bool perm_is_topo{true};
-                for (auto i : perm_linearization) {
-                    perm_done.Set(i);
-                    if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) {
-                        perm_is_topo = false;
-                        break;
-                    }
-                }
-                // If so, verify that the obtained linearization is as good as the permutation.
-                if (perm_is_topo) {
-                    auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
-                    auto cmp = CompareChunks(chunking, perm_chunking);
-                    assert(cmp >= 0);
-                }
-            } while(std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
-        }
     }
 }
 

From 6e37824ac390835d3d9c82d197f58e992ccc584f Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 25 Sep 2024 22:18:33 -0400
Subject: [PATCH 05/11] clusterlin tests: optimize clusterlin_simple_linearize

Whenever a non-topological permutation is encountered, fast forward to the
last permutation with the same non-topological prefix, skipping over
potentially many permutations that are non-topological for the same reason.

With that, increase the checking of all permutations to clusters of size 8
instead of 7.
---
 src/test/fuzz/cluster_linearize.cpp | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 9ce7dbfd7d7..4c7906a8b26 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -972,27 +972,33 @@ FUZZ_TARGET(clusterlin_simple_linearize)
 
     // If SimpleLinearize claims optimal result, and the cluster is sufficiently small (there are
     // n! linearizations), test that the result is as good as every valid linearization.
-    if (optimal && depgraph.TxCount() <= 7) {
+    if (optimal && depgraph.TxCount() <= 8) {
         std::vector<DepGraphIndex> perm_linearization;
         // Initialize with the lexicographically-first linearization.
         for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i);
         // Iterate over all valid permutations.
         do {
-            // Determine whether perm_linearization is topological.
+            /** What prefix of perm_linearization is topological. */
+            DepGraphIndex topo_length{0};
             TestBitSet perm_done;
-            bool perm_is_topo{true};
-            for (auto i : perm_linearization) {
+            while (topo_length < perm_linearization.size()) {
+                auto i = perm_linearization[topo_length];
                 perm_done.Set(i);
-                if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) {
-                    perm_is_topo = false;
-                    break;
-                }
+                if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) break;
+                ++topo_length;
             }
-            // If so, verify that the obtained linearization is as good as the permutation.
-            if (perm_is_topo) {
+            if (topo_length == perm_linearization.size()) {
+                // If all of perm_linearization is topological, verify that the obtained
+                // linearization is no worse than it.
                 auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
                 auto cmp = CompareChunks(simple_chunking, perm_chunking);
                 assert(cmp >= 0);
+            } else {
+                // Otherwise, fast forward to the last permutation with the same non-topological
+                // prefix.
+                auto first_non_topo = perm_linearization.begin() + topo_length;
+                assert(std::is_sorted(first_non_topo + 1, perm_linearization.end()));
+                std::reverse(first_non_topo + 1, perm_linearization.end());
             }
         } while(std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
     }

From 5f92ebee0d2401d0d7b5c3466d66a4b09e51ccb0 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 25 Sep 2024 22:29:46 -0400
Subject: [PATCH 06/11] clusterlin tests: compare with fuzz-provided
 topological sets

---
 src/test/fuzz/cluster_linearize.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 4c7906a8b26..628f00f1aea 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -707,6 +707,10 @@ FUZZ_TARGET(clusterlin_simple_finder)
                 auto exhaustive = exh_finder.FindCandidateSet();
                 assert(exhaustive.feerate == found.feerate);
             }
+
+            // Compare with a topological set read from the fuzz input.
+            auto read_topo = ReadTopologicalSet(depgraph, todo, reader);
+            if (read_topo.Any()) assert(found.feerate >= depgraph.FeeRate(read_topo));
         }
 
         // Find a topologically valid subset of transactions to remove from the graph.
@@ -807,6 +811,10 @@ FUZZ_TARGET(clusterlin_search_finder)
             // Compare with AncestorCandidateFinder;
             auto anc = anc_finder.FindCandidateSet();
             assert(found.feerate >= anc.feerate);
+
+            // Compare with a topological set read from the fuzz input.
+            auto read_topo = ReadTopologicalSet(depgraph, todo, reader);
+            if (read_topo.Any()) assert(found.feerate >= depgraph.FeeRate(read_topo));
         }
 
         // Find a topologically valid subset of transactions to remove from the graph.

From 94f3e17c33e6fe63c4f791de7aacfb912059e394 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 25 Sep 2024 22:30:02 -0400
Subject: [PATCH 07/11] clusterlin tests: compare with fuzz-provided
 linearizations

---
 src/test/fuzz/cluster_linearize.cpp | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 628f00f1aea..b783d1595bd 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -1010,6 +1010,14 @@ FUZZ_TARGET(clusterlin_simple_linearize)
             }
         } while(std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
     }
+
+    if (optimal) {
+        // Compare with a linearization read from the fuzz input.
+        auto read = ReadLinearization(depgraph, reader);
+        auto read_chunking = ChunkLinearization(depgraph, read);
+        auto cmp = CompareChunks(simple_chunking, read_chunking);
+        assert(cmp >= 0);
+    }
 }
 
 FUZZ_TARGET(clusterlin_linearize)
@@ -1087,6 +1095,12 @@ FUZZ_TARGET(clusterlin_linearize)
         // If SimpleLinearize finds the optimal result too, they must be equal (if not,
         // SimpleLinearize is broken).
         if (simple_optimal) assert(cmp == 0);
+
+        // Compare with a linearization read from the fuzz input.
+        auto read = ReadLinearization(depgraph, reader);
+        auto read_chunking = ChunkLinearization(depgraph, read);
+        auto cmp_read = CompareChunks(chunking, read_chunking);
+        assert(cmp_read >= 0);
     }
 }
 

From da23ecef29b7e6a1e03de9697a6bdf6fbe944ef7 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Fri, 4 Oct 2024 10:59:32 -0400
Subject: [PATCH 08/11] clusterlin tests: support non-empty
 ReadTopologicalSubset()

In several call sites for ReadTopologicalSubset, a non-empty result is
expected, necessitating a special case at the call site for empty results.

Fix this by adding a bool non_empty argument, which does this special
casing (more efficiently) inside ReadTopologicalSubset itself.
---
 src/test/fuzz/cluster_linearize.cpp | 77 +++++++++++++++++------------
 1 file changed, 46 insertions(+), 31 deletions(-)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index b783d1595bd..eff93a36c25 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -187,12 +187,16 @@ void MakeConnected(DepGraph<BS>& depgraph)
 
 /** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */
 template<typename SetType>
-SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& todo, SpanReader& reader)
+SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& todo, SpanReader& reader, bool non_empty)
 {
+    // Read a bitmask from the fuzzing input. Add 1 if non_empty, so the mask is definitely not
+    // zero in that case.
     uint64_t mask{0};
     try {
         reader >> VARINT(mask);
     } catch(const std::ios_base::failure&) {}
+    mask += non_empty;
+
     SetType ret;
     for (auto i : todo) {
         if (!ret[i]) {
@@ -200,7 +204,17 @@ SetType ReadTopologicalSet(const DepGraph<SetType>& depgraph, const SetType& tod
             mask >>= 1;
         }
     }
-    return ret & todo;
+    ret &= todo;
+
+    // While mask starts off non-zero if non_empty is true, it is still possible that all its low
+    // bits are 0, and ret ends up being empty. As a last resort, use the in-todo ancestry of the
+    // first todo position.
+    if (non_empty && ret.None()) {
+        Assume(todo.Any());
+        ret = depgraph.Ancestors(todo.First()) & todo;
+        Assume(ret.Any());
+    }
+    return ret;
 }
 
 /** Given a dependency graph, construct any valid linearization for it, reading from a SpanReader. */
@@ -629,10 +643,10 @@ FUZZ_TARGET(clusterlin_ancestor_finder)
         assert(real_best_anc.has_value());
         assert(*real_best_anc == best_anc);
 
-        // Find a topologically valid subset of transactions to remove from the graph.
-        auto del_set = ReadTopologicalSet(depgraph, todo, reader);
-        // If we did not find anything, use best_anc itself, because we should remove something.
-        if (del_set.None()) del_set = best_anc.transactions;
+        // Find a non-empty topologically valid subset of transactions to remove from the graph.
+        // Using an empty set would mean the next iteration is identical to the current one, and
+        // could cause an infinite loop.
+        auto del_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
         todo -= del_set;
         anc_finder.MarkDone(del_set);
     }
@@ -708,15 +722,16 @@ FUZZ_TARGET(clusterlin_simple_finder)
                 assert(exhaustive.feerate == found.feerate);
             }
 
-            // Compare with a topological set read from the fuzz input.
-            auto read_topo = ReadTopologicalSet(depgraph, todo, reader);
-            if (read_topo.Any()) assert(found.feerate >= depgraph.FeeRate(read_topo));
+            // Compare with a non-empty topological set read from the fuzz input (comparing with an
+            // empty set is not interesting).
+            auto read_topo = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
+            assert(found.feerate >= depgraph.FeeRate(read_topo));
         }
 
-        // Find a topologically valid subset of transactions to remove from the graph.
-        auto del_set = ReadTopologicalSet(depgraph, todo, reader);
-        // If we did not find anything, use found itself, because we should remove something.
-        if (del_set.None()) del_set = found.transactions;
+        // Find a non-empty topologically valid subset of transactions to remove from the graph.
+        // Using an empty set would mean the next iteration is identical to the current one, and
+        // could cause an infinite loop.
+        auto del_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
         todo -= del_set;
         smp_finder.MarkDone(del_set);
         exh_finder.MarkDone(del_set);
@@ -766,8 +781,9 @@ FUZZ_TARGET(clusterlin_search_finder)
         } catch (const std::ios_base::failure&) {}
         max_iterations &= 0xfffff;
 
-        // Read an initial subset from the fuzz input.
-        SetInfo init_best(depgraph, ReadTopologicalSet(depgraph, todo, reader));
+        // Read an initial subset from the fuzz input (allowed to be empty).
+        auto init_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/false);
+        SetInfo init_best(depgraph, init_set);
 
         // Call the search finder's FindCandidateSet for what remains of the graph.
         auto [found, iterations_done] = src_finder.FindCandidateSet(max_iterations, init_best);
@@ -812,15 +828,16 @@ FUZZ_TARGET(clusterlin_search_finder)
             auto anc = anc_finder.FindCandidateSet();
             assert(found.feerate >= anc.feerate);
 
-            // Compare with a topological set read from the fuzz input.
-            auto read_topo = ReadTopologicalSet(depgraph, todo, reader);
-            if (read_topo.Any()) assert(found.feerate >= depgraph.FeeRate(read_topo));
+            // Compare with a non-empty topological set read from the fuzz input (comparing with an
+            // empty set is not interesting).
+            auto read_topo = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
+            assert(found.feerate >= depgraph.FeeRate(read_topo));
         }
 
-        // Find a topologically valid subset of transactions to remove from the graph.
-        auto del_set = ReadTopologicalSet(depgraph, todo, reader);
-        // If we did not find anything, use found itself, because we should remove something.
-        if (del_set.None()) del_set = found.transactions;
+        // Find a non-empty topologically valid subset of transactions to remove from the graph.
+        // Using an empty set would mean the next iteration is identical to the current one, and
+        // could cause an infinite loop.
+        auto del_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
         todo -= del_set;
         src_finder.MarkDone(del_set);
         smp_finder.MarkDone(del_set);
@@ -844,9 +861,10 @@ FUZZ_TARGET(clusterlin_linearization_chunking)
         reader >> Using<DepGraphFormatter>(depgraph);
     } catch (const std::ios_base::failure&) {}
 
-    // Retrieve a topologically-valid subset of depgraph.
+    // Retrieve a topologically-valid subset of depgraph (allowed to be empty, because the argument
+    // to LinearizationChunking::Intersect is allowed to be empty).
     auto todo = depgraph.Positions();
-    auto subset = SetInfo(depgraph, ReadTopologicalSet(depgraph, todo, reader));
+    auto subset = SetInfo(depgraph, ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/false));
 
     // Retrieve a valid linearization for depgraph.
     auto linearization = ReadLinearization(depgraph, reader);
@@ -935,13 +953,10 @@ FUZZ_TARGET(clusterlin_linearization_chunking)
             }
         }
 
-        // Find a subset to remove from linearization.
-        auto done = ReadTopologicalSet(depgraph, todo, reader);
-        if (done.None()) {
-            // We need to remove a non-empty subset, so fall back to the unlinearized ancestors of
-            // the first transaction in todo if done is empty.
-            done = depgraph.Ancestors(todo.First()) & todo;
-        }
+        // Find a non-empty topologically valid subset of transactions to remove from the graph.
+        // Using an empty set would mean the next iteration is identical to the current one, and
+        // could cause an infinite loop.
+        auto done = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
         todo -= done;
         chunking.MarkDone(done);
         subset = SetInfo(depgraph, subset.transactions - done);

From 1fa55a64ed18a0a3defbed33f6bd355929f67b48 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Mon, 3 Mar 2025 13:40:42 -0500
Subject: [PATCH 09/11] clusterlin tests: verify that chunks are minimal

---
 src/test/fuzz/cluster_linearize.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index eff93a36c25..b0d3a2ce8f9 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -1016,6 +1016,10 @@ FUZZ_TARGET(clusterlin_simple_linearize)
                 auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
                 auto cmp = CompareChunks(simple_chunking, perm_chunking);
                 assert(cmp >= 0);
+                // If perm_chunking is diagram-optimal, it cannot have more chunks than
+                // simple_chunking (as simple_chunking claims to be optimal, which implies minimal
+                // chunks.
+                if (cmp == 0) assert(simple_chunking.size() >= perm_chunking.size());
             } else {
                 // Otherwise, fast forward to the last permutation with the same non-topological
                 // prefix.
@@ -1110,6 +1114,9 @@ FUZZ_TARGET(clusterlin_linearize)
         // If SimpleLinearize finds the optimal result too, they must be equal (if not,
         // SimpleLinearize is broken).
         if (simple_optimal) assert(cmp == 0);
+        // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as
+        // chunking is claimed to be optimal, which implies minimal chunks).
+        if (cmp == 0) assert(chunking.size() >= simple_chunking.size());
 
         // Compare with a linearization read from the fuzz input.
         auto read = ReadLinearization(depgraph, reader);

From b64e61d2de65026c46f70cb91e3cb2213c336be0 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Wed, 11 Jun 2025 17:30:56 -0400
Subject: [PATCH 10/11] clusterlin: abstract try-permutations into
 ExhaustiveLinearize function

Rather than this exhaustive linearization check happening inline inside
clusterlin_simple_linearize, abstract it out into a Linearize()-like
function for clarity.

Note that this isn't exactly a refactor, because the old code would compare the
found linearization against all (valid) permutations, while the new code instead
first computes the best linearization from all valid permutations, and then
compares it with the found one.
---
 src/test/fuzz/cluster_linearize.cpp | 89 ++++++++++++++++++-----------
 1 file changed, 57 insertions(+), 32 deletions(-)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index b0d3a2ce8f9..579fe955f43 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -168,6 +168,58 @@ std::pair<std::vector<DepGraphIndex>, bool> SimpleLinearize(const DepGraph<SetTy
     return {std::move(linearization), optimal};
 }
 
+/** An even simpler linearization algorithm that tries all permutations.
+ *
+ * This roughly matches SimpleLinearize() (and Linearize) in interface and behavior, but always
+ * tries all topologically-valid transaction orderings, has no way to bound how much work it does,
+ * and always finds the optimal. With an O(n!) complexity, it should only be used for small
+ * clusters.
+ */
+template<typename SetType>
+std::vector<DepGraphIndex> ExhaustiveLinearize(const DepGraph<SetType>& depgraph)
+{
+    // The best linearization so far, and its chunking.
+    std::vector<DepGraphIndex> linearization;
+    std::vector<FeeFrac> chunking;
+
+    std::vector<DepGraphIndex> perm_linearization;
+    // Initialize with the lexicographically-first linearization.
+    for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i);
+    // Iterate over all valid permutations.
+    do {
+        /** What prefix of perm_linearization is topological. */
+        DepGraphIndex topo_length{0};
+        TestBitSet perm_done;
+        while (topo_length < perm_linearization.size()) {
+            auto i = perm_linearization[topo_length];
+            perm_done.Set(i);
+            if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) break;
+            ++topo_length;
+        }
+        if (topo_length == perm_linearization.size()) {
+            // If all of perm_linearization is topological, check if it is perhaps our best
+            // linearization so far.
+            auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
+            auto cmp = CompareChunks(perm_chunking, chunking);
+            // If the diagram is better, or if it is equal but with more chunks (because we
+            // prefer minimal chunks), consider this better.
+            if (linearization.empty() || cmp > 0 || (cmp == 0 && perm_chunking.size() > chunking.size())) {
+                linearization = perm_linearization;
+                chunking = perm_chunking;
+            }
+        } else {
+            // Otherwise, fast forward to the last permutation with the same non-topological
+            // prefix.
+            auto first_non_topo = perm_linearization.begin() + topo_length;
+            assert(std::is_sorted(first_non_topo + 1, perm_linearization.end()));
+            std::reverse(first_non_topo + 1, perm_linearization.end());
+        }
+    } while(std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
+
+    return linearization;
+}
+
+
 /** Stitch connected components together in a DepGraph, guaranteeing its corresponding cluster is connected. */
 template<typename BS>
 void MakeConnected(DepGraph<BS>& depgraph)
@@ -996,38 +1048,11 @@ FUZZ_TARGET(clusterlin_simple_linearize)
     // If SimpleLinearize claims optimal result, and the cluster is sufficiently small (there are
     // n! linearizations), test that the result is as good as every valid linearization.
     if (optimal && depgraph.TxCount() <= 8) {
-        std::vector<DepGraphIndex> perm_linearization;
-        // Initialize with the lexicographically-first linearization.
-        for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i);
-        // Iterate over all valid permutations.
-        do {
-            /** What prefix of perm_linearization is topological. */
-            DepGraphIndex topo_length{0};
-            TestBitSet perm_done;
-            while (topo_length < perm_linearization.size()) {
-                auto i = perm_linearization[topo_length];
-                perm_done.Set(i);
-                if (!depgraph.Ancestors(i).IsSubsetOf(perm_done)) break;
-                ++topo_length;
-            }
-            if (topo_length == perm_linearization.size()) {
-                // If all of perm_linearization is topological, verify that the obtained
-                // linearization is no worse than it.
-                auto perm_chunking = ChunkLinearization(depgraph, perm_linearization);
-                auto cmp = CompareChunks(simple_chunking, perm_chunking);
-                assert(cmp >= 0);
-                // If perm_chunking is diagram-optimal, it cannot have more chunks than
-                // simple_chunking (as simple_chunking claims to be optimal, which implies minimal
-                // chunks.
-                if (cmp == 0) assert(simple_chunking.size() >= perm_chunking.size());
-            } else {
-                // Otherwise, fast forward to the last permutation with the same non-topological
-                // prefix.
-                auto first_non_topo = perm_linearization.begin() + topo_length;
-                assert(std::is_sorted(first_non_topo + 1, perm_linearization.end()));
-                std::reverse(first_non_topo + 1, perm_linearization.end());
-            }
-        } while(std::next_permutation(perm_linearization.begin(), perm_linearization.end()));
+        auto exh_linearization = ExhaustiveLinearize(depgraph);
+        auto exh_chunking = ChunkLinearization(depgraph, exh_linearization);
+        auto cmp = CompareChunks(simple_chunking, exh_chunking);
+        assert(cmp == 0);
+        assert(simple_chunking.size() == exh_chunking.size());
     }
 
     if (optimal) {

From d7fca5c171f450621112457ea6f7c99b2e21d354 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Thu, 12 Jun 2025 10:49:04 -0400
Subject: [PATCH 11/11] clusterlin: add big comment explaning the relation
 between tests

---
 src/test/fuzz/cluster_linearize.cpp | 57 +++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp
index 579fe955f43..ef20eeaea0d 100644
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -17,6 +17,63 @@
 #include <utility>
 #include <vector>
 
+/*
+ * The tests in this file primarily cover the candidate finder classes and linearization algorithms.
+ *
+ *   <----: An implementation (at the start of the line --) is tested in the test marked with *,
+ *          possibly by comparison with other implementations (at the end of the line ->).
+ *   <<---: The right side is implemented using the left side.
+ *
+ *   +-----------------------+
+ *   | SearchCandidateFinder | <<---------------------\
+ *   +-----------------------+                        |
+ *     |                                            +-----------+
+ *     |                                            | Linearize |
+ *     |                                            +-----------+
+ *     |        +-------------------------+           |  |
+ *     |        | AncestorCandidateFinder | <<--------/  |
+ *     |        +-------------------------+              |
+ *     |          |                     ^                |        ^^  PRODUCTION CODE
+ *     |          |                     |                |        ||
+ *  ==============================================================================================
+ *     |          |                     |                |        ||
+ *     | clusterlin_ancestor_finder*    |                |        vv  TEST CODE
+ *     |                                |                |
+ *     |-clusterlin_search_finder*      |                |-clusterlin_linearize*
+ *     |                                |                |
+ *     v                                |                v
+ *   +-----------------------+          |           +-----------------+
+ *   | SimpleCandidateFinder | <<-------------------| SimpleLinearize |
+ *   +-----------------------+          |           +-----------------+
+ *                  |                   |                |
+ *                  +-------------------/                |
+ *                  |                                    |
+ *                  |-clusterlin_simple_finder*          |-clusterlin_simple_linearize*
+ *                  v                                    v
+ *   +---------------------------+                  +---------------------+
+ *   | ExhaustiveCandidateFinder |                  | ExhaustiveLinearize |
+ *   +---------------------------+                  +---------------------+
+ *
+ * More tests are included for lower-level and related functions and classes:
+ * - DepGraph tests:
+ *   - clusterlin_depgraph_sim
+ *   - clusterlin_depgraph_serialization
+ *   - clusterlin_components
+ * - ChunkLinearization and LinearizationChunking tests:
+ *   - clusterlin_chunking
+ *   - clusterlin_linearization_chunking
+ * - PostLinearize tests:
+ *   - clusterlin_postlinearize
+ *   - clusterlin_postlinearize_tree
+ *   - clusterlin_postlinearize_moved_leaf
+ * - MergeLinearization tests:
+ *   - clusterlin_merge
+ * - FixLinearization tests:
+ *   - clusterlin_fix_linearization
+ * - MakeConnected tests (a test-only function):
+ *   - clusterlin_make_connected
+ */
+
 using namespace cluster_linearize;
 
 namespace {