clusterlin: support fixing linearizations (feature)

This also updates FixLinearization to just be a thin wrapper around Linearize. In a future commit, FixLinearization will be removed entirely.
2026-01-19 14:53:43 +01:00 · 2025-10-23 19:16:50 -04:00
parent 755f0900a2
commit 01ffcf464a
3 changed files with 44 additions and 54 deletions
--- a/src/cluster_linearize.h
+++ b/src/cluster_linearize.h
@@ -1338,8 +1338,9 @@ public:
 * @param[in] rng_seed            A random number seed to control search order. This prevents peers
 *                                from predicting exactly which clusters would be hard for us to
 *                                linearize.
- * @param[in] old_linearization   An existing linearization for the cluster (which must be
- *                                topologically valid), or empty.
+ * @param[in] old_linearization   An existing linearization for the cluster, or empty.
+ * @param[in] is_topological      (Only relevant if old_linearization is not empty) Whether
+ *                                old_linearization is known to be topologically valid.
 * @return                        A tuple of:
 *                                - The resulting linearization. It is guaranteed to be at least as
 *                                  good (in the feerate diagram sense) as old_linearization.
@@ -1348,12 +1349,13 @@ public:
 *                                - How many optimization steps were actually performed.
 */
 template<typename SetType>
-std::tuple<std::vector<DepGraphIndex>, bool, uint64_t> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span<const DepGraphIndex> old_linearization = {}) noexcept
+std::tuple<std::vector<DepGraphIndex>, bool, uint64_t> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span<const DepGraphIndex> old_linearization = {}, bool is_topological = true) noexcept
 {
    /** Initialize a spanning forest data structure for this cluster. */
    SpanningForestState forest(depgraph, rng_seed);
    if (!old_linearization.empty()) {
        forest.LoadLinearization(old_linearization);
+        if (!is_topological) forest.MakeTopological();
    } else {
        forest.MakeTopological();
    }
@@ -1573,36 +1575,14 @@ void PostLinearize(const DepGraph<SetType>& depgraph, std::span<DepGraphIndex> l
    }
 }

-/** Make linearization topological, retaining its ordering where possible. */
+/** Make linearization topological, reusing information from the old linearization where possible. */
 template<typename SetType>
 void FixLinearization(const DepGraph<SetType>& depgraph, std::span<DepGraphIndex> linearization) noexcept
 {
-    // This algorithm can be summarized as moving every element in the linearization backwards
-    // until it is placed after all its ancestors.
-    SetType done;
-    const auto len = linearization.size();
-    // Iterate over the elements of linearization from back to front (i is distance from back).
-    for (DepGraphIndex i = 0; i < len; ++i) {
-        /** The element at that position. */
-        DepGraphIndex elem = linearization[len - 1 - i];
-        /** j represents how far from the back of the linearization elem should be placed. */
-        DepGraphIndex j = i;
-        // Figure out which elements need to be moved before elem.
-        SetType place_before = done & depgraph.Ancestors(elem);
-        // Find which position to place elem in (updating j), continuously moving the elements
-        // in between forward.
-        while (place_before.Any()) {
-            // j cannot be 0 here; if it was, then there was necessarily nothing earlier which
-            // elem needs to be placed before anymore, and place_before would be empty.
-            Assume(j > 0);
-            auto to_swap = linearization[len - 1 - (j - 1)];
-            place_before.Reset(to_swap);
-            linearization[len - 1 - (j--)] = to_swap;
-        }
-        // Put elem in its final position and mark it as done.
-        linearization[len - 1 - j] = elem;
-        done.Set(elem);
-    }
+    // TODO: update call sites to use Linearize directly.
+    auto [new_lin, _opt, _steps] = Linearize(depgraph, /*max_iterations=*/0, /*rng_seed=*/0, linearization, /*is_topological=*/false);
+    Assume(new_lin.size() == linearization.size());
+    std::copy(new_lin.begin(), new_lin.end(), linearization.begin());
 }

 } // namespace cluster_linearize
--- a/src/test/cluster_linearize_tests.cpp
+++ b/src/test/cluster_linearize_tests.cpp
@@ -68,7 +68,8 @@ void TestOptimalLinearization(const std::vector<uint8_t>& enc, const std::vector
        for (int iter = 0; iter < 200; ++iter) {
            bool opt;
            uint64_t cost{0};
-            switch (rng.randrange(3)) {
+            bool is_topological{true};
+            switch (rng.randrange(4)) {
            case 0:
                // Use empty input linearization.
                lin.clear();
@@ -77,12 +78,17 @@ void TestOptimalLinearization(const std::vector<uint8_t>& enc, const std::vector
                // Reuse previous optimal linearization as input.
                break;
            case 2:
-                // Construct random input linearization.
+                // Construct random valid input linearization.
                std::shuffle(lin.begin(), lin.end(), rng);
-                FixLinearization(depgraph, lin);
+                std::sort(lin.begin(), lin.end(), [&](auto a, auto b) { return depgraph.Ancestors(a).Count() < depgraph.Ancestors(b).Count(); });
+                break;
+            case 3:
+                // Construct random potentially invalid input linearization.
+                std::shuffle(lin.begin(), lin.end(), rng);
+                is_topological = false;
                break;
            }
-            std::tie(lin, opt, cost) = Linearize(depgraph, 1000000000000, rng.rand64(), lin);
+            std::tie(lin, opt, cost) = Linearize(depgraph, 1000000000000, rng.rand64(), lin, is_topological);
            BOOST_CHECK(opt);
            BOOST_CHECK(cost <= MaxOptimalLinearizationIters(depgraph.TxCount()));
            SanityCheck(depgraph, lin);
--- a/src/test/fuzz/cluster_linearize.cpp
+++ b/src/test/fuzz/cluster_linearize.cpp
@@ -998,35 +998,40 @@ FUZZ_TARGET(clusterlin_linearize)
    DepGraph<TestBitSet> depgraph;
    uint64_t rng_seed{0};
    uint64_t iter_count{0};
-    uint8_t make_connected{1};
+    uint8_t flags{7};
    try {
-        reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
+        reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> flags;
    } catch (const std::ios_base::failure&) {}
+    bool make_connected = flags & 1;
+    // The following 3 booleans are all derived from (flags & 6), which has 4 possible values:
+    // - (flags & 6) == 0: do not provide input linearization.
+    // - (flags & 6) == 2: provide potentially non-topological input linearization.
+    // - (flags & 6) == 4: provide topological input linearization, but do not claim it is
+    //                     topological.
+    // - (flags & 6) == 6: provide topological input linearization, and claim it is topological.
+    bool provide_input = flags & 6;
+    bool provide_topological_input = flags & 4;
+    bool claim_topological_input = (flags & 6) == 6;
    // The most complicated graphs are connected ones (other ones just split up). Optionally force
    // the graph to be connected.
    if (make_connected) MakeConnected(depgraph);

    // Optionally construct an old linearization for it.
    std::vector<DepGraphIndex> old_linearization;
-    {
-        uint8_t have_old_linearization{0};
-        try {
-            reader >> have_old_linearization;
-        } catch(const std::ios_base::failure&) {}
-        if (have_old_linearization & 1) {
-            old_linearization = ReadLinearization(depgraph, reader);
-            SanityCheck(depgraph, old_linearization);
-        }
+    if (provide_input) {
+        old_linearization = ReadLinearization(depgraph, reader, /*topological=*/provide_topological_input);
+        if (provide_topological_input) SanityCheck(depgraph, old_linearization);
    }

    // Invoke Linearize().
    iter_count &= 0x7ffff;
-    auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization);
+    auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization, /*is_topological=*/claim_topological_input);
    SanityCheck(depgraph, linearization);
    auto chunking = ChunkLinearization(depgraph, linearization);

-    // Linearization must always be as good as the old one, if provided.
-    if (!old_linearization.empty()) {
+    // Linearization must always be as good as the old one, if provided and topological (even when
+    // not claimed to be topological).
+    if (provide_topological_input) {
        auto old_chunking = ChunkLinearization(depgraph, old_linearization);
        auto cmp = CompareChunks(chunking, old_chunking);
        assert(cmp >= 0);
@@ -1231,12 +1236,11 @@ FUZZ_TARGET(clusterlin_fix_linearization)
    // Sanity check it (which includes testing whether it is topological).
    SanityCheck(depgraph, linearization_fixed);

-    // FixLinearization does not modify the topological prefix of linearization.
-    assert(std::equal(linearization.begin(), linearization.begin() + topo_prefix,
-                      linearization_fixed.begin()));
-    // This also means that if linearization was entirely topological, FixLinearization cannot have
-    // modified it. This is implied by the assertion above already, but repeat it explicitly.
+    // If linearization was entirely topological, FixLinearization cannot worsen it.
    if (topo_prefix == linearization.size()) {
-        assert(linearization == linearization_fixed);
+        auto chunking = ChunkLinearization(depgraph, linearization);
+        auto chunking_fixed = ChunkLinearization(depgraph, linearization_fixed);
+        auto cmp = CompareChunks(chunking_fixed, chunking);
+        assert(cmp >= 0);
    }
 }