From f3c2fc867fc4332dfed0a3766997433e1676dbe3 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Sun, 13 Apr 2025 17:16:27 -0400
Subject: [PATCH] txgraph: add work limit to DoWork(), try optimal (feature)

This adds an `iters` parameter to DoWork(), which controls how much work it is
allowed to do right now.

Additionally, DoWork() no longer stops once everything is ACCEPTABLE: if there
is work budget left, it will also attempt to get every cluster linearized
optimally.
---
 src/test/fuzz/txgraph.cpp | 31 +++++++++++++++++-
 src/txgraph.cpp           | 68 ++++++++++++++++++++++++++++++++-------
 src/txgraph.h             |  7 ++--
 3 files changed, 90 insertions(+), 16 deletions(-)

diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp
index 7e4409df6b8..d9629b11a45 100644
--- a/src/test/fuzz/txgraph.cpp
+++ b/src/test/fuzz/txgraph.cpp
@@ -58,6 +58,8 @@ struct SimTxGraph
     SetType modified;
     /** The configured maximum total size of transactions per cluster. */
     uint64_t max_cluster_size;
+    /** Whether the corresponding real graph is known to be optimally linearized. */
+    bool real_is_optimal{false};
 
     /** Construct a new SimTxGraph with the specified maximum cluster count and size. */
     explicit SimTxGraph(DepGraphIndex cluster_count, uint64_t cluster_size) :
@@ -139,6 +141,7 @@ struct SimTxGraph
     {
         assert(graph.TxCount() < MAX_TRANSACTIONS);
         auto simpos = graph.AddTransaction(feerate);
+        real_is_optimal = false;
         MakeModified(simpos);
         assert(graph.Positions()[simpos]);
         simmap[simpos] = std::make_shared<TxGraph::Ref>();
@@ -158,6 +161,7 @@ struct SimTxGraph
         if (chl_pos == MISSING) return;
         graph.AddDependencies(SetType::Singleton(par_pos), chl_pos);
         MakeModified(par_pos);
+        real_is_optimal = false;
         // This may invalidate our cached oversized value.
         if (oversized.has_value() && !*oversized) oversized = std::nullopt;
     }
@@ -168,6 +172,7 @@ struct SimTxGraph
         auto pos = Find(ref);
         if (pos == MISSING) return;
         // No need to invoke MakeModified, because this equally affects main and staging.
+        real_is_optimal = false;
         graph.FeeRate(pos).fee = fee;
     }
 
@@ -177,6 +182,7 @@ struct SimTxGraph
         auto pos = Find(ref);
         if (pos == MISSING) return;
         MakeModified(pos);
+        real_is_optimal = false;
         graph.RemoveTransactions(SetType::Singleton(pos));
         simrevmap.erase(simmap[pos].get());
         // Retain the TxGraph::Ref corresponding to this position, so the Ref destruction isn't
@@ -203,6 +209,7 @@ struct SimTxGraph
         } else {
             MakeModified(pos);
             graph.RemoveTransactions(SetType::Singleton(pos));
+            real_is_optimal = false;
             simrevmap.erase(simmap[pos].get());
             simmap[pos].reset();
             // This may invalidate our cached oversized value.
@@ -467,6 +474,7 @@ FUZZ_TARGET(txgraph)
                     if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break;
                 }
                 top_sim.AddDependency(par, chl);
+                top_sim.real_is_optimal = false;
                 real->AddDependency(*par, *chl);
                 break;
             } else if ((block_builders.empty() || sims.size() > 1) && top_sim.removed.size() < 100 && command-- == 0) {
@@ -721,7 +729,18 @@ FUZZ_TARGET(txgraph)
                 break;
             } else if (command-- == 0) {
                 // DoWork.
-                real->DoWork();
+                uint64_t iters = provider.ConsumeIntegralInRange<uint64_t>(0, alt ? 10000 : 255);
+                if (real->DoWork(iters)) {
+                    for (unsigned level = 0; level < sims.size(); ++level) {
+                        // DoWork() will not optimize oversized levels.
+                        if (sims[level].IsOversized()) continue;
+                        // DoWork() will not touch the main level if a builder is present.
+                        if (level == 0 && !block_builders.empty()) continue;
+                        // If neither of the two above conditions holds, and DoWork() returned
+                        // true, then the level is optimal.
+                        sims[level].real_is_optimal = true;
+                    }
+                }
                 break;
             } else if (sims.size() == 2 && !sims[0].IsOversized() && !sims[1].IsOversized() && command-- == 0) {
                 // GetMainStagingDiagrams()
@@ -1005,6 +1024,16 @@ FUZZ_TARGET(txgraph)
         }
         assert(todo.None());
 
+        // If the real graph claims to be optimal (the last DoWork() call returned true), verify
+        // that calling Linearize on it does not improve it further.
+        if (sims[0].real_is_optimal) {
+            auto real_diagram = ChunkLinearization(sims[0].graph, vec1);
+            auto [sim_lin, _optimal, _cost] = Linearize(sims[0].graph, 300000, rng.rand64(), vec1);
+            auto sim_diagram = ChunkLinearization(sims[0].graph, sim_lin);
+            auto cmp = CompareChunks(real_diagram, sim_diagram);
+            assert(cmp == 0);
+        }
+
         // For every transaction in the total ordering, find a random one before it and after it,
         // and compare their chunk feerates, which must be consistent with the ordering.
         for (size_t pos = 0; pos < vec1.size(); ++pos) {
diff --git a/src/txgraph.cpp b/src/txgraph.cpp
index 6615bca50d0..345be02f0f1 100644
--- a/src/txgraph.cpp
+++ b/src/txgraph.cpp
@@ -189,8 +189,9 @@ public:
     void Merge(TxGraphImpl& graph, Cluster& cluster) noexcept;
     /** Given a span of (parent, child) pairs that all belong to this Cluster, apply them. */
     void ApplyDependencies(TxGraphImpl& graph, std::span<std::pair<GraphIndex, GraphIndex>> to_apply) noexcept;
-    /** Improve the linearization of this Cluster. Returns how much work was performed. */
-    uint64_t Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept;
+    /** Improve the linearization of this Cluster. Returns how much work was performed and whether
+     *  the Cluster's QualityLevel improved as a result. */
+    std::pair<uint64_t, bool> Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept;
     /** For every chunk in the cluster, append its FeeFrac to ret. */
     void AppendChunkFeerates(std::vector<FeeFrac>& ret) const noexcept;
     /** Add a TrimTxData entry (filling m_chunk_feerate, m_index, m_tx_size) for every
@@ -592,7 +593,7 @@ public:
     void AddDependency(const Ref& parent, const Ref& child) noexcept final;
     void SetTransactionFee(const Ref&, int64_t fee) noexcept final;
 
-    void DoWork() noexcept final;
+    bool DoWork(uint64_t iters) noexcept final;
 
     void StartStaging() noexcept final;
     void CommitStaging() noexcept final;
@@ -1655,12 +1656,12 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept
     clusterset.m_group_data = GroupData{};
 }
 
-uint64_t Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept
+std::pair<uint64_t, bool> Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept
 {
     // We can only relinearize Clusters that do not need splitting.
     Assume(!NeedsSplitting());
     // No work is required for Clusters which are already optimally linearized.
-    if (IsOptimal()) return 0;
+    if (IsOptimal()) return {0, false};
     // Invoke the actual linearization algorithm (passing in the existing one).
     uint64_t rng_seed = graph.m_rng.rand64();
     auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization);
@@ -1670,11 +1671,17 @@ uint64_t Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept
     // Update the linearization.
     m_linearization = std::move(linearization);
     // Update the Cluster's quality.
-    auto new_quality = optimal ? QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE;
-    graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality);
+    bool improved = false;
+    if (optimal) {
+        graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::OPTIMAL);
+        improved = true;
+    } else if (max_iters >= graph.m_acceptable_iters && !IsAcceptable()) {
+        graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::ACCEPTABLE);
+        improved = true;
+    }
     // Update the Entry objects.
     Updated(graph);
-    return cost;
+    return {cost, improved};
 }
 
 void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept
@@ -2478,13 +2485,50 @@ void TxGraphImpl::SanityCheck() const
     assert(actual_chunkindex == expected_chunkindex);
 }
 
-void TxGraphImpl::DoWork() noexcept
+bool TxGraphImpl::DoWork(uint64_t iters) noexcept
 {
-    for (int level = 0; level <= GetTopLevel(); ++level) {
-        if (level > 0 || m_main_chunkindex_observers == 0) {
-            MakeAllAcceptable(level);
+    uint64_t iters_done{0};
+    // First linearize everything in NEEDS_RELINEARIZE to an acceptable level. If more budget
+    // remains after that, try to make everything optimal.
+    for (QualityLevel quality : {QualityLevel::NEEDS_RELINEARIZE, QualityLevel::ACCEPTABLE}) {
+        // First linearize staging, if it exists, then main.
+        for (int level = GetTopLevel(); level >= 0; --level) {
+            // Do not modify main if it has any observers.
+            if (level == 0 && m_main_chunkindex_observers != 0) continue;
+            ApplyDependencies(level);
+            auto& clusterset = GetClusterSet(level);
+            // Do not modify oversized levels.
+            if (clusterset.m_oversized == true) continue;
+            auto& queue = clusterset.m_clusters[int(quality)];
+            while (!queue.empty()) {
+                if (iters_done >= iters) return false;
+                // Randomize the order in which we process, so that if the first cluster somehow
+                // needs more work than what iters allows, we don't keep spending it on the same
+                // one.
+                auto pos = m_rng.randrange<size_t>(queue.size());
+                auto iters_now = iters - iters_done;
+                if (quality == QualityLevel::NEEDS_RELINEARIZE) {
+                    // If we're working with clusters that need relinearization still, only perform
+                    // up to m_acceptable_iters iterations. If they become ACCEPTABLE, and we still
+                    // have budget after all other clusters are ACCEPTABLE too, we'll spend the
+                    // remaining budget on trying to make them OPTIMAL.
+                    iters_now = std::min(iters_now, m_acceptable_iters);
+                }
+                auto [cost, improved] = queue[pos].get()->Relinearize(*this, iters_now);
+                iters_done += cost;
+                // If no improvement was made to the Cluster, it means we've essentially run out of
+                // budget. Even though it may be the case that iters_done < iters still, the
+                // linearizer decided there wasn't enough budget left to attempt anything with.
+                // To avoid an infinite loop that keeps trying clusters with minuscule budgets,
+                // stop here too.
+                if (!improved) return false;
+            }
         }
     }
+    // All possible work has been performed, so we can return true. Note that this does *not* mean
+    // that all clusters are optimally linearized now. It may be that there is nothing left to do
+    // because all non-optimal clusters are in oversized and/or observer-bearing levels.
+    return true;
 }
 
 void BlockBuilderImpl::Next() noexcept
diff --git a/src/txgraph.h b/src/txgraph.h
index 4bddf95b860..ef2a7bd3c06 100644
--- a/src/txgraph.h
+++ b/src/txgraph.h
@@ -94,9 +94,10 @@ public:
     virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0;
 
     /** TxGraph is internally lazy, and will not compute many things until they are needed.
-     *  Calling DoWork will compute everything now, so that future operations are fast. This can be
-     *  invoked while oversized. */
-    virtual void DoWork() noexcept = 0;
+     *  Calling DoWork will perform some work now, if there is any (controlled by iters), so that
+     *  future operations are fast. Returns whether all currently-available work is done. This can
+     *  be invoked while oversized, but oversized graphs will be skipped by this call. */
+    virtual bool DoWork(uint64_t iters) noexcept = 0;
 
     /** Create a staging graph (which cannot exist already). This acts as if a full copy of
      *  the transaction graph is made, upon which further modifications are made. This copy can