From 01ffcf464a4679136185e323b5b0328695a5bd1e Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 23 Oct 2025 19:16:50 -0400 Subject: [PATCH 1/5] clusterlin: support fixing linearizations (feature) This also updates FixLinearization to just be a thin wrapper around Linearize. In a future commit, FixLinearization will be removed entirely. --- src/cluster_linearize.h | 40 +++++++------------------ src/test/cluster_linearize_tests.cpp | 14 ++++++--- src/test/fuzz/cluster_linearize.cpp | 44 +++++++++++++++------------- 3 files changed, 44 insertions(+), 54 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index f5516f69067..9b7fd6a2f33 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -1338,8 +1338,9 @@ public: * @param[in] rng_seed A random number seed to control search order. This prevents peers * from predicting exactly which clusters would be hard for us to * linearize. - * @param[in] old_linearization An existing linearization for the cluster (which must be - * topologically valid), or empty. + * @param[in] old_linearization An existing linearization for the cluster, or empty. + * @param[in] is_topological (Only relevant if old_linearization is not empty) Whether + * old_linearization is topologically valid. * @return A tuple of: * - The resulting linearization. It is guaranteed to be at least as * good (in the feerate diagram sense) as old_linearization. @@ -1348,12 +1349,13 @@ public: * - How many optimization steps were actually performed. */ template -std::tuple, bool, uint64_t> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span old_linearization = {}) noexcept +std::tuple, bool, uint64_t> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span old_linearization = {}, bool is_topological = true) noexcept { /** Initialize a spanning forest data structure for this cluster. */ SpanningForestState forest(depgraph, rng_seed); if (!old_linearization.empty()) { forest.LoadLinearization(old_linearization); + if (!is_topological) forest.MakeTopological(); } else { forest.MakeTopological(); } @@ -1573,36 +1575,14 @@ void PostLinearize(const DepGraph& depgraph, std::span l } } -/** Make linearization topological, retaining its ordering where possible. */ +/** Make linearization topological, reusing information from the old linearization where possible. */ template void FixLinearization(const DepGraph& depgraph, std::span linearization) noexcept { - // This algorithm can be summarized as moving every element in the linearization backwards - // until it is placed after all its ancestors. - SetType done; - const auto len = linearization.size(); - // Iterate over the elements of linearization from back to front (i is distance from back). - for (DepGraphIndex i = 0; i < len; ++i) { - /** The element at that position. */ - DepGraphIndex elem = linearization[len - 1 - i]; - /** j represents how far from the back of the linearization elem should be placed. */ - DepGraphIndex j = i; - // Figure out which elements need to be moved before elem. - SetType place_before = done & depgraph.Ancestors(elem); - // Find which position to place elem in (updating j), continuously moving the elements - // in between forward. - while (place_before.Any()) { - // j cannot be 0 here; if it was, then there was necessarily nothing earlier which - // elem needs to be placed before anymore, and place_before would be empty. - Assume(j > 0); - auto to_swap = linearization[len - 1 - (j - 1)]; - place_before.Reset(to_swap); - linearization[len - 1 - (j--)] = to_swap; - } - // Put elem in its final position and mark it as done. - linearization[len - 1 - j] = elem; - done.Set(elem); - } + // TODO: update call sites to use Linearize directly. + auto [new_lin, _opt, _steps] = Linearize(depgraph, /*max_iterations=*/0, /*rng_seed=*/0, linearization, /*is_topological=*/false); + Assume(new_lin.size() == linearization.size()); + std::copy(new_lin.begin(), new_lin.end(), linearization.begin()); } } // namespace cluster_linearize diff --git a/src/test/cluster_linearize_tests.cpp b/src/test/cluster_linearize_tests.cpp index ede17b97813..a2be29776cb 100644 --- a/src/test/cluster_linearize_tests.cpp +++ b/src/test/cluster_linearize_tests.cpp @@ -68,7 +68,8 @@ void TestOptimalLinearization(const std::vector& enc, const std::vector for (int iter = 0; iter < 200; ++iter) { bool opt; uint64_t cost{0}; - switch (rng.randrange(3)) { + bool is_topological{true}; + switch (rng.randrange(4)) { case 0: // Use empty input linearization. lin.clear(); @@ -77,12 +78,17 @@ void TestOptimalLinearization(const std::vector& enc, const std::vector // Reuse previous optimal linearization as input. break; case 2: - // Construct random input linearization. + // Construct random valid input linearization. std::shuffle(lin.begin(), lin.end(), rng); - FixLinearization(depgraph, lin); + std::sort(lin.begin(), lin.end(), [&](auto a, auto b) { return depgraph.Ancestors(a).Count() < depgraph.Ancestors(b).Count(); }); + break; + case 3: + // Construct random potentially invalid input linearization. + std::shuffle(lin.begin(), lin.end(), rng); + is_topological = false; break; } - std::tie(lin, opt, cost) = Linearize(depgraph, 1000000000000, rng.rand64(), lin); + std::tie(lin, opt, cost) = Linearize(depgraph, 1000000000000, rng.rand64(), lin, is_topological); BOOST_CHECK(opt); BOOST_CHECK(cost <= MaxOptimalLinearizationIters(depgraph.TxCount())); SanityCheck(depgraph, lin); diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index ccf1be68768..9bd4280512f 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -998,35 +998,40 @@ FUZZ_TARGET(clusterlin_linearize) DepGraph depgraph; uint64_t rng_seed{0}; uint64_t iter_count{0}; - uint8_t make_connected{1}; + uint8_t flags{7}; try { - reader >> VARINT(iter_count) >> Using(depgraph) >> rng_seed >> make_connected; + reader >> VARINT(iter_count) >> Using(depgraph) >> rng_seed >> flags; } catch (const std::ios_base::failure&) {} + bool make_connected = flags & 1; + // The following 3 booleans have 4 combinations: + // - (flags & 6) == 0: do not provide input linearization. + // - (flags & 6) == 2: provide potentially non-topological input. + // - (flags & 6) == 4: provide topological input linearization, but do not claim it is + // topological. + // - (flags & 6) == 6: provide topological input linearization, and claim it is topological. + bool provide_input = flags & 6; + bool provide_topological_input = flags & 4; + bool claim_topological_input = (flags & 6) == 6; // The most complicated graphs are connected ones (other ones just split up). Optionally force // the graph to be connected. if (make_connected) MakeConnected(depgraph); // Optionally construct an old linearization for it. std::vector old_linearization; - { - uint8_t have_old_linearization{0}; - try { - reader >> have_old_linearization; - } catch(const std::ios_base::failure&) {} - if (have_old_linearization & 1) { - old_linearization = ReadLinearization(depgraph, reader); - SanityCheck(depgraph, old_linearization); - } + if (provide_input) { + old_linearization = ReadLinearization(depgraph, reader, /*topological=*/provide_topological_input); + if (provide_topological_input) SanityCheck(depgraph, old_linearization); } // Invoke Linearize(). iter_count &= 0x7ffff; - auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization); + auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization, /*is_topological=*/claim_topological_input); SanityCheck(depgraph, linearization); auto chunking = ChunkLinearization(depgraph, linearization); - // Linearization must always be as good as the old one, if provided. - if (!old_linearization.empty()) { + // Linearization must always be as good as the old one, if provided and topological (even when + // not claimed to be topological). + if (provide_topological_input) { auto old_chunking = ChunkLinearization(depgraph, old_linearization); auto cmp = CompareChunks(chunking, old_chunking); assert(cmp >= 0); @@ -1231,12 +1236,11 @@ FUZZ_TARGET(clusterlin_fix_linearization) // Sanity check it (which includes testing whether it is topological). SanityCheck(depgraph, linearization_fixed); - // FixLinearization does not modify the topological prefix of linearization. - assert(std::equal(linearization.begin(), linearization.begin() + topo_prefix, - linearization_fixed.begin())); - // This also means that if linearization was entirely topological, FixLinearization cannot have - // modified it. This is implied by the assertion above already, but repeat it explicitly. + // If linearization was entirely topological, FixLinearization cannot worsen it. if (topo_prefix == linearization.size()) { - assert(linearization == linearization_fixed); + auto chunking = ChunkLinearization(depgraph, linearization); + auto chunking_fixed = ChunkLinearization(depgraph, linearization_fixed); + auto cmp = CompareChunks(chunking_fixed, chunking); + assert(cmp >= 0); } } From 62dd88624a7fba14d2dac32d94fd101eb378d1e7 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 16 Oct 2025 11:23:50 -0400 Subject: [PATCH 2/5] txgraph: drop NEEDS_SPLIT_ACCEPTABLE (simplification) With the SFL algorithm, we will practically be capable of keeping most if not all clusters optimal. With that, it seems less valuable to avoid doing work after splitting an acceptable cluster, because by doing some work we may get it to OPTIMAL. This reduces the complexity of the code a bit as well. --- src/txgraph.cpp | 63 +++++++++++-------------------------------------- 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 274b14a4750..492e13c8900 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -42,8 +42,6 @@ enum class QualityLevel OVERSIZED_SINGLETON, /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ NEEDS_SPLIT, - /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. */ - NEEDS_SPLIT_ACCEPTABLE, /** This cluster has undergone changes that warrant re-linearization. */ NEEDS_RELINEARIZE, /** The minimal level of linearization has been performed, but it is not known to be optimal. */ @@ -128,10 +126,9 @@ public: // Generic helper functions. /** Whether the linearization of this Cluster can be exposed. */ - bool IsAcceptable(bool after_split = false) const noexcept + bool IsAcceptable() const noexcept { - return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL || - (after_split && m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE); + return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL; } /** Whether the linearization of this Cluster is optimal. */ bool IsOptimal() const noexcept @@ -145,8 +142,7 @@ public: /** Whether this cluster requires splitting. */ bool NeedsSplitting() const noexcept { - return m_quality == QualityLevel::NEEDS_SPLIT || - m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE; + return m_quality == QualityLevel::NEEDS_SPLIT; } /** Get the smallest number of transactions this Cluster is intended for. */ @@ -1179,37 +1175,20 @@ void GenericClusterImpl::ApplyRemovals(TxGraphImpl& graph, int level, std::span< to_remove = to_remove.subspan(1); } while(!to_remove.empty()); - auto quality = m_quality; Assume(todo.Any()); // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries // removed, so we benefit from batching all the removals). m_depgraph.RemoveTransactions(todo); m_mapping.resize(m_depgraph.PositionRange()); - // First remove all removals at the end of the linearization. - while (!m_linearization.empty() && todo[m_linearization.back()]) { - todo.Reset(m_linearization.back()); - m_linearization.pop_back(); - } - if (todo.None()) { - // If no further removals remain, and thus all removals were at the end, we may be able - // to leave the cluster at a better quality level. - if (IsAcceptable(/*after_split=*/true)) { - quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; - } else { - quality = QualityLevel::NEEDS_SPLIT; - } - } else { - // If more removals remain, filter those out of m_linearization. - m_linearization.erase(std::remove_if( - m_linearization.begin(), - m_linearization.end(), - [&](auto pos) { return todo[pos]; }), m_linearization.end()); - quality = QualityLevel::NEEDS_SPLIT; - } + // Filter removed transactions out of m_linearization. + m_linearization.erase(std::remove_if(m_linearization.begin(), m_linearization.end(), + [&](auto pos) { return todo[pos]; }), + m_linearization.end()); + Compact(); graph.GetClusterSet(level).m_cluster_usage += TotalMemoryUsage(); - graph.SetClusterQuality(level, m_quality, m_setindex, quality); + graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); Updated(graph, level); } @@ -1354,17 +1333,7 @@ bool GenericClusterImpl::Split(TxGraphImpl& graph, int level) noexcept // This function can only be called when the Cluster needs splitting. Assume(NeedsSplitting()); // Determine the new quality the split-off Clusters will have. - QualityLevel new_quality = IsAcceptable(/*after_split=*/true) ? QualityLevel::ACCEPTABLE - : QualityLevel::NEEDS_RELINEARIZE; - // If we're going to produce ACCEPTABLE clusters (i.e., when in NEEDS_SPLIT_ACCEPTABLE), we - // need to post-linearize to make sure the split-out versions are all connected (as - // connectivity may have changed by removing part of the cluster). This could be done on each - // resulting split-out cluster separately, but it is simpler to do it once up front before - // splitting. This step is not necessary if the resulting clusters are NEEDS_RELINEARIZE, as - // they will be post-linearized anyway in MakeAcceptable(). - if (new_quality == QualityLevel::ACCEPTABLE) { - PostLinearize(m_depgraph, m_linearization); - } + QualityLevel new_quality = QualityLevel::NEEDS_RELINEARIZE; /** Which positions are still left in this Cluster. */ auto todo = m_depgraph.Positions(); /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and @@ -1771,11 +1740,9 @@ void TxGraphImpl::SplitAll(int up_to_level) noexcept // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(up_to_level); for (int level = 0; level <= up_to_level; ++level) { - for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { - auto& queue = GetClusterSet(level).m_clusters[int(quality)]; - while (!queue.empty()) { - Split(*queue.back().get(), level); - } + auto& queue = GetClusterSet(level).m_clusters[int(QualityLevel::NEEDS_SPLIT)]; + while (!queue.empty()) { + Split(*queue.back().get(), level); } } } @@ -2640,10 +2607,8 @@ void GenericClusterImpl::SetFee(TxGraphImpl& graph, int level, DepGraphIndex idx m_depgraph.FeeRate(idx).fee = fee; if (m_quality == QualityLevel::OVERSIZED_SINGLETON) { // Nothing to do. - } else if (!NeedsSplitting()) { + } else if (IsAcceptable()) { graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); - } else { - graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); } Updated(graph, level); } From 3380e0cbb59b2a893faabfa8f9aa18e582a11c2f Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 16 Oct 2025 11:23:14 -0400 Subject: [PATCH 3/5] txgraph: use PostLinearize less prior to linearizing With the new SFL algorithm, the process of loading an existing linearization into the SFL state is very similar to what PostLinearize does. This means there is little benefit to performing an explicit PostLinearize step before linearizing inside txgraph. Instead, it seems better to use our allotted CPU time to perform more SFL optimization steps. --- src/txgraph.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 492e13c8900..368b5cf7fb4 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -1451,14 +1451,6 @@ void SingletonClusterImpl::Merge(TxGraphImpl&, int, Cluster&) noexcept void GenericClusterImpl::ApplyDependencies(TxGraphImpl& graph, int level, std::span> to_apply) noexcept { - // This function is invoked by TxGraphImpl::ApplyDependencies after merging groups of Clusters - // between which dependencies are added, which simply concatenates their linearizations. Invoke - // PostLinearize, which has the effect that the linearization becomes a merge-sort of the - // constituent linearizations. Do this here rather than in Cluster::Merge, because this - // function is only invoked once per merged Cluster, rather than once per constituent one. - // This concatenation + post-linearization could be replaced with an explicit merge-sort. - PostLinearize(m_depgraph, m_linearization); - // Sort the list of dependencies to apply by child, so those can be applied in batch. std::sort(to_apply.begin(), to_apply.end(), [](auto& a, auto& b) { return a.second < b.second; }); // Iterate over groups of to-be-added dependencies with the same child. @@ -1484,9 +1476,8 @@ void GenericClusterImpl::ApplyDependencies(TxGraphImpl& graph, int level, std::s } // Finally fix the linearization, as the new dependencies may have invalidated the - // linearization, and post-linearize it to fix up the worst problems with it. + // linearization. FixLinearization(m_depgraph, m_linearization); - PostLinearize(m_depgraph, m_linearization); Assume(!NeedsSplitting()); Assume(!IsOversized()); if (IsAcceptable()) { From 34a77138b7a6f65a31c6273f2770e943cc0b474b Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 15 Oct 2025 22:52:52 -0400 Subject: [PATCH 4/5] txgraph: permit non-topological clusters to defer fixing (optimization) --- src/txgraph.cpp | 88 ++++++++++++++++++++++++++++++------------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 368b5cf7fb4..d76ee60e032 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -40,8 +40,12 @@ enum class QualityLevel /** This is a singleton cluster consisting of a transaction that individually exceeds the * cluster size limit. It cannot be merged with anything. */ OVERSIZED_SINGLETON, + /** This cluster may have multiple disconnected components, which are all NEEDS_FIX. */ + NEEDS_SPLIT_FIX, /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ NEEDS_SPLIT, + /** This cluster may be non-topological. */ + NEEDS_FIX, /** This cluster has undergone changes that warrant re-linearization. */ NEEDS_RELINEARIZE, /** The minimal level of linearization has been performed, but it is not known to be optimal. */ @@ -130,6 +134,11 @@ public: { return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL; } + /** Whether the linearization of this Cluster is topological. */ + bool IsTopological() const noexcept + { + return m_quality != QualityLevel::NEEDS_FIX && m_quality != QualityLevel::NEEDS_SPLIT_FIX; + } /** Whether the linearization of this Cluster is optimal. */ bool IsOptimal() const noexcept { @@ -142,7 +151,7 @@ public: /** Whether this cluster requires splitting. */ bool NeedsSplitting() const noexcept { - return m_quality == QualityLevel::NEEDS_SPLIT; + return m_quality == QualityLevel::NEEDS_SPLIT || m_quality == QualityLevel::NEEDS_SPLIT_FIX; } /** Get the smallest number of transactions this Cluster is intended for. */ @@ -165,8 +174,9 @@ public: virtual DepGraphIndex AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept = 0; /** Add dependencies to a given child in this cluster. */ virtual void AddDependencies(SetType parents, DepGraphIndex child) noexcept = 0; - /** Invoke visitor_fn for each transaction in the cluster, in linearization order, then wipe this Cluster. */ - virtual void ExtractTransactions(const std::function& visit_fn) noexcept = 0; + /** Invoke visit1_fn for each transaction in the cluster, in linearization order, then + * visit2_fn in the same order, then wipe this Cluster. */ + virtual void ExtractTransactions(const std::function& visit1_fn, const std::function& visit2_fn) noexcept = 0; /** Figure out what level this Cluster exists at in the graph. In most cases this is known by * the caller already (see all "int level" arguments below), but not always. */ virtual int GetLevel(const TxGraphImpl& graph) const noexcept = 0; @@ -266,7 +276,7 @@ public: GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept final { return m_mapping[index]; } DepGraphIndex AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept final; void AddDependencies(SetType parents, DepGraphIndex child) noexcept final; - void ExtractTransactions(const std::function& visit_fn) noexcept final; + void ExtractTransactions(const std::function& visit1_fn, const std::function& visit2_fn) noexcept final; int GetLevel(const TxGraphImpl& graph) const noexcept final; void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept final { m_mapping[cluster_idx] = graph_idx; } void Updated(TxGraphImpl& graph, int level) noexcept final; @@ -322,7 +332,7 @@ public: GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept final { Assume(index == 0); Assume(GetTxCount()); return m_graph_index; } DepGraphIndex AppendTransaction(GraphIndex graph_idx, FeePerWeight feerate) noexcept final; void AddDependencies(SetType parents, DepGraphIndex child) noexcept final; - void ExtractTransactions(const std::function& visit_fn) noexcept final; + void ExtractTransactions(const std::function& visit1_fn, const std::function& visit2_fn) noexcept final; int GetLevel(const TxGraphImpl& graph) const noexcept final; void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept final { Assume(cluster_idx == 0); m_graph_index = graph_idx; } void Updated(TxGraphImpl& graph, int level) noexcept final; @@ -913,10 +923,13 @@ void SingletonClusterImpl::AddDependencies(SetType parents, DepGraphIndex child) Assume(parents == SetType{} || parents == SetType::Fill(0)); } -void GenericClusterImpl::ExtractTransactions(const std::function& visit_fn) noexcept +void GenericClusterImpl::ExtractTransactions(const std::function& visit1_fn, const std::function& visit2_fn) noexcept { for (auto pos : m_linearization) { - visit_fn(pos, m_mapping[pos], FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(pos)), m_depgraph.GetReducedParents(pos)); + visit1_fn(pos, m_mapping[pos], FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(pos))); + } + for (auto pos : m_linearization) { + visit2_fn(pos, m_mapping[pos], m_depgraph.GetReducedParents(pos)); } // Purge this Cluster, now that everything has been moved. m_depgraph = DepGraph{}; @@ -924,10 +937,11 @@ void GenericClusterImpl::ExtractTransactions(const std::function& visit_fn) noexcept +void SingletonClusterImpl::ExtractTransactions(const std::function& visit1_fn, const std::function& visit2_fn) noexcept { if (GetTxCount()) { - visit_fn(0, m_graph_index, m_feerate, SetType{}); + visit1_fn(0, m_graph_index, m_feerate); + visit2_fn(0, m_graph_index, SetType{}); m_graph_index = NO_GRAPH_INDEX; } } @@ -1188,7 +1202,8 @@ void GenericClusterImpl::ApplyRemovals(TxGraphImpl& graph, int level, std::span< Compact(); graph.GetClusterSet(level).m_cluster_usage += TotalMemoryUsage(); - graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + auto new_quality = IsTopological() ? QualityLevel::NEEDS_SPLIT : QualityLevel::NEEDS_SPLIT_FIX; + graph.SetClusterQuality(level, m_quality, m_setindex, new_quality); Updated(graph, level); } @@ -1290,6 +1305,7 @@ void SingletonClusterImpl::AppendChunkFeerates(std::vector& ret) const uint64_t GenericClusterImpl::AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept { + Assume(IsAcceptable()); auto linchunking = ChunkLinearizationInfo(m_depgraph, m_linearization); LinearizationIndex pos{0}; uint64_t size{0}; @@ -1333,7 +1349,7 @@ bool GenericClusterImpl::Split(TxGraphImpl& graph, int level) noexcept // This function can only be called when the Cluster needs splitting. Assume(NeedsSplitting()); // Determine the new quality the split-off Clusters will have. - QualityLevel new_quality = QualityLevel::NEEDS_RELINEARIZE; + QualityLevel new_quality = IsTopological() ? QualityLevel::NEEDS_RELINEARIZE : QualityLevel::NEEDS_FIX; /** Which positions are still left in this Cluster. */ auto todo = m_depgraph.Positions(); /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and @@ -1345,7 +1361,7 @@ bool GenericClusterImpl::Split(TxGraphImpl& graph, int level) noexcept while (todo.Any()) { auto component = m_depgraph.FindConnectedComponent(todo); auto component_size = component.Count(); - auto split_quality = component_size <= 2 ? QualityLevel::OPTIMAL : new_quality; + auto split_quality = component_size <= 1 ? QualityLevel::OPTIMAL : new_quality; if (first && component == todo && SetType::Fill(component_size) == component && component_size >= MIN_INTENDED_TX_COUNT) { // The existing Cluster is an entire component, without holes. Leave it be, but update // its quality. If there are holes, we continue, so that the Cluster is reconstructed @@ -1415,7 +1431,7 @@ void GenericClusterImpl::Merge(TxGraphImpl& graph, int level, Cluster& other) no /** Vector to store the positions in this Cluster for each position in other. */ std::vector remap(other.GetDepGraphIndexRange()); // Iterate over all transactions in the other Cluster (the one being absorbed). - other.ExtractTransactions([&](DepGraphIndex pos, GraphIndex idx, FeePerWeight feerate, SetType other_parents) noexcept { + other.ExtractTransactions([&](DepGraphIndex pos, GraphIndex idx, FeePerWeight feerate) noexcept { // Copy the transaction into this Cluster, and remember its position. auto new_pos = m_depgraph.AddTransaction(feerate); // Since this cluster must have been made hole-free before being merged into, all added @@ -1424,9 +1440,8 @@ void GenericClusterImpl::Merge(TxGraphImpl& graph, int level, Cluster& other) no remap[pos] = new_pos; m_mapping.push_back(idx); m_linearization.push_back(new_pos); - // Copy the transaction's dependencies, translating them using remap. Note that since - // pos iterates in linearization order, which is topological, all parents of pos should - // already be in remap. + }, [&](DepGraphIndex pos, GraphIndex idx, SetType other_parents) noexcept { + // Copy the transaction's dependencies, translating them using remap. SetType parents; for (auto par : other_parents) { parents.Set(remap[par]); @@ -1439,7 +1454,7 @@ void GenericClusterImpl::Merge(TxGraphImpl& graph, int level, Cluster& other) no // Discard any potential ChunkData prior to modifying the Cluster (as that could // invalidate its ordering). if (level == 0) graph.ClearChunkData(entry); - entry.m_locator[level].SetPresent(this, new_pos); + entry.m_locator[level].SetPresent(this, remap[pos]); }); } @@ -1475,14 +1490,11 @@ void GenericClusterImpl::ApplyDependencies(TxGraphImpl& graph, int level, std::s m_depgraph.AddDependencies(parents, child_idx); } - // Finally fix the linearization, as the new dependencies may have invalidated the - // linearization. - FixLinearization(m_depgraph, m_linearization); + // Finally set the cluster to NEEDS_FIX, so its linearization is fixed the next time it is + // attempted to be made ACCEPTABLE. Assume(!NeedsSplitting()); Assume(!IsOversized()); - if (IsAcceptable()) { - graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); - } + graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_FIX); // Finally push the changes to graph.m_entries. Updated(graph, level); @@ -1731,9 +1743,11 @@ void TxGraphImpl::SplitAll(int up_to_level) noexcept // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(up_to_level); for (int level = 0; level <= up_to_level; ++level) { - auto& queue = GetClusterSet(level).m_clusters[int(QualityLevel::NEEDS_SPLIT)]; - while (!queue.empty()) { - Split(*queue.back().get(), level); + for (auto quality : {QualityLevel::NEEDS_SPLIT_FIX, QualityLevel::NEEDS_SPLIT}) { + auto& queue = GetClusterSet(level).m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get(), level); + } } } } @@ -2047,7 +2061,7 @@ std::pair GenericClusterImpl::Relinearize(TxGraphImpl& graph, in if (IsOptimal()) return {0, false}; // Invoke the actual linearization algorithm (passing in the existing one). uint64_t rng_seed = graph.m_rng.rand64(); - auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization); + auto [linearization, optimal, cost] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization, /*is_topological=*/IsTopological()); // Postlinearize to undo some of the non-determinism caused by randomizing the linearization. // This also guarantees that all chunks are connected (which is not guaranteed by SFL // currently, even when optimal). @@ -2062,6 +2076,9 @@ std::pair GenericClusterImpl::Relinearize(TxGraphImpl& graph, in } else if (max_iters >= graph.m_acceptable_iters && !IsAcceptable()) { graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::ACCEPTABLE); improved = true; + } else if (!IsTopological()) { + graph.SetClusterQuality(level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + improved = true; } // Update the Entry objects. Updated(graph, level); @@ -2089,9 +2106,11 @@ void TxGraphImpl::MakeAllAcceptable(int level) noexcept ApplyDependencies(level); auto& clusterset = GetClusterSet(level); if (clusterset.m_oversized == true) return; - auto& queue = clusterset.m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)]; - while (!queue.empty()) { - MakeAcceptable(*queue.back().get(), level); + for (auto quality : {QualityLevel::NEEDS_FIX, QualityLevel::NEEDS_RELINEARIZE}) { + auto& queue = clusterset.m_clusters[int(quality)]; + while (!queue.empty()) { + MakeAcceptable(*queue.back().get(), level); + } } } @@ -2727,7 +2746,9 @@ void GenericClusterImpl::SanityCheck(const TxGraphImpl& graph, int level) const const auto& entry = graph.m_entries[m_mapping[lin_pos]]; // Check that the linearization is topological. m_done.Set(lin_pos); - assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); + if (IsTopological()) { + assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); + } // Check that the Entry has a locator pointing back to this Cluster & position within it. assert(entry.m_locator[level].cluster == this); assert(entry.m_locator[level].index == lin_pos); @@ -2966,7 +2987,7 @@ bool TxGraphImpl::DoWork(uint64_t iters) noexcept uint64_t iters_done{0}; // First linearize everything in NEEDS_RELINEARIZE to an acceptable level. If more budget // remains after that, try to make everything optimal. - for (QualityLevel quality : {QualityLevel::NEEDS_RELINEARIZE, QualityLevel::ACCEPTABLE}) { + for (QualityLevel quality : {QualityLevel::NEEDS_FIX, QualityLevel::NEEDS_RELINEARIZE, QualityLevel::ACCEPTABLE}) { // First linearize staging, if it exists, then main. for (int level = GetTopLevel(); level >= 0; --level) { // Do not modify main if it has any observers. @@ -2983,7 +3004,7 @@ bool TxGraphImpl::DoWork(uint64_t iters) noexcept // one. auto pos = m_rng.randrange(queue.size()); auto iters_now = iters - iters_done; - if (quality == QualityLevel::NEEDS_RELINEARIZE) { + if (quality == QualityLevel::NEEDS_FIX || quality == QualityLevel::NEEDS_RELINEARIZE) { // If we're working with clusters that need relinearization still, only perform // up to m_acceptable_iters iterations. If they become ACCEPTABLE, and we still // have budget after all other clusters are ACCEPTABLE too, we'll spend the @@ -3245,6 +3266,7 @@ std::vector TxGraphImpl::Trim() noexcept .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); uint64_t size{0}; for (Cluster* cluster : cluster_span) { + MakeAcceptable(*cluster, cluster->GetLevel(*this)); size += cluster->AppendTrimData(trim_data, deps_by_child); } // If this group of Clusters does not violate any limits, continue to the next group. From 1808b5aaf7c4910122fcd088e03d790189907438 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 16 Sep 2025 13:36:36 -0400 Subject: [PATCH 5/5] clusterlin: remove unused FixLinearization (cleanup) --- src/cluster_linearize.h | 10 ------- src/test/fuzz/cluster_linearize.cpp | 44 ----------------------------- 2 files changed, 54 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 9b7fd6a2f33..c397e879de9 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -1575,16 +1575,6 @@ void PostLinearize(const DepGraph& depgraph, std::span l } } -/** Make linearization topological, reusing information from the old linearization where possible. */ -template -void FixLinearization(const DepGraph& depgraph, std::span linearization) noexcept -{ - // TODO: update call sites to use Linearize directly. - auto [new_lin, _opt, _steps] = Linearize(depgraph, /*max_iterations=*/0, /*rng_seed=*/0, linearization, /*is_topological=*/false); - Assume(new_lin.size() == linearization.size()); - std::copy(new_lin.begin(), new_lin.end(), linearization.begin()); -} - } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 9bd4280512f..cb5267a4ff0 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -58,8 +58,6 @@ * - clusterlin_postlinearize * - clusterlin_postlinearize_tree * - clusterlin_postlinearize_moved_leaf - * - FixLinearization tests: - * - clusterlin_fix_linearization * - MakeConnected tests (a test-only function): * - clusterlin_make_connected */ @@ -1202,45 +1200,3 @@ FUZZ_TARGET(clusterlin_postlinearize_moved_leaf) auto cmp = CompareChunks(new_chunking, old_chunking); assert(cmp >= 0); } - -FUZZ_TARGET(clusterlin_fix_linearization) -{ - // Verify expected properties of FixLinearization() on arbitrary linearizations. - - // Retrieve a depgraph from the fuzz input. - SpanReader reader(buffer); - DepGraph depgraph; - try { - reader >> Using(depgraph); - } catch (const std::ios_base::failure&) {} - - // Construct an arbitrary linearization (not necessarily topological for depgraph). - std::vector linearization = ReadLinearization(depgraph, reader, /*topological=*/false); - assert(linearization.size() == depgraph.TxCount()); - - // Determine what prefix of linearization is topological, i.e., the position of the first entry - // in linearization which corresponds to a transaction that is not preceded by all its - // ancestors. - size_t topo_prefix = 0; - auto todo = depgraph.Positions(); - while (topo_prefix < linearization.size()) { - DepGraphIndex idx = linearization[topo_prefix]; - todo.Reset(idx); - if (todo.Overlaps(depgraph.Ancestors(idx))) break; - ++topo_prefix; - } - - // Then make a fixed copy of linearization. - auto linearization_fixed = linearization; - FixLinearization(depgraph, linearization_fixed); - // Sanity check it (which includes testing whether it is topological). - SanityCheck(depgraph, linearization_fixed); - - // If linearization was entirely topological, FixLinearization cannot worsen it. - if (topo_prefix == linearization.size()) { - auto chunking = ChunkLinearization(depgraph, linearization); - auto chunking_fixed = ChunkLinearization(depgraph, linearization_fixed); - auto cmp = CompareChunks(chunking_fixed, chunking); - assert(cmp >= 0); - } -}