From a52b53926b5c6a5b92255435e3c204cdf18665a2 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 27 Mar 2025 10:01:51 -0400 Subject: [PATCH] clusterlin: add GetConnectedComponent This abstracts out the finding of the connected component that includes a given element from FindConnectedComponent (which just finds any connected component). Use this in the txgraph fuzz test, which was effectively reimplementing this logic. At the same time, improve its performance by replacing a vector with a set. --- src/cluster_linearize.h | 26 +++++++++++++++++++------- src/test/fuzz/cluster_linearize.cpp | 21 +++++++++++++++++++-- src/test/fuzz/txgraph.cpp | 25 ++++++------------------- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index ad6f1a76032..217c4700afe 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -250,10 +250,8 @@ public: return ret; } - /** Find some connected component within the subset "todo" of this graph. - * - * Specifically, this finds the connected component which contains the first transaction of - * todo (if any). + /** Get the connected component within the subset "todo" that contains tx (which must be in + * todo). * * Two transactions are considered connected if they are both in `todo`, and one is an ancestor * of the other in the entire graph (so not just within `todo`), or transitively there is a @@ -262,10 +260,11 @@ public: * * Complexity: O(ret.Count()). */ - SetType FindConnectedComponent(const SetType& todo) const noexcept + SetType GetConnectedComponent(const SetType& todo, DepGraphIndex tx) const noexcept { - if (todo.None()) return todo; - auto to_add = SetType::Singleton(todo.First()); + Assume(todo[tx]); + Assume(todo.IsSubsetOf(m_used)); + auto to_add = SetType::Singleton(tx); SetType ret; do { SetType old = ret; @@ -279,6 +278,19 @@ public: return ret; } + /** Find some connected component within the subset "todo" of this graph. 
+ * + * Specifically, this finds the connected component which contains the first transaction of + * todo (if any). + * + * Complexity: O(ret.Count()). + */ + SetType FindConnectedComponent(const SetType& todo) const noexcept + { + if (todo.None()) return todo; + return GetConnectedComponent(todo, todo.First()); + } + /** Determine if a subset is connected. * * Complexity: O(subset.Count()). diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index c7e40a833da..fb4bf3a719f 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -446,19 +446,36 @@ FUZZ_TARGET(clusterlin_components) // Construct a depgraph. SpanReader reader(buffer); DepGraph<TestBitSet> depgraph; + std::vector<DepGraphIndex> linearization; try { reader >> Using<DepGraphFormatter>(depgraph); } catch (const std::ios_base::failure&) {} TestBitSet todo = depgraph.Positions(); while (todo.Any()) { - // Find a connected component inside todo. - auto component = depgraph.FindConnectedComponent(todo); + // Pick a transaction in todo, or nothing. + std::optional<DepGraphIndex> picked; + { + uint64_t picked_num{0}; + try { + reader >> VARINT(picked_num); + } catch (const std::ios_base::failure&) {} + if (picked_num < todo.Size() && todo[picked_num]) { + picked = picked_num; + } + } + + // Find a connected component inside todo, including picked if any. + auto component = picked ? depgraph.GetConnectedComponent(todo, *picked) + : depgraph.FindConnectedComponent(todo); // The component must be a subset of todo and non-empty. assert(component.IsSubsetOf(todo)); assert(component.Any()); + // If picked was provided, the component must include it. + if (picked) assert(component[*picked]); + // If todo is the entire graph, and the entire graph is connected, then the component must // be the entire graph. 
if (todo == depgraph.Positions()) { diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 010c9e951ed..7eabb013e2b 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -561,36 +561,23 @@ FUZZ_TARGET(txgraph) std::shuffle(refs.begin(), refs.end(), rng); // Invoke the real function. auto result = real->CountDistinctClusters(refs, use_main); - // Build a vector with representatives of the clusters the Refs occur in in the + // Build a set with representatives of the clusters the Refs occur in in the // simulated graph. For each, remember the lowest-index transaction SimPos in the // cluster. - std::vector<DepGraphIndex> sim_reps; + SimTxGraph::SetType sim_reps; for (auto ref : refs) { // Skip Refs that do not occur in the simulated graph. auto simpos = sel_sim.Find(ref); if (simpos == SimTxGraph::MISSING) continue; - // Start with component equal to just the Ref's SimPos. - auto component = SimTxGraph::SetType::Singleton(simpos); - // Keep adding ancestors/descendants of all elements in component until it no - // longer changes. - while (true) { - auto old_component = component; - for (auto i : component) { - component |= sel_sim.graph.Ancestors(i); - component |= sel_sim.graph.Descendants(i); - } - if (component == old_component) break; - } + // Find the component that includes ref. + auto component = sel_sim.graph.GetConnectedComponent(sel_sim.graph.Positions(), simpos); // Remember the lowest-index SimPos in component, as a representative for it. assert(component.Any()); - sim_reps.push_back(component.First()); + sim_reps.Set(component.First()); } - // Remove duplicates from sim_reps. - std::sort(sim_reps.begin(), sim_reps.end()); - sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end()); // Compare the number of deduplicated representatives with the value returned by // the real function. - assert(result == sim_reps.size()); + assert(result == sim_reps.Count()); break; } else if (command-- == 0) { // DoWork.