From 965892096f556decb38b295587121f4aaf75400f Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 25 Oct 2024 14:11:50 -0400 Subject: [PATCH 01/30] clusterlin: add FixLinearization function + fuzz test This function takes an existing ordering for transactions in a DepGraph, and makes it a valid linearization for it (i.e., topological). Any topological prefix of the input remains untouched. --- src/cluster_linearize.h | 32 +++++++++++++++ src/test/fuzz/cluster_linearize.cpp | 62 +++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 50b121d9e4c..43f16160f5d 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -1336,6 +1336,38 @@ std::vector MergeLinearizations(const DepGraph& depgraph, return ret; } +/** Make linearization topological, retaining its ordering where possible. */ +template +void FixLinearization(const DepGraph& depgraph, Span linearization) noexcept +{ + // This algorithm can be summarized as moving every element in the linearization backwards + // until it is placed after all its ancestors. + SetType done; + const auto len = linearization.size(); + // Iterate over the elements of linearization from back to front (i is distance from back). + for (ClusterIndex i = 0; i < len; ++i) { + /** The element at that position. */ + ClusterIndex elem = linearization[len - 1 - i]; + /** j represents how far from the back of the linearization elem should be placed. */ + ClusterIndex j = i; + // Figure out which elements need to be moved before elem. + SetType place_before = done & depgraph.Ancestors(elem); + // Find which position to place elem in (updating j), continuously moving the elements + // in between forward. + while (place_before.Any()) { + // j cannot be 0 here; if it was, then there was necessarily nothing earlier which + // elem needs to be placed before anymore, and place_before would be empty. 
+ Assume(j > 0); + auto to_swap = linearization[len - 1 - (j - 1)]; + place_before.Reset(to_swap); + linearization[len - 1 - (j--)] = to_swap; + } + // Put elem in its final position and mark it as done. + linearization[len - 1 - j] = elem; + done.Set(elem); + } +} + } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 5b3770636ab..de066237b2a 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -1118,3 +1118,65 @@ FUZZ_TARGET(clusterlin_merge) auto cmp2 = CompareChunks(chunking_merged, chunking2); assert(cmp2 >= 0); } + +FUZZ_TARGET(clusterlin_fix_linearization) +{ + // Verify expected properties of FixLinearization() on arbitrary linearizations. + + // Retrieve a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + // Construct an arbitrary linearization (not necessarily topological for depgraph). + std::vector linearization; + /** Which transactions of depgraph are yet to be included in linearization. */ + TestBitSet todo = depgraph.Positions(); + while (todo.Any()) { + // Read a number from the fuzz input in range [0, todo.Count()). + uint64_t val{0}; + try { + reader >> VARINT(val); + } catch (const std::ios_base::failure&) {} + val %= todo.Count(); + // Find the val'th element in todo, remove it from todo, and append it to linearization. + for (auto idx : todo) { + if (val == 0) { + linearization.push_back(idx); + todo.Reset(idx); + break; + } + --val; + } + } + assert(linearization.size() == depgraph.TxCount()); + + // Determine what prefix of linearization is topological, i.e., the position of the first entry + // in linearization which corresponds to a transaction that is not preceded by all its + // ancestors. 
+ size_t topo_prefix = 0; + todo = depgraph.Positions(); + while (topo_prefix < linearization.size()) { + ClusterIndex idx = linearization[topo_prefix]; + todo.Reset(idx); + if (todo.Overlaps(depgraph.Ancestors(idx))) break; + ++topo_prefix; + } + + // Then make a fixed copy of linearization. + auto linearization_fixed = linearization; + FixLinearization(depgraph, linearization_fixed); + // Sanity check it (which includes testing whether it is topological). + SanityCheck(depgraph, linearization_fixed); + + // FixLinearization does not modify the topological prefix of linearization. + assert(std::equal(linearization.begin(), linearization.begin() + topo_prefix, + linearization_fixed.begin())); + // This also means that if linearization was entirely topological, FixLinearization cannot have + // modified it. This is implied by the assertion above already, but repeat it explicitly. + if (topo_prefix == linearization.size()) { + assert(linearization == linearization_fixed); + } +} From 49818df2ac80e2eade1b70d3ea0b2da1e8c3145c Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 12 Nov 2024 15:13:58 -0500 Subject: [PATCH 02/30] clusterlin: make IsAcyclic() a DepGraph member function ... instead of being a separate test-only function. Also add a fuzz test for it returning false. --- src/cluster_linearize.h | 11 ++++++++++ src/test/fuzz/cluster_linearize.cpp | 33 +++++++++++++++++++++++++++-- src/test/util/cluster_linearize.h | 14 +----------- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 43f16160f5d..b32724a1711 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -309,6 +309,17 @@ public: return a < b; }); } + + /** Check if this graph is acyclic. 
*/ + bool IsAcyclic() const noexcept + { + for (auto i : Positions()) { + if ((Ancestors(i) & Descendants(i)) != SetType::Singleton(i)) { + return false; + } + } + return true; + } }; /** A set of transactions together with their aggregate feerate. */ diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index de066237b2a..f5c0c897c98 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -401,13 +401,42 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) // Construct a graph by deserializing. SpanReader reader(buffer); DepGraph depgraph; + ClusterIndex par_code{0}, chl_code{0}; try { - reader >> Using(depgraph); + reader >> Using(depgraph) >> VARINT(par_code) >> VARINT(chl_code); } catch (const std::ios_base::failure&) {} SanityCheck(depgraph); // Verify the graph is a DAG. - assert(IsAcyclic(depgraph)); + assert(depgraph.IsAcyclic()); + + // Introduce a cycle, and then test that IsAcyclic returns false. + if (depgraph.TxCount() < 2) return; + ClusterIndex par(0), chl(0); + // Pick any transaction of depgraph as parent. + par_code %= depgraph.TxCount(); + for (auto i : depgraph.Positions()) { + if (par_code == 0) { + par = i; + break; + } + --par_code; + } + // Pick any ancestor of par (excluding itself) as child, if any. + auto ancestors = depgraph.Ancestors(par) - TestBitSet::Singleton(par); + if (ancestors.None()) return; + chl_code %= ancestors.Count(); + for (auto i : ancestors) { + if (chl_code == 0) { + chl = i; + break; + } + --chl_code; + } + // Add the cycle-introducing dependency. + depgraph.AddDependencies(TestBitSet::Singleton(par), chl); + // Check that we now detect a cycle. 
+ assert(!depgraph.IsAcyclic()); } FUZZ_TARGET(clusterlin_components) diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index 871aa9d74ed..cdde421637a 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -23,18 +23,6 @@ using namespace cluster_linearize; using TestBitSet = BitSet<32>; -/** Check if a graph is acyclic. */ -template -bool IsAcyclic(const DepGraph& depgraph) noexcept -{ - for (ClusterIndex i : depgraph.Positions()) { - if ((depgraph.Ancestors(i) & depgraph.Descendants(i)) != SetType::Singleton(i)) { - return false; - } - } - return true; -} - /** A formatter for a bespoke serialization for acyclic DepGraph objects. * * The serialization format outputs information about transactions in a topological order (parents @@ -337,7 +325,7 @@ void SanityCheck(const DepGraph& depgraph) assert((depgraph.Descendants(child) & children).IsSubsetOf(SetType::Singleton(child))); } } - if (IsAcyclic(depgraph)) { + if (depgraph.IsAcyclic()) { // If DepGraph is acyclic, serialize + deserialize must roundtrip. std::vector ser; VectorWriter writer(ser, 0); From 8872339583446bf206a9500af6695fd52b215532 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 31 Jan 2025 16:26:06 -0500 Subject: [PATCH 03/30] clusterlin: (refactor) ClusterIndex -> DepGraphIndex Since cluster_linearize.h does not actually have a Cluster type anymore, it is more appropriate to rename the index type to DepGraphIndex. 
--- src/bench/cluster_linearize.cpp | 44 ++++----- src/cluster_linearize.h | 132 +++++++++++++-------------- src/test/cluster_linearize_tests.cpp | 4 +- src/test/fuzz/cluster_linearize.cpp | 70 +++++++------- src/test/util/cluster_linearize.h | 36 ++++---- 5 files changed, 143 insertions(+), 143 deletions(-) diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp index 7d011975ddb..cb06f3fc28a 100644 --- a/src/bench/cluster_linearize.cpp +++ b/src/bench/cluster_linearize.cpp @@ -23,10 +23,10 @@ namespace { * remaining transaction, whose removal requires updating all remaining transactions' ancestor * set feerates. */ template -DepGraph MakeLinearGraph(ClusterIndex ntx) +DepGraph MakeLinearGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({-int32_t(i), 1}); if (i > 0) depgraph.AddDependencies(SetType::Singleton(i - 1), i); } @@ -38,10 +38,10 @@ DepGraph MakeLinearGraph(ClusterIndex ntx) * rechunking is needed after every candidate (the last transaction gets picked every time). */ template -DepGraph MakeWideGraph(ClusterIndex ntx) +DepGraph MakeWideGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({int32_t(i) + 1, 1}); if (i > 0) depgraph.AddDependencies(SetType::Singleton(0), i); } @@ -51,10 +51,10 @@ DepGraph MakeWideGraph(ClusterIndex ntx) // Construct a difficult graph. These need at least sqrt(2^(n-1)) iterations in the implemented // algorithm (purely empirically determined). template -DepGraph MakeHardGraph(ClusterIndex ntx) +DepGraph MakeHardGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { if (ntx & 1) { // Odd cluster size. // @@ -121,7 +121,7 @@ DepGraph MakeHardGraph(ClusterIndex ntx) * iterations difference. 
*/ template -void BenchLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench, uint64_t iter_limit) +void BenchLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench, uint64_t iter_limit) { const auto depgraph = MakeHardGraph(ntx); uint64_t rng_seed = 0; @@ -147,12 +147,12 @@ void BenchLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench, uint64_t * cheap. */ template -void BenchLinearizeNoItersWorstCaseAnc(ClusterIndex ntx, benchmark::Bench& bench) +void BenchLinearizeNoItersWorstCaseAnc(DepGraphIndex ntx, benchmark::Bench& bench) { const auto depgraph = MakeLinearGraph(ntx); uint64_t rng_seed = 0; - std::vector old_lin(ntx); - for (ClusterIndex i = 0; i < ntx; ++i) old_lin[i] = i; + std::vector old_lin(ntx); + for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i; bench.run([&] { Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin); }); @@ -167,41 +167,41 @@ void BenchLinearizeNoItersWorstCaseAnc(ClusterIndex ntx, benchmark::Bench& bench * AncestorCandidateFinder is cheap. 
*/ template -void BenchLinearizeNoItersWorstCaseLIMO(ClusterIndex ntx, benchmark::Bench& bench) +void BenchLinearizeNoItersWorstCaseLIMO(DepGraphIndex ntx, benchmark::Bench& bench) { const auto depgraph = MakeWideGraph(ntx); uint64_t rng_seed = 0; - std::vector old_lin(ntx); - for (ClusterIndex i = 0; i < ntx; ++i) old_lin[i] = i; + std::vector old_lin(ntx); + for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i; bench.run([&] { Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin); }); } template -void BenchPostLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +void BenchPostLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench) { DepGraph depgraph = MakeWideGraph(ntx); - std::vector lin(ntx); + std::vector lin(ntx); bench.run([&] { - for (ClusterIndex i = 0; i < ntx; ++i) lin[i] = i; + for (DepGraphIndex i = 0; i < ntx; ++i) lin[i] = i; PostLinearize(depgraph, lin); }); } template -void BenchMergeLinearizationsWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +void BenchMergeLinearizationsWorstCase(DepGraphIndex ntx, benchmark::Bench& bench) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({i, 1}); if (i) depgraph.AddDependencies(SetType::Singleton(0), i); } - std::vector lin1; - std::vector lin2; + std::vector lin1; + std::vector lin2; lin1.push_back(0); lin2.push_back(0); - for (ClusterIndex i = 1; i < ntx; ++i) { + for (DepGraphIndex i = 1; i < ntx; ++i) { lin1.push_back(i); lin2.push_back(ntx - i); } @@ -214,7 +214,7 @@ template void BenchLinearizeOptimally(benchmark::Bench& bench, const std::array& serialized) { // Determine how many transactions the serialized cluster has. 
- ClusterIndex num_tx{0}; + DepGraphIndex num_tx{0}; { SpanReader reader{serialized}; DepGraph> depgraph; diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index b32724a1711..d5a6c24dc99 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -19,8 +19,8 @@ namespace cluster_linearize { -/** Data type to represent transaction indices in clusters. */ -using ClusterIndex = uint32_t; +/** Data type to represent transaction indices in DepGraphs and the clusters they represent. */ +using DepGraphIndex = uint32_t; /** Data structure that holds a transaction graph's preprocessed data (fee, size, ancestors, * descendants). */ @@ -86,11 +86,11 @@ public: * * Complexity: O(N^2) where N=depgraph.TxCount(). */ - DepGraph(const DepGraph& depgraph, Span mapping, ClusterIndex pos_range) noexcept : entries(pos_range) + DepGraph(const DepGraph& depgraph, Span mapping, DepGraphIndex pos_range) noexcept : entries(pos_range) { Assume(mapping.size() == depgraph.PositionRange()); Assume((pos_range == 0) == (depgraph.TxCount() == 0)); - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { auto new_idx = mapping[i]; Assume(new_idx < pos_range); // Add transaction. @@ -100,7 +100,7 @@ public: // Fill in fee and size. entries[new_idx].feerate = depgraph.entries[i].feerate; } - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { // Fill in dependencies by mapping direct parents. SetType parents; for (auto j : depgraph.GetReducedParents(i)) parents.Set(mapping[j]); @@ -113,29 +113,29 @@ public: /** Get the set of transactions positions in use. Complexity: O(1). */ const SetType& Positions() const noexcept { return m_used; } /** Get the range of positions in this DepGraph. All entries in Positions() are in [0, PositionRange() - 1]. 
*/ - ClusterIndex PositionRange() const noexcept { return entries.size(); } + DepGraphIndex PositionRange() const noexcept { return entries.size(); } /** Get the number of transactions in the graph. Complexity: O(1). */ auto TxCount() const noexcept { return m_used.Count(); } /** Get the feerate of a given transaction i. Complexity: O(1). */ - const FeeFrac& FeeRate(ClusterIndex i) const noexcept { return entries[i].feerate; } + const FeeFrac& FeeRate(DepGraphIndex i) const noexcept { return entries[i].feerate; } /** Get the mutable feerate of a given transaction i. Complexity: O(1). */ - FeeFrac& FeeRate(ClusterIndex i) noexcept { return entries[i].feerate; } + FeeFrac& FeeRate(DepGraphIndex i) noexcept { return entries[i].feerate; } /** Get the ancestors of a given transaction i. Complexity: O(1). */ - const SetType& Ancestors(ClusterIndex i) const noexcept { return entries[i].ancestors; } + const SetType& Ancestors(DepGraphIndex i) const noexcept { return entries[i].ancestors; } /** Get the descendants of a given transaction i. Complexity: O(1). */ - const SetType& Descendants(ClusterIndex i) const noexcept { return entries[i].descendants; } + const SetType& Descendants(DepGraphIndex i) const noexcept { return entries[i].descendants; } /** Add a new unconnected transaction to this transaction graph (in the first available - * position), and return its ClusterIndex. + * position), and return its DepGraphIndex. * * Complexity: O(1) (amortized, due to resizing of backing vector). 
*/ - ClusterIndex AddTransaction(const FeeFrac& feefrac) noexcept + DepGraphIndex AddTransaction(const FeeFrac& feefrac) noexcept { static constexpr auto ALL_POSITIONS = SetType::Fill(SetType::Size()); auto available = ALL_POSITIONS - m_used; Assume(available.Any()); - ClusterIndex new_idx = available.First(); + DepGraphIndex new_idx = available.First(); if (new_idx == entries.size()) { entries.emplace_back(feefrac, SetType::Singleton(new_idx), SetType::Singleton(new_idx)); } else { @@ -174,7 +174,7 @@ public: * * Complexity: O(N) where N=TxCount(). */ - void AddDependencies(const SetType& parents, ClusterIndex child) noexcept + void AddDependencies(const SetType& parents, DepGraphIndex child) noexcept { Assume(m_used[child]); Assume(parents.IsSubsetOf(m_used)); @@ -205,7 +205,7 @@ public: * * Complexity: O(N) where N=Ancestors(i).Count() (which is bounded by TxCount()). */ - SetType GetReducedParents(ClusterIndex i) const noexcept + SetType GetReducedParents(DepGraphIndex i) const noexcept { SetType parents = Ancestors(i); parents.Reset(i); @@ -226,7 +226,7 @@ public: * * Complexity: O(N) where N=Descendants(i).Count() (which is bounded by TxCount()). */ - SetType GetReducedChildren(ClusterIndex i) const noexcept + SetType GetReducedChildren(DepGraphIndex i) const noexcept { SetType children = Descendants(i); children.Reset(i); @@ -298,11 +298,11 @@ public: * * Complexity: O(select.Count() * log(select.Count())). 
*/ - void AppendTopo(std::vector& list, const SetType& select) const noexcept + void AppendTopo(std::vector& list, const SetType& select) const noexcept { - ClusterIndex old_len = list.size(); + DepGraphIndex old_len = list.size(); for (auto i : select) list.push_back(i); - std::sort(list.begin() + old_len, list.end(), [&](ClusterIndex a, ClusterIndex b) noexcept { + std::sort(list.begin() + old_len, list.end(), [&](DepGraphIndex a, DepGraphIndex b) noexcept { const auto a_anc_count = entries[a].ancestors.Count(); const auto b_anc_count = entries[b].ancestors.Count(); if (a_anc_count != b_anc_count) return a_anc_count < b_anc_count; @@ -338,7 +338,7 @@ struct SetInfo SetInfo(const SetType& txn, const FeeFrac& fr) noexcept : transactions(txn), feerate(fr) {} /** Construct a SetInfo for a given transaction in a depgraph. */ - explicit SetInfo(const DepGraph& depgraph, ClusterIndex pos) noexcept : + explicit SetInfo(const DepGraph& depgraph, DepGraphIndex pos) noexcept : transactions(SetType::Singleton(pos)), feerate(depgraph.FeeRate(pos)) {} /** Construct a SetInfo for a set of transactions in a depgraph. */ @@ -346,7 +346,7 @@ struct SetInfo transactions(txn), feerate(depgraph.FeeRate(txn)) {} /** Add a transaction to this SetInfo (which must not yet be in it). */ - void Set(const DepGraph& depgraph, ClusterIndex pos) noexcept + void Set(const DepGraph& depgraph, DepGraphIndex pos) noexcept { Assume(!transactions[pos]); transactions.Set(pos); @@ -382,10 +382,10 @@ struct SetInfo /** Compute the feerates of the chunks of linearization. */ template -std::vector ChunkLinearization(const DepGraph& depgraph, Span linearization) noexcept +std::vector ChunkLinearization(const DepGraph& depgraph, Span linearization) noexcept { std::vector ret; - for (ClusterIndex i : linearization) { + for (DepGraphIndex i : linearization) { /** The new chunk to be added, initially a singleton. 
*/ auto new_chunk = depgraph.FeeRate(i); // As long as the new chunk has a higher feerate than the last chunk so far, absorb it. @@ -407,13 +407,13 @@ class LinearizationChunking const DepGraph& m_depgraph; /** The linearization we started from, possibly with removed prefix stripped. */ - Span m_linearization; + Span m_linearization; /** Chunk sets and their feerates, of what remains of the linearization. */ std::vector> m_chunks; /** How large a prefix of m_chunks corresponds to removed transactions. */ - ClusterIndex m_chunks_skip{0}; + DepGraphIndex m_chunks_skip{0}; /** Which transactions remain in the linearization. */ SetType m_todo; @@ -448,7 +448,7 @@ class LinearizationChunking public: /** Initialize a LinearizationSubset object for a given length of linearization. */ - explicit LinearizationChunking(const DepGraph& depgraph LIFETIMEBOUND, Span lin LIFETIMEBOUND) noexcept : + explicit LinearizationChunking(const DepGraph& depgraph LIFETIMEBOUND, Span lin LIFETIMEBOUND) noexcept : m_depgraph(depgraph), m_linearization(lin) { // Mark everything in lin as todo still. @@ -459,10 +459,10 @@ public: } /** Determine how many chunks remain in the linearization. */ - ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } + DepGraphIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. */ - const SetInfo& GetChunk(ClusterIndex n) const noexcept + const SetInfo& GetChunk(DepGraphIndex n) const noexcept { Assume(n + m_chunks_skip < m_chunks.size()); return m_chunks[n + m_chunks_skip]; @@ -505,7 +505,7 @@ public: Assume(subset.transactions.IsSubsetOf(m_todo)); SetInfo accumulator; // Iterate over all chunks of the remaining linearization. - for (ClusterIndex i = 0; i < NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < NumChunksLeft(); ++i) { // Find what (if any) intersection the chunk has with subset. 
const SetType to_add = GetChunk(i).transactions & subset.transactions; if (to_add.Any()) { @@ -557,13 +557,13 @@ public: m_ancestor_set_feerates(depgraph.PositionRange()) { // Precompute ancestor-set feerates. - for (ClusterIndex i : m_depgraph.Positions()) { + for (DepGraphIndex i : m_depgraph.Positions()) { /** The remaining ancestors for transaction i. */ SetType anc_to_add = m_depgraph.Ancestors(i); FeeFrac anc_feerate; // Reuse accumulated feerate from first ancestor, if usable. Assume(anc_to_add.Any()); - ClusterIndex first = anc_to_add.First(); + DepGraphIndex first = anc_to_add.First(); if (first < i) { anc_feerate = m_ancestor_set_feerates[first]; Assume(!anc_feerate.IsEmpty()); @@ -603,7 +603,7 @@ public: } /** Count the number of remaining unlinearized transactions. */ - ClusterIndex NumRemaining() const noexcept + DepGraphIndex NumRemaining() const noexcept { return m_todo.Count(); } @@ -616,7 +616,7 @@ public: SetInfo FindCandidateSet() const noexcept { Assume(!AllDone()); - std::optional best; + std::optional best; for (auto i : m_todo) { if (best.has_value()) { Assume(!m_ancestor_set_feerates[i].IsEmpty()); @@ -644,9 +644,9 @@ class SearchCandidateFinder /** Internal RNG. */ InsecureRandomContext m_rng; /** m_sorted_to_original[i] is the original position that sorted transaction position i had. */ - std::vector m_sorted_to_original; + std::vector m_sorted_to_original; /** m_original_to_sorted[i] is the sorted position original transaction position i has. */ - std::vector m_original_to_sorted; + std::vector m_original_to_sorted; /** Internal dependency graph for the cluster (with transactions in decreasing individual * feerate order). */ DepGraph m_sorted_depgraph; @@ -684,7 +684,7 @@ public: { // Determine reordering mapping, by sorting by decreasing feerate. Unused positions are // not included, as they will never be looked up anyway. 
- ClusterIndex sorted_pos{0}; + DepGraphIndex sorted_pos{0}; for (auto i : depgraph.Positions()) { m_sorted_to_original[sorted_pos++] = i; } @@ -694,7 +694,7 @@ public: return feerate_cmp > 0; }); // Compute reverse mapping. - for (ClusterIndex i = 0; i < m_sorted_to_original.size(); ++i) { + for (DepGraphIndex i = 0; i < m_sorted_to_original.size(); ++i) { m_original_to_sorted[m_sorted_to_original[i]] = i; } // Compute reordered dependency graph. @@ -793,7 +793,7 @@ public: /** The set of transactions in m_todo which have feerate > best's. */ SetType imp = m_todo; while (imp.Any()) { - ClusterIndex check = imp.Last(); + DepGraphIndex check = imp.Last(); if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; imp.Reset(check); } @@ -850,7 +850,7 @@ public: best = inc; // See if we can remove any entries from imp now. while (imp.Any()) { - ClusterIndex check = imp.Last(); + DepGraphIndex check = imp.Last(); if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; imp.Reset(check); } @@ -891,7 +891,7 @@ public: // If pot is empty, then so is inc. Assume(elem.inc.feerate.IsEmpty() == elem.pot_feerate.IsEmpty()); - const ClusterIndex first = elem.und.First(); + const DepGraphIndex first = elem.und.First(); if (!elem.inc.feerate.IsEmpty()) { // If no undecided transactions remain with feerate higher than best, this entry // cannot be improved beyond best. @@ -917,17 +917,17 @@ public: // most. Let I(t) be the size of the undecided set after including t, and E(t) the size // of the undecided set after excluding t. Then choose the split transaction t such // that 2^I(t) + 2^E(t) is minimal, tie-breaking by highest individual feerate for t. - ClusterIndex split = 0; + DepGraphIndex split = 0; const auto select = elem.und & m_sorted_depgraph.Ancestors(first); Assume(select.Any()); - std::optional> split_counts; + std::optional> split_counts; for (auto t : select) { // Call max = max(I(t), E(t)) and min = min(I(t), E(t)). Let counts = {max,min}. 
// Sorting by the tuple counts is equivalent to sorting by 2^I(t) + 2^E(t). This // expression is equal to 2^max + 2^min = 2^max * (1 + 1/2^(max - min)). The second // factor (1 + 1/2^(max - min)) there is in (1,2]. Thus increasing max will always // increase it, even when min decreases. Because of this, we can first sort by max. - std::pair counts{ + std::pair counts{ (elem.und - m_sorted_depgraph.Ancestors(t)).Count(), (elem.und - m_sorted_depgraph.Descendants(t)).Count()}; if (counts.first < counts.second) std::swap(counts.first, counts.second); @@ -1027,13 +1027,13 @@ public: * Complexity: possibly O(N * min(max_iterations + N, sqrt(2^N))) where N=depgraph.TxCount(). */ template -std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, Span old_linearization = {}) noexcept +std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, Span old_linearization = {}) noexcept { Assume(old_linearization.empty() || old_linearization.size() == depgraph.TxCount()); if (depgraph.TxCount() == 0) return {{}, true}; uint64_t iterations_left = max_iterations; - std::vector linearization; + std::vector linearization; AncestorCandidateFinder anc_finder(depgraph); std::optional> src_finder; @@ -1121,7 +1121,7 @@ std::pair, bool> Linearize(const DepGraph& de * postlinearize" process. */ template -void PostLinearize(const DepGraph& depgraph, Span linearization) +void PostLinearize(const DepGraph& depgraph, Span linearization) { // This algorithm performs a number of passes (currently 2); the even ones operate from back to // front, the odd ones from front to back. Each results in an equal-or-better linearization @@ -1159,9 +1159,9 @@ void PostLinearize(const DepGraph& depgraph, Span lineari // entries[0]. /** Index of the sentinel in the entries array below. */ - static constexpr ClusterIndex SENTINEL{0}; + static constexpr DepGraphIndex SENTINEL{0}; /** Indicator that a group has no previous transaction. 
*/ - static constexpr ClusterIndex NO_PREV_TX{0}; + static constexpr DepGraphIndex NO_PREV_TX{0}; /** Data structure per transaction entry. */ @@ -1169,16 +1169,16 @@ void PostLinearize(const DepGraph& depgraph, Span lineari { /** The index of the previous transaction in this group; NO_PREV_TX if this is the first * entry of a group. */ - ClusterIndex prev_tx; + DepGraphIndex prev_tx; // The fields below are only used for transactions that are the last one in a group // (referred to as tail transactions below). /** Index of the first transaction in this group, possibly itself. */ - ClusterIndex first_tx; + DepGraphIndex first_tx; /** Index of the last transaction in the previous group. The first group (the sentinel) * points back to the last group here, making it a singly-linked circular list. */ - ClusterIndex prev_group; + DepGraphIndex prev_group; /** All transactions in the group. Empty for the sentinel. */ SetType group; /** All dependencies of the group (descendants in even passes; ancestors in odd ones). */ @@ -1221,12 +1221,12 @@ void PostLinearize(const DepGraph& depgraph, Span lineari Assume(entries[SENTINEL].feerate.IsEmpty()); // Iterate over all elements in the existing linearization. - for (ClusterIndex i = 0; i < linearization.size(); ++i) { + for (DepGraphIndex i = 0; i < linearization.size(); ++i) { // Even passes are from back to front; odd passes from front to back. - ClusterIndex idx = linearization[rev ? linearization.size() - 1 - i : i]; + DepGraphIndex idx = linearization[rev ? linearization.size() - 1 - i : i]; // Construct a new group containing just idx. In even passes, the meaning of // parent/child and high/low feerate are swapped. - ClusterIndex cur_group = idx + 1; + DepGraphIndex cur_group = idx + 1; entries[cur_group].group = SetType::Singleton(idx); entries[cur_group].deps = rev ? 
depgraph.Descendants(idx): depgraph.Ancestors(idx); entries[cur_group].feerate = depgraph.FeeRate(idx); @@ -1238,8 +1238,8 @@ void PostLinearize(const DepGraph& depgraph, Span lineari entries[SENTINEL].prev_group = cur_group; // Start merge/swap cycle. - ClusterIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel. - ClusterIndex prev_group = entries[cur_group].prev_group; + DepGraphIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel. + DepGraphIndex prev_group = entries[cur_group].prev_group; // Continue as long as the current group has higher feerate than the previous one. while (entries[cur_group].feerate >> entries[prev_group].feerate) { // prev_group/cur_group/next_group refer to (the last transactions of) 3 @@ -1267,7 +1267,7 @@ void PostLinearize(const DepGraph& depgraph, Span lineari entries[cur_group].prev_group = prev_group; } else { // There is no dependency between cur_group and prev_group; swap them. - ClusterIndex preprev_group = entries[prev_group].prev_group; + DepGraphIndex preprev_group = entries[prev_group].prev_group; // If PP, P, C, N were the old preprev, prev, cur, next groups, then the new // layout becomes [PP, C, P, N]. Update prev_groups to reflect that order. entries[next_group].prev_group = prev_group; @@ -1282,10 +1282,10 @@ void PostLinearize(const DepGraph& depgraph, Span lineari } // Convert the entries back to linearization (overwriting the existing one). - ClusterIndex cur_group = entries[0].prev_group; - ClusterIndex done = 0; + DepGraphIndex cur_group = entries[0].prev_group; + DepGraphIndex done = 0; while (cur_group != SENTINEL) { - ClusterIndex cur_tx = cur_group; + DepGraphIndex cur_tx = cur_group; // Traverse the transactions of cur_group (from back to front), and write them in the // same order during odd passes, and reversed (front to back) in even passes. 
if (rev) { @@ -1310,7 +1310,7 @@ void PostLinearize(const DepGraph& depgraph, Span lineari * Complexity: O(N^2) where N=depgraph.TxCount(); O(N) if both inputs are identical. */ template -std::vector MergeLinearizations(const DepGraph& depgraph, Span lin1, Span lin2) +std::vector MergeLinearizations(const DepGraph& depgraph, Span lin1, Span lin2) { Assume(lin1.size() == depgraph.TxCount()); Assume(lin2.size() == depgraph.TxCount()); @@ -1318,7 +1318,7 @@ std::vector MergeLinearizations(const DepGraph& depgraph, /** Chunkings of what remains of both input linearizations. */ LinearizationChunking chunking1(depgraph, lin1), chunking2(depgraph, lin2); /** Output linearization. */ - std::vector ret; + std::vector ret; if (depgraph.TxCount() == 0) return ret; ret.reserve(depgraph.TxCount()); @@ -1349,18 +1349,18 @@ std::vector MergeLinearizations(const DepGraph& depgraph, /** Make linearization topological, retaining its ordering where possible. */ template -void FixLinearization(const DepGraph& depgraph, Span linearization) noexcept +void FixLinearization(const DepGraph& depgraph, Span linearization) noexcept { // This algorithm can be summarized as moving every element in the linearization backwards // until it is placed after all its ancestors. SetType done; const auto len = linearization.size(); // Iterate over the elements of linearization from back to front (i is distance from back). - for (ClusterIndex i = 0; i < len; ++i) { + for (DepGraphIndex i = 0; i < len; ++i) { /** The element at that position. */ - ClusterIndex elem = linearization[len - 1 - i]; + DepGraphIndex elem = linearization[len - 1 - i]; /** j represents how far from the back of the linearization elem should be placed. */ - ClusterIndex j = i; + DepGraphIndex j = i; // Figure out which elements need to be moved before elem. 
SetType place_before = done & depgraph.Ancestors(elem); // Find which position to place elem in (updating j), continuously moving the elements diff --git a/src/test/cluster_linearize_tests.cpp b/src/test/cluster_linearize_tests.cpp index 265ccdc805e..3413af4a219 100644 --- a/src/test/cluster_linearize_tests.cpp +++ b/src/test/cluster_linearize_tests.cpp @@ -28,11 +28,11 @@ void TestDepGraphSerialization(const std::vector>& c // Construct DepGraph from cluster argument. DepGraph depgraph; SetType holes; - for (ClusterIndex i = 0; i < cluster.size(); ++i) { + for (DepGraphIndex i = 0; i < cluster.size(); ++i) { depgraph.AddTransaction(cluster[i].first); if (cluster[i] == HOLE) holes.Set(i); } - for (ClusterIndex i = 0; i < cluster.size(); ++i) { + for (DepGraphIndex i = 0; i < cluster.size(); ++i) { depgraph.AddDependencies(cluster[i].second, i); } depgraph.RemoveTransactions(holes); diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index f5c0c897c98..c7e40a833da 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -149,9 +149,9 @@ public: * than AncestorCandidateFinder and SearchCandidateFinder. */ template -std::pair, bool> SimpleLinearize(const DepGraph& depgraph, uint64_t max_iterations) +std::pair, bool> SimpleLinearize(const DepGraph& depgraph, uint64_t max_iterations) { - std::vector linearization; + std::vector linearization; SimpleCandidateFinder finder(depgraph); SetType todo = depgraph.Positions(); bool optimal = true; @@ -203,9 +203,9 @@ SetType ReadTopologicalSet(const DepGraph& depgraph, const SetType& tod /** Given a dependency graph, construct any valid linearization for it, reading from a SpanReader. 
*/ template -std::vector ReadLinearization(const DepGraph& depgraph, SpanReader& reader) +std::vector ReadLinearization(const DepGraph& depgraph, SpanReader& reader) { - std::vector linearization; + std::vector linearization; TestBitSet todo = depgraph.Positions(); // In every iteration one topologically-valid transaction is appended to linearization. while (todo.Any()) { @@ -253,18 +253,18 @@ FUZZ_TARGET(clusterlin_depgraph_sim) * sim[i]->first is its individual feerate, and sim[i]->second is its set of ancestors. */ std::array>, TestBitSet::Size()> sim; /** The number of non-nullopt position in sim. */ - ClusterIndex num_tx_sim{0}; + DepGraphIndex num_tx_sim{0}; /** Read a valid index of a transaction from the provider. */ auto idx_fn = [&]() { - auto offset = provider.ConsumeIntegralInRange(0, num_tx_sim - 1); - for (ClusterIndex i = 0; i < sim.size(); ++i) { + auto offset = provider.ConsumeIntegralInRange(0, num_tx_sim - 1); + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (!sim[i].has_value()) continue; if (offset == 0) return i; --offset; } assert(false); - return ClusterIndex(-1); + return DepGraphIndex(-1); }; /** Read a valid subset of the transactions from the provider. 
*/ @@ -273,7 +273,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) const auto mask = provider.ConsumeIntegralInRange(0, range); auto mask_shifted = mask; TestBitSet subset; - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (!sim[i].has_value()) continue; if (mask_shifted & 1) { subset.Set(i); @@ -289,7 +289,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto range = (uint64_t{1} << sim.size()) - 1; const auto mask = provider.ConsumeIntegralInRange(0, range); TestBitSet set; - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if ((mask >> i) & 1) { set.Set(i); } @@ -301,7 +301,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto anc_update_fn = [&]() { while (true) { bool updates{false}; - for (ClusterIndex chl = 0; chl < sim.size(); ++chl) { + for (DepGraphIndex chl = 0; chl < sim.size(); ++chl) { if (!sim[chl].has_value()) continue; for (auto par : sim[chl]->second) { if (!sim[chl]->second.IsSupersetOf(sim[par]->second)) { @@ -315,7 +315,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) }; /** Compare the state of transaction i in the simulation with the real one. */ - auto check_fn = [&](ClusterIndex i) { + auto check_fn = [&](DepGraphIndex i) { // Compare used positions. assert(real.Positions()[i] == sim[i].has_value()); if (sim[i].has_value()) { @@ -338,7 +338,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto idx = real.AddTransaction(feerate); // Verify that the returned index is correct. assert(!sim[idx].has_value()); - for (ClusterIndex i = 0; i < TestBitSet::Size(); ++i) { + for (DepGraphIndex i = 0; i < TestBitSet::Size(); ++i) { if (!sim[i].has_value()) { assert(idx == i); break; @@ -351,7 +351,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) } if ((command % 3) <= 1 && num_tx_sim > 0) { // AddDependencies. - ClusterIndex child = idx_fn(); + DepGraphIndex child = idx_fn(); auto parents = subset_fn(); // Apply to DepGraph. 
real.AddDependencies(parents, child); @@ -370,7 +370,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) // Apply to DepGraph. real.RemoveTransactions(del); // Apply to sim. - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (sim[i].has_value()) { if (del[i]) { --num_tx_sim; @@ -388,7 +388,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) // Compare the real obtained depgraph against the simulation. anc_update_fn(); - for (ClusterIndex i = 0; i < sim.size(); ++i) check_fn(i); + for (DepGraphIndex i = 0; i < sim.size(); ++i) check_fn(i); assert(real.TxCount() == num_tx_sim); // Sanity check the result (which includes round-tripping serialization, if applicable). SanityCheck(real); @@ -401,7 +401,7 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) // Construct a graph by deserializing. SpanReader reader(buffer); DepGraph depgraph; - ClusterIndex par_code{0}, chl_code{0}; + DepGraphIndex par_code{0}, chl_code{0}; try { reader >> Using(depgraph) >> VARINT(par_code) >> VARINT(chl_code); } catch (const std::ios_base::failure&) {} @@ -412,7 +412,7 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) // Introduce a cycle, and then test that IsAcyclic returns false. if (depgraph.TxCount() < 2) return; - ClusterIndex par(0), chl(0); + DepGraphIndex par(0), chl(0); // Pick any transaction of depgraph as parent. 
par_code %= depgraph.TxCount(); for (auto i : depgraph.Positions()) { @@ -498,7 +498,7 @@ FUZZ_TARGET(clusterlin_components) reader >> VARINT(subset_bits); } catch (const std::ios_base::failure&) {} TestBitSet subset; - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { if (todo[i]) { if (subset_bits & 1) subset.Set(i); subset_bits >>= 1; @@ -555,7 +555,7 @@ FUZZ_TARGET(clusterlin_chunking) for (const auto& chunk_feerate : chunking) { assert(todo.Any()); SetInfo accumulator, best; - for (ClusterIndex idx : linearization) { + for (DepGraphIndex idx : linearization) { if (todo[idx]) { accumulator.Set(depgraph, idx); if (best.feerate.IsEmpty() || accumulator.feerate >> best.feerate) { @@ -766,7 +766,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking) assert(chunking.NumChunksLeft() > 0); // Construct linearization with just todo. - std::vector linearization_left; + std::vector linearization_left; for (auto i : linearization) { if (todo[i]) linearization_left.push_back(i); } @@ -776,13 +776,13 @@ FUZZ_TARGET(clusterlin_linearization_chunking) // Verify that it matches the feerates of the chunks of chunking. assert(chunking.NumChunksLeft() == chunking_left.size()); - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { assert(chunking.GetChunk(i).feerate == chunking_left[i]); } // Check consistency of chunking. TestBitSet combined; - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { const auto& chunk_info = chunking.GetChunk(i); // Chunks must be non-empty. assert(chunk_info.transactions.Any()); @@ -833,7 +833,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking) // - No non-empty intersection between the intersection and a prefix of the chunks of the // remainder of the linearization may be better than the intersection. 
TestBitSet prefix; - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { prefix |= chunking.GetChunk(i).transactions; auto reintersect = SetInfo(depgraph, prefix & intersect.transactions); if (!reintersect.feerate.IsEmpty()) { @@ -875,7 +875,7 @@ FUZZ_TARGET(clusterlin_linearize) if (make_connected) MakeConnected(depgraph); // Optionally construct an old linearization for it. - std::vector old_linearization; + std::vector old_linearization; { uint8_t have_old_linearization{0}; try { @@ -934,8 +934,8 @@ FUZZ_TARGET(clusterlin_linearize) // Only for very small clusters, test every topologically-valid permutation. if (depgraph.TxCount() <= 7) { - std::vector perm_linearization; - for (ClusterIndex i : depgraph.Positions()) perm_linearization.push_back(i); + std::vector perm_linearization; + for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i); // Iterate over all valid permutations. do { // Determine whether perm_linearization is topological. @@ -971,7 +971,7 @@ FUZZ_TARGET(clusterlin_postlinearize) } catch (const std::ios_base::failure&) {} // Retrieve a linearization from the fuzz input. - std::vector linearization; + std::vector linearization; linearization = ReadLinearization(depgraph, reader); SanityCheck(depgraph, linearization); @@ -1019,7 +1019,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) // Now construct a new graph, copying the nodes, but leaving only the first parent (even // direction) or the first child (odd direction). 
DepGraph depgraph_tree; - for (ClusterIndex i = 0; i < depgraph_gen.PositionRange(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.PositionRange(); ++i) { if (depgraph_gen.Positions()[i]) { depgraph_tree.AddTransaction(depgraph_gen.FeeRate(i)); } else { @@ -1031,14 +1031,14 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) depgraph_tree.RemoveTransactions(TestBitSet::Fill(depgraph_gen.PositionRange()) - depgraph_gen.Positions()); if (direction & 1) { - for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.TxCount(); ++i) { auto children = depgraph_gen.GetReducedChildren(i); if (children.Any()) { depgraph_tree.AddDependencies(TestBitSet::Singleton(i), children.First()); } } } else { - for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.TxCount(); ++i) { auto parents = depgraph_gen.GetReducedParents(i); if (parents.Any()) { depgraph_tree.AddDependencies(TestBitSet::Singleton(parents.First()), i); @@ -1047,7 +1047,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) } // Retrieve a linearization from the fuzz input. - std::vector linearization; + std::vector linearization; linearization = ReadLinearization(depgraph_tree, reader); SanityCheck(depgraph_tree, linearization); @@ -1104,7 +1104,7 @@ FUZZ_TARGET(clusterlin_postlinearize_moved_leaf) // Construct a linearization identical to lin, but with the tail end of lin_leaf moved to the // back. - std::vector lin_moved; + std::vector lin_moved; for (auto i : lin) { if (i != lin_leaf.back()) lin_moved.push_back(i); } @@ -1160,7 +1160,7 @@ FUZZ_TARGET(clusterlin_fix_linearization) } catch (const std::ios_base::failure&) {} // Construct an arbitrary linearization (not necessarily topological for depgraph). - std::vector linearization; + std::vector linearization; /** Which transactions of depgraph are yet to be included in linearization. 
*/ TestBitSet todo = depgraph.Positions(); while (todo.Any()) { @@ -1188,7 +1188,7 @@ FUZZ_TARGET(clusterlin_fix_linearization) size_t topo_prefix = 0; todo = depgraph.Positions(); while (topo_prefix < linearization.size()) { - ClusterIndex idx = linearization[topo_prefix]; + DepGraphIndex idx = linearization[topo_prefix]; todo.Reset(idx); if (todo.Overlaps(depgraph.Ancestors(idx))) break; ++topo_prefix; diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index cdde421637a..5992e819342 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -122,10 +122,10 @@ struct DepGraphFormatter static void Ser(Stream& s, const DepGraph& depgraph) { /** Construct a topological order to serialize the transactions in. */ - std::vector topo_order; + std::vector topo_order; topo_order.reserve(depgraph.TxCount()); for (auto i : depgraph.Positions()) topo_order.push_back(i); - std::sort(topo_order.begin(), topo_order.end(), [&](ClusterIndex a, ClusterIndex b) { + std::sort(topo_order.begin(), topo_order.end(), [&](DepGraphIndex a, DepGraphIndex b) { auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count(); if (anc_a != anc_b) return anc_a < anc_b; return a < b; @@ -136,9 +136,9 @@ struct DepGraphFormatter SetType done; // Loop over the transactions in topological order. - for (ClusterIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { + for (DepGraphIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { /** Which depgraph index we are currently writing. */ - ClusterIndex idx = topo_order[topo_idx]; + DepGraphIndex idx = topo_order[topo_idx]; // Write size, which must be larger than 0. s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED); // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative). @@ -146,9 +146,9 @@ struct DepGraphFormatter // Write dependency information. 
SetType written_parents; uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over. - for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { /** Which depgraph index we are currently considering as parent of idx. */ - ClusterIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; + DepGraphIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; // Ignore transactions which are already known to be ancestors. if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue; if (depgraph.Ancestors(idx)[dep_idx]) { @@ -191,9 +191,9 @@ struct DepGraphFormatter DepGraph topo_depgraph; /** Mapping from serialization order to cluster order, used later to reconstruct the * cluster order. */ - std::vector reordering; + std::vector reordering; /** How big the entries vector in the reconstructed depgraph will be (including holes). */ - ClusterIndex total_size{0}; + DepGraphIndex total_size{0}; // Read transactions in topological order. while (true) { @@ -217,9 +217,9 @@ struct DepGraphFormatter // Read dependency information. auto topo_idx = reordering.size(); s >> VARINT(diff); - for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { /** Which topo_depgraph index we are currently considering as parent of topo_idx. */ - ClusterIndex dep_topo_idx = topo_idx - 1 - dep_dist; + DepGraphIndex dep_topo_idx = topo_idx - 1 - dep_dist; // Ignore transactions which are already known ancestors of topo_idx. if (new_ancestors[dep_topo_idx]) continue; if (diff == 0) { @@ -286,9 +286,9 @@ template void SanityCheck(const DepGraph& depgraph) { // Verify Positions and PositionRange consistency. 
- ClusterIndex num_positions{0}; - ClusterIndex position_range{0}; - for (ClusterIndex i : depgraph.Positions()) { + DepGraphIndex num_positions{0}; + DepGraphIndex position_range{0}; + for (DepGraphIndex i : depgraph.Positions()) { ++num_positions; position_range = i + 1; } @@ -297,7 +297,7 @@ void SanityCheck(const DepGraph& depgraph) assert(position_range >= num_positions); assert(position_range <= SetType::Size()); // Consistency check between ancestors internally. - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { // Transactions include themselves as ancestors. assert(depgraph.Ancestors(i)[i]); // If a is an ancestor of b, then b's ancestors must include all of a's ancestors. @@ -306,8 +306,8 @@ void SanityCheck(const DepGraph& depgraph) } } // Consistency check between ancestors and descendants. - for (ClusterIndex i : depgraph.Positions()) { - for (ClusterIndex j : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { + for (DepGraphIndex j : depgraph.Positions()) { assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]); } // No transaction is a parent or child of itself. @@ -348,7 +348,7 @@ void SanityCheck(const DepGraph& depgraph) // In acyclic graphs, the union of parents with parents of parents etc. yields the // full ancestor set (and similar for children and descendants). std::vector parents(depgraph.PositionRange()), children(depgraph.PositionRange()); - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { parents[i] = depgraph.GetReducedParents(i); children[i] = depgraph.GetReducedChildren(i); } @@ -380,7 +380,7 @@ void SanityCheck(const DepGraph& depgraph) /** Perform a sanity check on a linearization. */ template -void SanityCheck(const DepGraph& depgraph, Span linearization) +void SanityCheck(const DepGraph& depgraph, Span linearization) { // Check completeness. 
assert(linearization.size() == depgraph.TxCount()); From 8d1bbafa84bbca0e412f939823fa1a30ef839951 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 30 Jan 2025 17:14:52 -0500 Subject: [PATCH 04/30] feefrac: introduce tagged wrappers to distinguish vsize/WU rates --- src/util/feefrac.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/util/feefrac.h b/src/util/feefrac.h index 161322b50a4..899bea90031 100644 --- a/src/util/feefrac.h +++ b/src/util/feefrac.h @@ -156,4 +156,26 @@ struct FeeFrac */ std::partial_ordering CompareChunks(Span chunks0, Span chunks1); +/** Tagged wrapper around FeeFrac to avoid unit confusion. */ +template +struct FeePerUnit : public FeeFrac +{ + // Inherit FeeFrac constructors. + using FeeFrac::FeeFrac; + + /** Convert a FeeFrac to a FeePerUnit. */ + static FeePerUnit FromFeeFrac(const FeeFrac& feefrac) noexcept + { + return {feefrac.fee, feefrac.size}; + } +}; + +// FeePerUnit instance for satoshi / vbyte. +struct VSizeTag {}; +using FeePerVSize = FeePerUnit; + +// FeePerUnit instance for satoshi / WU. +struct WeightTag {}; +using FeePerWeight = FeePerUnit; + #endif // BITCOIN_UTIL_FEEFRAC_H From e22a0b21f83691527c4a391681142f51a2be14ba Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 21 Aug 2024 14:37:00 -0400 Subject: [PATCH 05/30] txgraph: (feature) add initial version This adds an initial version of the txgraph module, with the TxGraph class. It encapsulates knowledge about the fees, sizes, and dependencies between all mempool transactions, but nothing else. In particular, it lacks knowledge about txids, inputs, outputs, CTransactions, ... and so on. Instead, it exposes a generic TxGraph::Ref type to reference nodes in the TxGraph, which can be passed around and stored by layers on top. 
--- src/CMakeLists.txt | 1 + src/txgraph.cpp | 1177 ++++++++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 124 +++++ 3 files changed, 1302 insertions(+) create mode 100644 src/txgraph.cpp create mode 100644 src/txgraph.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8862196dcf3..ba865b0bf0e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -279,6 +279,7 @@ add_library(bitcoin_node STATIC EXCLUDE_FROM_ALL signet.cpp torcontrol.cpp txdb.cpp + txgraph.cpp txmempool.cpp txorphanage.cpp txrequest.cpp diff --git a/src/txgraph.cpp b/src/txgraph.cpp new file mode 100644 index 00000000000..b56b536cc24 --- /dev/null +++ b/src/txgraph.cpp @@ -0,0 +1,1177 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +using namespace cluster_linearize; + +// Forward declare the TxGraph implementation class. +class TxGraphImpl; + +/** Position of a DepGraphIndex within a Cluster::m_linearization. */ +using LinearizationIndex = uint32_t; +/** Position of a Cluster within Graph::m_clusters. */ +using ClusterSetIndex = uint32_t; + +/** Quality levels for cached cluster linearizations. */ +enum class QualityLevel +{ + /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ + NEEDS_SPLIT, + /** This cluster has undergone changes that warrant re-linearization. */ + NEEDS_RELINEARIZE, + /** The minimal level of linearization has been performed, but it is not known to be optimal. */ + ACCEPTABLE, + /** The linearization is known to be optimal. */ + OPTIMAL, + /** This cluster is not registered in any m_clusters. + * This must be the last entry in QualityLevel as m_clusters is sized using it. */ + NONE, +}; + +/** A grouping of connected transactions inside a TxGraphImpl. 
*/ +class Cluster +{ + friend class TxGraphImpl; + using GraphIndex = TxGraph::GraphIndex; + using SetType = BitSet; + /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */ + DepGraph m_depgraph; + /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. Values for + * positions i that do not exist in m_depgraph shouldn't ever be accessed and thus don't + * matter. m_mapping.size() equals m_depgraph.PositionRange(). */ + std::vector m_mapping; + /** The current linearization of the cluster. m_linearization.size() equals + * m_depgraph.TxCount(). This is always kept topological. */ + std::vector m_linearization; + /** The quality level of m_linearization. */ + QualityLevel m_quality{QualityLevel::NONE}; + /** Which position this Cluster has in Graph::m_clusters[m_quality]. */ + ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; + +public: + /** Construct an empty Cluster. */ + Cluster() noexcept = default; + /** Construct a singleton Cluster. */ + explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; + + // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl). + Cluster(const Cluster&) = delete; + Cluster& operator=(const Cluster&) = delete; + Cluster(Cluster&&) = delete; + Cluster& operator=(Cluster&&) = delete; + + // Generic helper functions. + + /** Whether the linearization of this Cluster can be exposed. */ + bool IsAcceptable() const noexcept + { + return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL; + } + /** Whether the linearization of this Cluster is optimal. */ + bool IsOptimal() const noexcept + { + return m_quality == QualityLevel::OPTIMAL; + } + /** Whether this cluster requires splitting. */ + bool NeedsSplitting() const noexcept + { + return m_quality == QualityLevel::NEEDS_SPLIT; + } + /** Get the number of transactions in this Cluster.
*/ + LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } + /** Only called by Graph::SwapIndexes. */ + void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } + /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ + void Updated(TxGraphImpl& graph) noexcept; + + // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations. + + /** Apply all removals from the front of to_remove that apply to this Cluster, popping them + * off. There must be at least one such entry. */ + void ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept; + /** Split this cluster (must have a NEEDS_SPLIT* quality). Returns whether to delete this + * Cluster afterwards. */ + [[nodiscard]] bool Split(TxGraphImpl& graph) noexcept; + /** Move all transactions from cluster to *this (as separate components). */ + void Merge(TxGraphImpl& graph, Cluster& cluster) noexcept; + /** Given a span of (parent, child) pairs that all belong to this Cluster, apply them. */ + void ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept; + /** Improve the linearization of this Cluster. */ + void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept; + + // Functions that implement the Cluster-specific side of public TxGraph functions. + + /** Get a vector of Refs for the ancestors of a given Cluster element. */ + std::vector GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; + /** Get a vector of Refs for the descendants of a given Cluster element. */ + std::vector GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; + /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ + std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; + /** Get the individual transaction feerate of a Cluster element.
*/ + FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept; + /** Modify the fee of a Cluster element. */ + void SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept; +}; + +/** The transaction graph. + * + * The overall design of the data structure consists of 3 interlinked representations: + * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl). + * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl). + * - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class) + * + * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects + * refer back to the Clusters and Refs the corresponding transaction is contained in. + * + * While redundant, this permits moving all of them independently, without invalidating things + * or costly iteration to fix up everything: + * - Entry objects can be moved to fill holes left by removed transactions in the Entry vector + * (see TxGraphImpl::Compact). + * - Clusters can be rewritten continuously (removals can cause them to split, new dependencies + * can cause them to be merged). + * - Ref objects can be held outside the class, while permitting them to be moved around, and + * inherited from. + */ +class TxGraphImpl final : public TxGraph +{ + friend class Cluster; +private: + /** Internal RNG. */ + FastRandomContext m_rng; + + /** Information about one group of Clusters to be merged. */ + struct GroupEntry + { + /** Which clusters are to be merged. */ + std::vector m_clusters; + /** Which dependencies are to be applied to those merged clusters, as (parent, child) + * pairs. */ + std::vector> m_deps; + }; + + /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ + std::vector> m_clusters[int(QualityLevel::NONE)]; + /** Which removals have yet to be applied. */ + std::vector m_to_remove; + /** Which dependencies are to be added ((parent,child) pairs). 
GroupData::m_deps_offset indexes + * into this. */ + std::vector> m_deps_to_add; + /** Information about the merges to be performed, if known. */ + std::optional> m_group_data = std::vector{}; + /** Total number of transactions in this graph (sum of all transaction counts in all Clusters). + * */ + GraphIndex m_txcount{0}; + + /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ + struct Locator + { + /** Which Cluster the Entry appears in (nullptr = missing). */ + Cluster* cluster{nullptr}; + /** Where in the Cluster it appears (only if cluster != nullptr). */ + DepGraphIndex index{0}; + + /** Mark this Locator as missing. */ + void SetMissing() noexcept { cluster = nullptr; index = 0; } + /** Mark this Locator as present, in the specified Cluster. */ + void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; } + /** Check if this Locator is present (in some Cluster). */ + bool IsPresent() const noexcept { return cluster != nullptr; } + }; + + /** A class of objects held internally in TxGraphImpl, with information about a single + * transaction. */ + struct Entry + { + /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */ + Ref* m_ref{nullptr}; + /** Which Cluster and position therein this Entry appears in. */ + Locator m_locator; + /** The chunk feerate of this transaction (if not missing). */ + FeePerWeight m_chunk_feerate; + }; + + /** The set of all transactions. GraphIndex values index into this. */ + std::vector m_entries; + + /** Set of Entries which have no linked Ref anymore. */ + std::vector m_unlinked; + +public: + /** Construct a new TxGraphImpl. */ + explicit TxGraphImpl() noexcept {} + + // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). 
+ TxGraphImpl(const TxGraphImpl&) = delete; + TxGraphImpl& operator=(const TxGraphImpl&) = delete; + TxGraphImpl(TxGraphImpl&&) = delete; + TxGraphImpl& operator=(TxGraphImpl&&) = delete; + + // Simple helper functions. + + /** Swap the Entrys referred to by a and b. */ + void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; + /** Extract a Cluster. */ + std::unique_ptr ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept; + /** Delete a Cluster. */ + void DeleteCluster(Cluster& cluster) noexcept; + /** Insert a Cluster. */ + ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; + /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ + void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + + // Functions for handling Refs. + + /** Only called by Ref's move constructor/assignment to update Ref locations. */ + void UpdateRef(GraphIndex idx, Ref& new_location) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + entry.m_ref = &new_location; + } + + /** Only called by Ref::~Ref to unlink Refs, and Ref's move assignment. */ + void UnlinkRef(GraphIndex idx) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + entry.m_ref = nullptr; + m_unlinked.push_back(idx); + Compact(); + } + + // Functions related to various normalization/application steps. + /** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex + * values for remaining Entrys, so this only does something when no to-be-applied operations + * referring to GraphIndexes remain). */ + void Compact() noexcept; + /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a + * NEEDS_SPLIT* QualityLevel). */ + void ApplyRemovals() noexcept; + /** Split an individual cluster. 
*/ + void Split(Cluster& cluster) noexcept; + /** Split all clusters that need splitting. */ + void SplitAll() noexcept; + /** Populate m_group_data based on m_deps_to_add. */ + void GroupClusters() noexcept; + /** Merge the specified clusters. */ + void Merge(std::span to_merge) noexcept; + /** Apply all m_deps_to_add to the relevant Clusters. */ + void ApplyDependencies() noexcept; + /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ + void MakeAcceptable(Cluster& cluster) noexcept; + + // Implementations for the public TxGraph interface. + + Ref AddTransaction(const FeePerWeight& feerate) noexcept final; + void RemoveTransaction(const Ref& arg) noexcept final; + void AddDependency(const Ref& parent, const Ref& child) noexcept final; + void SetTransactionFee(const Ref&, int64_t fee) noexcept final; + + bool Exists(const Ref& arg) noexcept final; + FeePerWeight GetChunkFeerate(const Ref& arg) noexcept final; + FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept final; + std::vector GetCluster(const Ref& arg) noexcept final; + std::vector GetAncestors(const Ref& arg) noexcept final; + std::vector GetDescendants(const Ref& arg) noexcept final; + GraphIndex GetTransactionCount() noexcept final; +}; + +void Cluster::Updated(TxGraphImpl& graph) noexcept +{ + // Update all the Locators for this Cluster's Entrys. + for (DepGraphIndex idx : m_linearization) { + auto& entry = graph.m_entries[m_mapping[idx]]; + entry.m_locator.SetPresent(this, idx); + } + + // Compute its chunking and store its information in the Entry's m_chunk_feerate. + LinearizationChunking chunking(m_depgraph, m_linearization); + LinearizationIndex lin_idx{0}; + // Iterate over the chunks. + for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { + auto chunk = chunking.GetChunk(chunk_idx); + Assume(chunk.transactions.Any()); + // Iterate over the transactions in the linearization, which must match those in chunk. 
+ do { + DepGraphIndex idx = m_linearization[lin_idx++]; + GraphIndex graph_idx = m_mapping[idx]; + auto& entry = graph.m_entries[graph_idx]; + entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); + Assume(chunk.transactions[idx]); + chunk.transactions.Reset(idx); + } while(chunk.transactions.Any()); + } +} + +void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept +{ + // Iterate over the prefix of to_remove that applies to this cluster. + Assume(!to_remove.empty()); + SetType todo; + do { + GraphIndex idx = to_remove.front(); + Assume(idx < graph.m_entries.size()); + auto& entry = graph.m_entries[idx]; + auto& locator = entry.m_locator; + // Stop once we hit an entry that applies to another Cluster. + if (locator.cluster != this) break; + // - Remember it in a set of to-remove DepGraphIndexes. + todo.Set(locator.index); + // - Remove from m_mapping. This isn't strictly necessary as unused positions in m_mapping + // are just never accessed, but set it to -1 here to increase the ability to detect a bug + // that causes it to be accessed regardless. + m_mapping[locator.index] = GraphIndex(-1); + // - Mark it as removed in the Entry's locator. + locator.SetMissing(); + to_remove = to_remove.subspan(1); + --graph.m_txcount; + } while(!to_remove.empty()); + + Assume(todo.Any()); + // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries + // removed, so we benefit from batching all the removals). + m_depgraph.RemoveTransactions(todo); + m_mapping.resize(m_depgraph.PositionRange()); + + // Filter removals out of m_linearization. + m_linearization.erase(std::remove_if( + m_linearization.begin(), + m_linearization.end(), + [&](auto pos) { return todo[pos]; }), m_linearization.end()); + + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + Updated(graph); +} + +bool Cluster::Split(TxGraphImpl& graph) noexcept +{ + // This function can only be called when the Cluster needs splitting. 
+ Assume(NeedsSplitting()); + /** Which positions are still left in this Cluster. */ + auto todo = m_depgraph.Positions(); + /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and + * its position therein. */ + std::vector> remap(m_depgraph.PositionRange()); + std::vector new_clusters; + bool first{true}; + // Iterate over the connected components of this Cluster's m_depgraph. + while (todo.Any()) { + auto component = m_depgraph.FindConnectedComponent(todo); + if (first && component == todo) { + // The existing Cluster is an entire component. Leave it be, but update its quality. + Assume(todo == m_depgraph.Positions()); + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + // We need to recompute and cache its chunking. + Updated(graph); + return false; + } + first = false; + // Construct a new Cluster to hold the found component. + auto new_cluster = std::make_unique(); + new_clusters.push_back(new_cluster.get()); + // Remember that all the component's transactions go to this new Cluster. The positions + // will be determined below, so use -1 for now. + for (auto i : component) { + remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; + } + graph.InsertCluster(std::move(new_cluster), QualityLevel::NEEDS_RELINEARIZE); + todo -= component; + } + // Redistribute the transactions. + for (auto i : m_linearization) { + /** The cluster which transaction originally in position i is moved to. */ + Cluster* new_cluster = remap[i].first; + // Copy the transaction to the new cluster's depgraph, and remember the position. + remap[i].second = new_cluster->m_depgraph.AddTransaction(m_depgraph.FeeRate(i)); + // Create new mapping entry. + new_cluster->m_mapping.push_back(m_mapping[i]); + // Create a new linearization entry. As we're only appending transactions, they equal the + // DepGraphIndex. + new_cluster->m_linearization.push_back(remap[i].second); + } + // Redistribute the dependencies. 
+ for (auto i : m_linearization) { + /** The cluster transaction in position i is moved to. */ + Cluster* new_cluster = remap[i].first; + // Copy its parents, translating positions. + SetType new_parents; + for (auto par : m_depgraph.GetReducedParents(i)) new_parents.Set(remap[par].second); + new_cluster->m_depgraph.AddDependencies(new_parents, remap[i].second); + } + // Update all the Locators of moved transactions. + for (Cluster* new_cluster : new_clusters) { + new_cluster->Updated(graph); + } + // Wipe this Cluster, and return that it needs to be deleted. + m_depgraph = DepGraph<SetType>{}; + m_mapping.clear(); + m_linearization.clear(); + return true; +} + +void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept +{ + /** Vector to store the positions in this Cluster for each position in other. */ + std::vector<DepGraphIndex> remap(other.m_depgraph.PositionRange()); + // Iterate over all transactions in the other Cluster (the one being absorbed). + for (auto pos : other.m_linearization) { + auto idx = other.m_mapping[pos]; + // Copy the transaction into this Cluster, and remember its position. + auto new_pos = m_depgraph.AddTransaction(other.m_depgraph.FeeRate(pos)); + remap[pos] = new_pos; + if (new_pos == m_mapping.size()) { + m_mapping.push_back(idx); + } else { + m_mapping[new_pos] = idx; + } + m_linearization.push_back(new_pos); + // Copy the transaction's dependencies, translating them using remap. Note that since + // pos iterates over other.m_linearization, which is in topological order, all parents + // of pos should already be in remap. + SetType parents; + for (auto par : other.m_depgraph.GetReducedParents(pos)) { + parents.Set(remap[par]); + } + m_depgraph.AddDependencies(parents, remap[pos]); + // Update the transaction's Locator. There is no need to call Updated() to update chunk + // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting + // merged Cluster later anyway.
+ graph.m_entries[idx].m_locator.SetPresent(this, new_pos); + } + // Purge the other Cluster, now that everything has been moved. + other.m_depgraph = DepGraph{}; + other.m_linearization.clear(); + other.m_mapping.clear(); +} + +void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept +{ + // This function is invoked by TxGraphImpl::ApplyDependencies after merging groups of Clusters + // between which dependencies are added, which simply concatenates their linearizations. Invoke + // PostLinearize, which has the effect that the linearization becomes a merge-sort of the + // constituent linearizations. Do this here rather than in Cluster::Merge, because this + // function is only invoked once per merged Cluster, rather than once per constituent one. + // This concatenation + post-linearization could be replaced with an explicit merge-sort. + PostLinearize(m_depgraph, m_linearization); + + // Sort the list of dependencies to apply by child, so those can be applied in batch. + std::sort(to_apply.begin(), to_apply.end(), [](auto& a, auto& b) { return a.second < b.second; }); + // Iterate over groups of to-be-added dependencies with the same child. + auto it = to_apply.begin(); + while (it != to_apply.end()) { + auto& first_child = graph.m_entries[it->second].m_locator; + const auto child_idx = first_child.index; + // Iterate over all to-be-added dependencies within that same child, gather the relevant + // parents. + SetType parents; + while (it != to_apply.end()) { + auto& child = graph.m_entries[it->second].m_locator; + auto& parent = graph.m_entries[it->first].m_locator; + Assume(child.cluster == this && parent.cluster == this); + if (child.index != child_idx) break; + parents.Set(parent.index); + ++it; + } + // Push all dependencies to the underlying DepGraph. Note that this is O(N) in the size of + // the cluster, regardless of the number of parents being added, so batching them together + // has a performance benefit. 
+ m_depgraph.AddDependencies(parents, child_idx); + } + + // Finally fix the linearization, as the new dependencies may have invalidated the + // linearization, and post-linearize it to fix up the worst problems with it. + FixLinearization(m_depgraph, m_linearization); + PostLinearize(m_depgraph, m_linearization); + + // Finally push the changes to graph.m_entries. + Updated(graph); +} + +std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept +{ + Assume(quality != QualityLevel::NONE); + + auto& quality_clusters = m_clusters[int(quality)]; + Assume(setindex < quality_clusters.size()); + + // Extract the Cluster-owning unique_ptr. + std::unique_ptr ret = std::move(quality_clusters[setindex]); + ret->m_quality = QualityLevel::NONE; + ret->m_setindex = ClusterSetIndex(-1); + + // Clean up space in quality_cluster. + auto max_setindex = quality_clusters.size() - 1; + if (setindex != max_setindex) { + // If the cluster was not the last element of quality_clusters, move that to take its place. + quality_clusters.back()->m_setindex = setindex; + quality_clusters[setindex] = std::move(quality_clusters.back()); + } + // The last element of quality_clusters is now unused; drop it. + quality_clusters.pop_back(); + + return ret; +} + +ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept +{ + // Cannot insert with quality level NONE (as that would mean not inserted). + Assume(quality != QualityLevel::NONE); + // The passed-in Cluster must not currently be in the TxGraphImpl. + Assume(cluster->m_quality == QualityLevel::NONE); + + // Append it at the end of the relevant TxGraphImpl::m_cluster. 
+ auto& quality_clusters = m_clusters[int(quality)]; + ClusterSetIndex ret = quality_clusters.size(); + cluster->m_quality = quality; + cluster->m_setindex = ret; + quality_clusters.push_back(std::move(cluster)); + return ret; +} + +void TxGraphImpl::SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept +{ + Assume(new_quality != QualityLevel::NONE); + + // Don't do anything if the quality did not change. + if (old_quality == new_quality) return; + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(old_quality, old_index); + // And re-insert it where it belongs. + InsertCluster(std::move(cluster_ptr), new_quality); +} + +void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept +{ + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(cluster.m_quality, cluster.m_setindex); + // And throw it away. + cluster_ptr.reset(); +} + +void TxGraphImpl::ApplyRemovals() noexcept +{ + auto& to_remove = m_to_remove; + // Skip if there is nothing to remove. + if (to_remove.empty()) return; + // Group the set of to-be-removed entries by Cluster*. + std::sort(m_to_remove.begin(), m_to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); + }); + // Process per Cluster. + std::span to_remove_span{m_to_remove}; + while (!to_remove_span.empty()) { + Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; + if (cluster != nullptr) { + // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it + // can pop off whatever applies to it. + cluster->ApplyRemovals(*this, to_remove_span); + } else { + // Otherwise, skip this already-removed entry. This may happen when RemoveTransaction + // was called twice on the same Ref. 
+ to_remove_span = to_remove_span.subspan(1); + } + } + m_to_remove.clear(); + Compact(); +} + +void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept +{ + Assume(a < m_entries.size()); + Assume(b < m_entries.size()); + // Swap the Entry objects. + std::swap(m_entries[a], m_entries[b]); + // Iterate over both objects. + for (int i = 0; i < 2; ++i) { + GraphIndex idx = i ? b : a; + Entry& entry = m_entries[idx]; + // Update linked Ref. + if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; + // Update the locator. The rest of the Entry information will not change, so no need to + // invoke Cluster::Updated(). + Locator& locator = entry.m_locator; + if (locator.IsPresent()) { + locator.cluster->UpdateMapping(locator.index, idx); + } + } +} + +void TxGraphImpl::Compact() noexcept +{ + // We cannot compact while any to-be-applied operations remain, as we'd need to rewrite them. + // It is easier to delay the compaction until they have been applied. + if (!m_deps_to_add.empty()) return; + if (!m_to_remove.empty()) return; + + // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last + // ones get processed first. This means earlier-processed GraphIndexes will not cause moving of + // later-processed ones during the "swap with end of m_entries" step below (which might + // invalidate them). + std::sort(m_unlinked.begin(), m_unlinked.end(), std::greater{}); + + auto last = GraphIndex(-1); + for (GraphIndex idx : m_unlinked) { + // m_unlinked should never contain the same GraphIndex twice (the code below would fail + // if so, because GraphIndexes get invalidated by removing them). + Assume(idx != last); + last = idx; + + // Make sure the entry is unlinked. + Entry& entry = m_entries[idx]; + Assume(entry.m_ref == nullptr); + // Make sure the entry does not occur in the graph. + Assume(!entry.m_locator.IsPresent()); + + // Move the entry to the end. 
+ if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); + // Drop the entry for idx, now that it is at the end. + m_entries.pop_back(); + } + m_unlinked.clear(); +} + +void TxGraphImpl::Split(Cluster& cluster) noexcept +{ + // To split a Cluster, first make sure all removals are applied (as we might need to split + // again afterwards otherwise). + ApplyRemovals(); + bool del = cluster.Split(*this); + if (del) { + // Cluster::Split reports whether the Cluster is to be deleted. + DeleteCluster(cluster); + } +} + +void TxGraphImpl::SplitAll() noexcept +{ + // Before splitting all Clusters, first make sure all removals are applied. + ApplyRemovals(); + auto& queue = m_clusters[int(QualityLevel::NEEDS_SPLIT)]; + // Every Split() call takes the processed Cluster out of this queue (it is either deleted, or + // its quality is changed to NEEDS_RELINEARIZE), so this loop terminates. + while (!queue.empty()) { + Split(*queue.back().get()); + } +} + +void TxGraphImpl::GroupClusters() noexcept +{ + // If the groupings have been computed already, nothing is left to be done. + if (m_group_data.has_value()) return; + + // Before computing which Clusters need to be merged together, first apply all removals and + // split the Clusters into connected components. If we would group first, we might end up + // with inefficient Clusters which just end up being split again anyway. + SplitAll(); + + /** Annotated clusters: an entry for each Cluster, together with the representative for the + * partition it is in if known, or with nullptr if not yet known. */ + std::vector<std::pair<Cluster*, Cluster*>> an_clusters; + /** Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply + * to removed transactions), together with the representative root of the partition of + * Clusters it applies to. */ + std::vector<std::pair<std::pair<GraphIndex, GraphIndex>, Cluster*>> an_deps; + + // Construct an an_clusters entry for every parent and child in the to-be-applied dependencies.
+ for (const auto& [par, chl] : m_deps_to_add) { + auto par_cluster = m_entries[par].m_locator.cluster; + auto chl_cluster = m_entries[chl].m_locator.cluster; + // Skip dependencies for which the parent or child transaction is removed. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + an_clusters.emplace_back(par_cluster, nullptr); + // Do not include a duplicate when parent and child are in the same Cluster, as it'll be + // removed below anyway. + if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, nullptr); + } + // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters + // to which dependencies apply. + std::sort(an_clusters.begin(), an_clusters.end()); + an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); + + // Run the union-find algorithm to find partitions of the input Clusters which need to be + // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure. + { + /** Each PartitionData entry contains information about a single input Cluster. */ + struct PartitionData + { + /** The cluster this holds information for. */ + Cluster* cluster; + /** All PartitionData entries belonging to the same partition are organized in a tree. + * Each element points to its parent, or to itself if it is the root. The root is then + * a representative for the entire tree, and can be found by walking upwards from any + * element. */ + PartitionData* parent; + /** (only if this is a root, so when parent == this) An upper bound on the height of + * tree for this partition. */ + unsigned rank; + }; + /** Information about each input Cluster. Sorted by Cluster* pointer. */ + std::vector<PartitionData> partition_data; + + /** Given a Cluster, find its corresponding PartitionData.
*/ + auto locate_fn = [&](Cluster* arg) noexcept -> PartitionData* { + auto it = std::lower_bound(partition_data.begin(), partition_data.end(), arg, + [](auto& a, Cluster* ptr) noexcept { return a.cluster < ptr; }); + Assume(it != partition_data.end()); + Assume(it->cluster == arg); + return &*it; + }; + + /** Given a PartitionData, find the root of the tree it is in (its representative). */ + static constexpr auto find_root_fn = [](PartitionData* data) noexcept -> PartitionData* { + while (data->parent != data) { + // Replace pointers to parents with pointers to grandparents. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives. + auto par = data->parent; + data->parent = par->parent; + data = par; + } + return data; + }; + + /** Given two PartitionDatas, union the partitions they are in. */ + static constexpr auto union_fn = [](PartitionData* arg1, PartitionData* arg2) noexcept { + // Find the roots of the trees, and bail out if they are already equal (which would + // mean they are in the same partition already). + auto rep1 = find_root_fn(arg1); + auto rep2 = find_root_fn(arg2); + if (rep1 == rep2) return; + // Pick the lower-rank root to become a child of the higher-rank one. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_rank. + if (rep1->rank < rep2->rank) std::swap(rep1, rep2); + rep2->parent = rep1; + // The height bound of the surviving root only grows when both roots had equal rank. + rep1->rank += (rep1->rank == rep2->rank); + }; + + // Start by initializing every Cluster as its own singleton partition. + partition_data.resize(an_clusters.size()); + for (size_t i = 0; i < an_clusters.size(); ++i) { + partition_data[i].cluster = an_clusters[i].first; + partition_data[i].parent = &partition_data[i]; + partition_data[i].rank = 0; + } + + // Run through all parent/child pairs in m_deps_to_add, and union the + // partitions their Clusters are in.
+ for (const auto& [par, chl] : m_deps_to_add) { + auto par_cluster = m_entries[par].m_locator.cluster; + auto chl_cluster = m_entries[chl].m_locator.cluster; + // Nothing to do if parent and child are in the same Cluster. + if (par_cluster == chl_cluster) continue; + // Nothing to do if either parent or child transaction is removed already. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + Assume(par != chl); + union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + } + + // Populate the an_clusters and an_deps data structures with the list of input Clusters, + // and the input dependencies, annotated with the representative of the Cluster partition + // it applies to. + for (size_t i = 0; i < partition_data.size(); ++i) { + auto& data = partition_data[i]; + // Find the representative of the partition Cluster i is in, and store it with the + // Cluster. + auto rep = find_root_fn(&data)->cluster; + Assume(an_clusters[i].second == nullptr); + an_clusters[i].second = rep; + } + an_deps.reserve(m_deps_to_add.size()); + for (auto [par, chl] : m_deps_to_add) { + auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = m_entries[par].m_locator.cluster; + // Nothing to do if either parent or child transaction is removed already. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + // Find the representative of the partition which this dependency's child is in (which + // should be the same as the one for the parent). + auto rep = find_root_fn(locate_fn(chl_cluster))->cluster; + // Create an_deps entry. + an_deps.emplace_back(std::pair{par, chl}, rep); + } + } + + // Sort both an_clusters and an_deps by representative of the partition they are in, grouping + // all those applying to the same partition together. 
+ std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + + // Translate the resulting cluster groups to the m_group_data structure. + m_group_data = std::vector{}; + auto an_deps_it = an_deps.begin(); + auto an_clusters_it = an_clusters.begin(); + while (an_clusters_it != an_clusters.end()) { + // Process all clusters/dependencies belonging to the partition with representative rep. + auto rep = an_clusters_it->second; + // Create and initialize a new GroupData entry for the partition. + auto& new_entry = m_group_data->emplace_back(); + // Add all its clusters to it (copying those from an_clusters to m_clusters). + while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { + new_entry.m_clusters.push_back(an_clusters_it->first); + ++an_clusters_it; + } + // Add all its dependencies to it (copying those back from an_deps to m_deps). + while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { + new_entry.m_deps.push_back(an_deps_it->first); + ++an_deps_it; + } + } + Assume(an_deps_it == an_deps.end()); + Assume(an_clusters_it == an_clusters.end()); + Compact(); +} + +void TxGraphImpl::Merge(std::span to_merge) noexcept +{ + Assume(!to_merge.empty()); + // Nothing to do if a group consists of just a single Cluster. + if (to_merge.size() == 1) return; + + // Move the largest Cluster to the front of to_merge. As all transactions in other to-be-merged + // Clusters will be moved to that one, putting the largest one first minimizes the number of + // moves. 
+ size_t max_size_pos{0}; + DepGraphIndex max_size = to_merge[max_size_pos]->GetTxCount(); + for (size_t i = 1; i < to_merge.size(); ++i) { + DepGraphIndex size = to_merge[i]->GetTxCount(); + if (size > max_size) { + max_size_pos = i; + max_size = size; + } + } + if (max_size_pos != 0) std::swap(to_merge[0], to_merge[max_size_pos]); + + // Merge all further Clusters in the group into the first one, and delete them. + for (size_t i = 1; i < to_merge.size(); ++i) { + to_merge[0]->Merge(*this, *to_merge[i]); + DeleteCluster(*to_merge[i]); + } +} + +void TxGraphImpl::ApplyDependencies() noexcept +{ + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + GroupClusters(); + Assume(m_group_data.has_value()); + // Nothing to do if there are no dependencies to be added. + if (m_deps_to_add.empty()) return; + + // For each group of to-be-merged Clusters. + for (auto& group_data : *m_group_data) { + // Invoke Merge() to merge them into a single Cluster. + Merge(group_data.m_clusters); + // Actually apply all to-be-added dependencies (all parents and children from this grouping + // belong to the same Cluster at this point because of the merging above). + const auto& loc = m_entries[group_data.m_deps[0].second].m_locator; + Assume(loc.IsPresent()); + loc.cluster->ApplyDependencies(*this, group_data.m_deps); + } + + // Wipe the list of to-be-added dependencies now that they are applied. + m_deps_to_add.clear(); + Compact(); + // Also no further Cluster mergings are needed (note that we clear, but don't set to + // std::nullopt, as that would imply the groupings are unknown). + m_group_data = std::vector{}; +} + +void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept +{ + // We can only relinearize Clusters that do not need splitting. + Assume(!NeedsSplitting()); + // No work is required for Clusters which are already optimally linearized. 
+ if (IsOptimal()) return; + // Invoke the actual linearization algorithm (passing in the existing one). + uint64_t rng_seed = graph.m_rng.rand64(); + auto [linearization, optimal] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization); + // Postlinearize if the result isn't optimal already. This guarantees (among other things) + // that the chunks of the resulting linearization are all connected. + if (!optimal) PostLinearize(m_depgraph, linearization); + // Update the linearization. + m_linearization = std::move(linearization); + // Update the Cluster's quality. + auto new_quality = optimal ? QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE; + graph.SetClusterQuality(m_quality, m_setindex, new_quality); + // Update the Entry objects. + Updated(graph); +} + +void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept +{ + // Relinearize the Cluster if needed. + if (!cluster.NeedsSplitting() && !cluster.IsAcceptable()) { + cluster.Relinearize(*this, 10000); + } +} + +Cluster::Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept +{ + // Create a new transaction in the DepGraph, and remember its position in m_mapping. + auto cluster_idx = m_depgraph.AddTransaction(feerate); + m_mapping.push_back(graph_index); + m_linearization.push_back(cluster_idx); +} + +TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept +{ + // Construct a new Ref. + Ref ret; + // Construct a new Entry, and link it with the Ref. + auto idx = m_entries.size(); + m_entries.emplace_back(); + auto& entry = m_entries.back(); + entry.m_ref = &ret; + GetRefGraph(ret) = this; + GetRefIndex(ret) = idx; + // Construct a new singleton Cluster (which is necessarily optimally linearized). + auto cluster = std::make_unique(*this, feerate, idx); + auto cluster_ptr = cluster.get(); + InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); + cluster_ptr->Updated(*this); + ++m_txcount; + // Return the Ref. 
+ return ret; +} + +void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept +{ + // Don't do anything if the Ref is empty (which may be indicative of the transaction already + // having been removed). + if (GetRefGraph(arg) == nullptr) return; + Assume(GetRefGraph(arg) == this); + // Find the Cluster the transaction is in, and stop if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return; + // Remember that the transaction is to be removed. + m_to_remove.push_back(GetRefIndex(arg)); + // Wipe m_group_data (as it will need to be recomputed). + m_group_data.reset(); +} + +void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept +{ + // Don't do anything if either Ref is empty (which may be indicative of it having already been + // removed). + if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return; + Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); + // Don't do anything if this is a dependency on self. + if (GetRefIndex(parent) == GetRefIndex(child)) return; + // Find the Cluster the parent and child transaction are in, and stop if either appears to be + // already removed. + auto par_cluster = m_entries[GetRefIndex(parent)].m_locator.cluster; + if (par_cluster == nullptr) return; + auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; + if (chl_cluster == nullptr) return; + // Remember that this dependency is to be applied. + m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + // Wipe m_group_data (as it will need to be recomputed). + m_group_data.reset(); +} + +bool TxGraphImpl::Exists(const Ref& arg) noexcept +{ + if (GetRefGraph(arg) == nullptr) return false; + Assume(GetRefGraph(arg) == this); + // Make sure the transaction isn't scheduled for removal. 
+ ApplyRemovals(); + return m_entries[GetRefIndex(arg)].m_locator.IsPresent(); +} + +std::vector<TxGraph::Ref*> Cluster::GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +{ + std::vector<TxGraph::Ref*> ret; + ret.reserve(m_depgraph.Ancestors(idx).Count()); + // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. + // NOTE(review): entry.m_ref is pushed unconditionally, so an Entry whose Ref was unlinked + // would contribute a nullptr here - confirm that callers expect this. + for (auto anc_idx : m_depgraph.Ancestors(idx)) { + const auto& entry = graph.m_entries[m_mapping[anc_idx]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +std::vector<TxGraph::Ref*> Cluster::GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +{ + std::vector<TxGraph::Ref*> ret; + ret.reserve(m_depgraph.Descendants(idx).Count()); + // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. + for (auto desc_idx : m_depgraph.Descendants(idx)) { + const auto& entry = graph.m_entries[m_mapping[desc_idx]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +std::vector<TxGraph::Ref*> Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept +{ + std::vector<TxGraph::Ref*> ret; + ret.reserve(m_linearization.size()); + // Translate all transactions in the Cluster (in linearization order) to Refs. + for (auto idx : m_linearization) { + const auto& entry = graph.m_entries[m_mapping[idx]]; + ret.push_back(entry.m_ref); + } + return ret; +} + +FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept +{ + return FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(idx)); +} + +std::vector<TxGraph::Ref*> TxGraphImpl::GetAncestors(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster.
+ return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); +} + +std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); +} + +std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + ApplyDependencies(); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then dispatch to it. + MakeAcceptable(*cluster); + return cluster->GetClusterRefs(*this); +} + +TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept +{ + ApplyRemovals(); + return m_txcount; +} + +FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept +{ + // Return the empty FeePerWeight if the passed Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. + ApplyRemovals(); + // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. 
+ auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator.index); +} + +FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept +{ + // Return the empty FeePerWeight if the passed Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be inaccurate otherwise. + ApplyDependencies(); + // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. + auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then return the transaction's + // chunk feerate. + MakeAcceptable(*cluster); + const auto& entry = m_entries[GetRefIndex(arg)]; + return entry.m_chunk_feerate; +} + +void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept +{ + // Make sure the specified DepGraphIndex exists in this Cluster. + Assume(m_depgraph.Positions()[idx]); + // Bail out if the fee isn't actually being changed. + if (m_depgraph.FeeRate(idx).fee == fee) return; + // Update the fee, remember that relinearization will be necessary, and update the Entries + // in the same Cluster. + m_depgraph.FeeRate(idx).fee = fee; + if (!NeedsSplitting()) { + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + } + Updated(graph); +} + +void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept +{ + // Don't do anything if the passed Ref is empty. + if (GetRefGraph(ref) == nullptr) return; + Assume(GetRefGraph(ref) == this); + // Find the entry, its locator, and inform its Cluster about the new feerate, if any. 
+ auto& entry = m_entries[GetRefIndex(ref)]; + auto& locator = entry.m_locator; + if (locator.IsPresent()) { + locator.cluster->SetFee(*this, locator.index, fee); + } +} + +} // namespace + +TxGraph::Ref::~Ref() +{ + if (m_graph) { + // Inform the TxGraph about the Ref being destroyed. + m_graph->UnlinkRef(m_index); + m_graph = nullptr; + } +} + +TxGraph::Ref& TxGraph::Ref::operator=(Ref&& other) noexcept +{ + // Unlink the current graph, if any. + if (m_graph) m_graph->UnlinkRef(m_index); + // Inform the other's graph about the move, if any. + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually update the contents. + m_graph = other.m_graph; + m_index = other.m_index; + other.m_graph = nullptr; + other.m_index = GraphIndex(-1); + return *this; +} + +TxGraph::Ref::Ref(Ref&& other) noexcept +{ + // Inform the TxGraph of other that its Ref is being moved. + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually move the contents. + std::swap(m_graph, other.m_graph); + std::swap(m_index, other.m_index); +} + +std::unique_ptr MakeTxGraph() noexcept +{ + return std::make_unique(); +} diff --git a/src/txgraph.h b/src/txgraph.h new file mode 100644 index 00000000000..0fdecddbda0 --- /dev/null +++ b/src/txgraph.h @@ -0,0 +1,124 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include + +#include + +#ifndef BITCOIN_TXGRAPH_H +#define BITCOIN_TXGRAPH_H + +/** No connected component within TxGraph is allowed to exceed this number of transactions. */ +static constexpr unsigned CLUSTER_COUNT_LIMIT{64}; + +/** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. */ +class TxGraph +{ +public: + /** Internal identifier for a transaction within a TxGraph. 
*/ + using GraphIndex = uint32_t; + + /** Data type used to reference transactions within a TxGraph. + * + * Every transaction within a TxGraph has exactly one corresponding TxGraph::Ref, held by users + * of the class. Destroying the TxGraph::Ref removes the corresponding transaction. + * + * Users of the class can inherit from TxGraph::Ref. If all Refs are inherited this way, the + * Ref* pointers returned by TxGraph functions can be used as this inherited type. + */ + class Ref + { + // Allow TxGraph's GetRefGraph and GetRefIndex to access internals. + friend class TxGraph; + /** Which Graph the Entry lives in. nullptr if this Ref is empty. */ + TxGraph* m_graph = nullptr; + /** Index into the Graph's m_entries. Only used if m_graph != nullptr. */ + GraphIndex m_index = GraphIndex(-1); + public: + /** Construct an empty Ref. Non-empty Refs can only be created using + * TxGraph::AddTransaction. */ + Ref() noexcept = default; + /** Destroy this Ref. This is only allowed when it is empty, or the transaction it refers + * to has been removed from the graph. */ + virtual ~Ref(); + // Support moving a Ref. + Ref& operator=(Ref&& other) noexcept; + Ref(Ref&& other) noexcept; + // Do not permit copy constructing or copy assignment. A TxGraph entry can have at most one + // Ref pointing to it. + Ref& operator=(const Ref&) = delete; + Ref(const Ref&) = delete; + }; + +protected: + // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. + friend class TxGraph::Ref; + /** Inform the TxGraph implementation that a TxGraph::Ref has moved. */ + virtual void UpdateRef(GraphIndex index, Ref& new_location) noexcept = 0; + /** Inform the TxGraph implementation that a TxGraph::Ref was destroyed. */ + virtual void UnlinkRef(GraphIndex index) noexcept = 0; + // Allow TxGraph implementations (inheriting from it) to access Ref internals. 
+ static TxGraph*& GetRefGraph(Ref& arg) noexcept { return arg.m_graph; } + static TxGraph* GetRefGraph(const Ref& arg) noexcept { return arg.m_graph; } + static GraphIndex& GetRefIndex(Ref& arg) noexcept { return arg.m_index; } + static GraphIndex GetRefIndex(const Ref& arg) noexcept { return arg.m_index; } + +public: + /** Virtual destructor, so inheriting is safe. */ + virtual ~TxGraph() = default; + /** Construct a new transaction with the specified feerate, and return a Ref to it. In all + * further calls, only Refs created by AddTransaction() are allowed to be passed to this + * TxGraph object (or empty Ref objects). */ + [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; + /** Remove the specified transaction. This is a no-op if the transaction was already removed. + * + * TxGraph may internally reorder transaction removals with dependency additions for + * performance reasons. If together with any transaction removal all its descendants, or all + * its ancestors, are removed as well (which is what always happens in realistic scenarios), + * this reordering will not affect the behavior of TxGraph. + * + * As an example, imagine 3 transactions A,B,C where B depends on A. If a dependency of C on B + * is added, and then B is deleted, C will still depend on A. If the deletion of B is reordered + * before the C->B dependency is added, the dependency adding has no effect. If, together with + * the deletion of B also either A or C is deleted, there is no distinction between the + * original order case and the reordered case. + */ + virtual void RemoveTransaction(const Ref& arg) noexcept = 0; + /** Add a dependency between two specified transactions. Parent may not be a descendant of + * child already (but may be an ancestor of it already, in which case this is a no-op). If + * either transaction is already removed, this is a no-op. 
*/ + virtual void AddDependency(const Ref& parent, const Ref& child) noexcept = 0; + /** Modify the fee of the specified transaction. If the transaction does not exist (or was + * removed), this has no effect. */ + virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; + + /** Determine whether arg exists in this graph (i.e., was not removed). */ + virtual bool Exists(const Ref& arg) noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeePerWeight if + * arg does not exist. */ + virtual FeePerWeight GetChunkFeerate(const Ref& arg) noexcept = 0; + /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight + * if arg does not exist. */ + virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; + /** Get pointers to all transactions in the connected component ("cluster") which arg is in. + * The transactions will be returned in a topologically-valid order of acceptable quality. + * Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetCluster(const Ref& arg) noexcept = 0; + /** Get pointers to all ancestors of the specified transaction. Returns {} if arg does not + * exist. */ + virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; + /** Get pointers to all descendants of the specified transaction. Returns {} if arg does not + * exist in the graph. */ + virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; + /** Get the total number of transactions in the graph. */ + virtual GraphIndex GetTransactionCount() noexcept = 0; +}; + +/** Construct a new TxGraph. 
*/ +std::unique_ptr MakeTxGraph() noexcept; + +#endif // BITCOIN_TXGRAPH_H From dde2bafeb1b19f706785f8af308d908b67e612f4 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 15 Nov 2024 14:15:12 -0500 Subject: [PATCH 06/30] txgraph: (tests) add simulation fuzz test This adds a simulation fuzz test for txgraph, by comparing with a naive reimplementation that models the entire graph as a single DepGraph, and clusters in TxGraph as connected components within that DepGraph. --- src/test/fuzz/CMakeLists.txt | 1 + src/test/fuzz/txgraph.cpp | 441 +++++++++++++++++++++++++++++++++++ 2 files changed, 442 insertions(+) create mode 100644 src/test/fuzz/txgraph.cpp diff --git a/src/test/fuzz/CMakeLists.txt b/src/test/fuzz/CMakeLists.txt index e99c6d91f47..846afeeb474 100644 --- a/src/test/fuzz/CMakeLists.txt +++ b/src/test/fuzz/CMakeLists.txt @@ -124,6 +124,7 @@ add_executable(fuzz tx_in.cpp tx_out.cpp tx_pool.cpp + txgraph.cpp txorphan.cpp txrequest.cpp # Visual Studio 2022 version 17.12 introduced a bug diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp new file mode 100644 index 00000000000..8cf1616f16a --- /dev/null +++ b/src/test/fuzz/txgraph.cpp @@ -0,0 +1,441 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace cluster_linearize; + +namespace { + +/** Data type representing a naive simulated TxGraph, keeping all transactions (even from + * disconnected components) in a single DepGraph. */ +struct SimTxGraph +{ + /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's + * cluster count, so we can exercise situations with more transactions than fit in one + * cluster. 
*/ + static constexpr unsigned MAX_TRANSACTIONS = CLUSTER_COUNT_LIMIT * 2; + /** Set type to use in the simulation. */ + using SetType = BitSet; + /** Data type for representing positions within SimTxGraph::graph. */ + using Pos = DepGraphIndex; + /** Constant to mean "missing in this graph". */ + static constexpr auto MISSING = Pos(-1); + + /** The dependency graph (for all transactions in the simulation, regardless of + * connectivity/clustering). */ + DepGraph graph; + /** For each position in graph, which TxGraph::Ref it corresponds with (if any). */ + std::array, MAX_TRANSACTIONS> simmap; + /** For each TxGraph::Ref in graph, the position it corresponds with. */ + std::map simrevmap; + /** The set of TxGraph::Ref entries that have been removed, but not yet destroyed. */ + std::vector> removed; + + /** Determine the number of (non-removed) transactions in the graph. */ + DepGraphIndex GetTransactionCount() const { return graph.TxCount(); } + + /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */ + Pos Find(const TxGraph::Ref* ref) const + { + auto it = simrevmap.find(ref); + if (it != simrevmap.end()) return it->second; + return MISSING; + } + + /** Given a position in this simulated graph, get the corresponding TxGraph::Ref. */ + TxGraph::Ref* GetRef(Pos pos) + { + assert(graph.Positions()[pos]); + assert(simmap[pos]); + return simmap[pos].get(); + } + + /** Add a new transaction to the simulation. */ + TxGraph::Ref* AddTransaction(const FeePerWeight& feerate) + { + assert(graph.TxCount() < MAX_TRANSACTIONS); + auto simpos = graph.AddTransaction(feerate); + assert(graph.Positions()[simpos]); + simmap[simpos] = std::make_unique(); + auto ptr = simmap[simpos].get(); + simrevmap[ptr] = simpos; + return ptr; + } + + /** Add a dependency between two positions in this graph. 
*/ + void AddDependency(TxGraph::Ref* parent, TxGraph::Ref* child) + { + auto par_pos = Find(parent); + if (par_pos == MISSING) return; + auto chl_pos = Find(child); + if (chl_pos == MISSING) return; + graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + } + + /** Modify the transaction fee of a ref, if it exists. */ + void SetTransactionFee(TxGraph::Ref* ref, int64_t fee) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.FeeRate(pos).fee = fee; + } + + /** Remove the transaction in the specified position from the graph. */ + void RemoveTransaction(TxGraph::Ref* ref) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + // Retain the TxGraph::Ref corresponding to this position, so the Ref destruction isn't + // invoked until the simulation explicitly decided to do so. + removed.push_back(std::move(simmap[pos])); + simmap[pos].reset(); + } + + /** Construct the set with all positions in this graph corresponding to the specified + * TxGraph::Refs. All of them must occur in this graph and not be removed. */ + SetType MakeSet(std::span arg) + { + SetType ret; + for (TxGraph::Ref* ptr : arg) { + auto pos = Find(ptr); + assert(pos != Pos(-1)); + ret.Set(pos); + } + return ret; + } + + /** Get the set of ancestors (desc=false) or descendants (desc=true) in this graph. */ + SetType GetAncDesc(TxGraph::Ref* arg, bool desc) + { + auto pos = Find(arg); + if (pos == MISSING) return {}; + return desc ? graph.Descendants(pos) : graph.Ancestors(pos); + } + + /** Given a set of Refs (given as a vector of pointers), expand the set to include all its + * ancestors (desc=false) or all its descendants (desc=true) in this graph. */ + void IncludeAncDesc(std::vector& arg, bool desc) + { + std::vector ret; + for (auto ptr : arg) { + auto simpos = Find(ptr); + if (simpos != MISSING) { + for (auto i : desc ? 
graph.Descendants(simpos) : graph.Ancestors(simpos)) { + ret.push_back(simmap[i].get()); + } + } else { + ret.push_back(ptr); + } + } + // Deduplicate. + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + // Replace input. + arg = std::move(ret); + } +}; + +} // namespace + +FUZZ_TARGET(txgraph) +{ + // This is a big simulation test for TxGraph, which performs a fuzz-derived sequence of valid + // operations on a TxGraph instance, as well as on a simpler (mostly) reimplementation (see + // SimTxGraph above), comparing the outcome of functions that return a result, and finally + // performing a full comparison between the two. + + SeedRandomStateForTest(SeedRand::ZEROS); + FuzzedDataProvider provider(buffer.data(), buffer.size()); + + /** Internal test RNG, used only for decisions which would require significant amount of data + * to be read from the provider, without realistically impacting test sensitivity. */ + InsecureRandomContext rng(0xdecade2009added + buffer.size()); + + /** Variable used whenever an empty TxGraph::Ref is needed. */ + TxGraph::Ref empty_ref; + + // Construct a real and a simulated graph. + auto real = MakeTxGraph(); + SimTxGraph sim; + + /** Function to pick any Ref (from sim.simmap or sim.removed, or the empty Ref). */ + auto pick_fn = [&]() noexcept -> TxGraph::Ref* { + auto tx_count = sim.GetTransactionCount(); + /** The number of possible choices. */ + size_t choices = tx_count + sim.removed.size() + 1; + /** Pick one of them. */ + auto choice = provider.ConsumeIntegralInRange(0, choices - 1); + if (choice < tx_count) { + // Return from real. + for (auto i : sim.graph.Positions()) { + if (choice == 0) return sim.GetRef(i); + --choice; + } + assert(false); + } else { + choice -= tx_count; + } + if (choice < sim.removed.size()) { + // Return from removed. + return sim.removed[choice].get(); + } else { + choice -= sim.removed.size(); + } + // Return empty. 
+ assert(choice == 0); + return &empty_ref; + }; + + LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { + // Read a one-byte command. + int command = provider.ConsumeIntegral(); + // Treat its lowest bit as a flag (which selects a variant of some of the operations), and + // leave the rest of the bits in command. + bool alt = command & 1; + command >>= 1; + + // Keep decrementing command for each applicable operation, until one is hit. Multiple + // iterations may be necessary. + while (true) { + if (sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + // AddTransaction. + int64_t fee; + int32_t size; + if (alt) { + // If alt is true, pick fee and size from the entire range. + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + size = provider.ConsumeIntegralInRange(1, 0x3fffff); + } else { + // Otherwise, use smaller ranges which consume fewer fuzz input bytes, as just + // these are likely sufficient to trigger all interesting code paths already. + fee = provider.ConsumeIntegral(); + size = provider.ConsumeIntegral() + 1; + } + FeePerWeight feerate{fee, size}; + // Create a real TxGraph::Ref. + auto ref = real->AddTransaction(feerate); + // Create a unique_ptr place in the simulation to put the Ref in. + auto ref_loc = sim.AddTransaction(feerate); + // Move it in place. + *ref_loc = std::move(ref); + break; + } else if (sim.GetTransactionCount() + sim.removed.size() > 1 && command-- == 0) { + // AddDependency. + auto par = pick_fn(); + auto chl = pick_fn(); + auto pos_par = sim.Find(par); + auto pos_chl = sim.Find(chl); + if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) { + // Determine if adding this would introduce a cycle (not allowed by TxGraph), + // and if so, skip. + if (sim.graph.Ancestors(pos_par)[pos_chl]) break; + // Determine if adding this would violate CLUSTER_COUNT_LIMIT, and if so, skip. 
+ auto temp_depgraph = sim.graph; + temp_depgraph.AddDependencies(SimTxGraph::SetType::Singleton(pos_par), pos_chl); + auto todo = temp_depgraph.Positions(); + bool oversize{false}; + while (todo.Any()) { + auto component = temp_depgraph.FindConnectedComponent(todo); + if (component.Count() > CLUSTER_COUNT_LIMIT) oversize = true; + todo -= component; + } + if (oversize) break; + } + sim.AddDependency(par, chl); + real->AddDependency(*par, *chl); + break; + } else if (sim.removed.size() < 100 && command-- == 0) { + // RemoveTransaction. Either all its ancestors or all its descendants are also + // removed (if any), to make sure TxGraph's reordering of removals and dependencies + // has no effect. + std::vector to_remove; + to_remove.push_back(pick_fn()); + sim.IncludeAncDesc(to_remove, alt); + // The order in which these ancestors/descendants are removed should not matter; + // randomly shuffle them. + std::shuffle(to_remove.begin(), to_remove.end(), rng); + for (TxGraph::Ref* ptr : to_remove) { + real->RemoveTransaction(*ptr); + sim.RemoveTransaction(ptr); + } + break; + } else if (sim.removed.size() > 0 && command-- == 0) { + // ~Ref. Destroying a TxGraph::Ref has an observable effect on the TxGraph it + // refers to, so this simulation permits doing so separately from other actions on + // TxGraph. + + // Pick a Ref of sim.removed to destroy. + auto removed_pos = provider.ConsumeIntegralInRange(0, sim.removed.size() - 1); + if (removed_pos != sim.removed.size() - 1) { + std::swap(sim.removed[removed_pos], sim.removed.back()); + } + sim.removed.pop_back(); + break; + } else if (command-- == 0) { + // SetTransactionFee. + int64_t fee; + if (alt) { + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + } else { + fee = provider.ConsumeIntegral(); + } + auto ref = pick_fn(); + real->SetTransactionFee(*ref, fee); + sim.SetTransactionFee(ref, fee); + break; + } else if (command-- == 0) { + // GetTransactionCount. 
+ assert(real->GetTransactionCount() == sim.GetTransactionCount()); + break; + } else if (command-- == 0) { + // Exists. + auto ref = pick_fn(); + bool exists = real->Exists(*ref); + bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; + assert(exists == should_exist); + break; + } else if (command-- == 0) { + // GetIndividualFeerate. + auto ref = pick_fn(); + auto feerate = real->GetIndividualFeerate(*ref); + auto simpos = sim.Find(ref); + if (simpos == SimTxGraph::MISSING) { + assert(feerate.IsEmpty()); + } else { + assert(feerate == sim.graph.FeeRate(simpos)); + } + break; + } else if (command-- == 0) { + // GetChunkFeerate. + auto ref = pick_fn(); + auto feerate = real->GetChunkFeerate(*ref); + auto simpos = sim.Find(ref); + if (simpos == SimTxGraph::MISSING) { + assert(feerate.IsEmpty()); + } else { + // Just do some quick checks that the reported value is in range. A full + // recomputation of expected chunk feerates is done at the end. + assert(feerate.size >= sim.graph.FeeRate(simpos).size); + } + break; + } else if (command-- == 0) { + // GetAncestors/GetDescendants. + auto ref = pick_fn(); + auto result_set = sim.MakeSet(alt ? real->GetDescendants(*ref) : + real->GetAncestors(*ref)); + auto expect_set = sim.GetAncDesc(ref, alt); + assert(result_set == expect_set); + break; + } else if (command-- == 0) { + // GetCluster. + auto ref = pick_fn(); + auto result = real->GetCluster(*ref); + // Check cluster count limit. + assert(result.size() <= CLUSTER_COUNT_LIMIT); + // Require the result to be topologically valid and not contain duplicates. + auto left = sim.graph.Positions(); + for (auto refptr : result) { + auto simpos = sim.Find(refptr); + assert(simpos != SimTxGraph::MISSING); + assert(left[simpos]); + left.Reset(simpos); + assert(!sim.graph.Ancestors(simpos).Overlaps(left)); + } + // Require the set to be connected. + auto result_set = sim.MakeSet(result); + assert(sim.graph.IsConnected(result_set)); + // If ref exists, the result must contain it. 
If not, it must be empty. + auto simpos = sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + assert(result_set[simpos]); + } else { + assert(result_set.None()); + } + // Require the set not to have ancestors or descendants outside of it. + for (auto i : result_set) { + assert(sim.graph.Ancestors(i).IsSubsetOf(result_set)); + assert(sim.graph.Descendants(i).IsSubsetOf(result_set)); + } + break; + } + } + } + // Compare simple properties of the graph with the simulation. + assert(real->GetTransactionCount() == sim.GetTransactionCount()); + + // Perform a full comparison. + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); + // Check its ancestors against simulation. + auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(*sim.GetRef(i)); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). 
+ std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + DepGraphIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. + while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); + ++idx; + } + } + } + } + + // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be + // addressed in a follow-up commit). + for (auto i : sim.graph.Positions()) { + auto ref = sim.GetRef(i); + real->RemoveTransaction(*ref); + } +} From aa8267ec2e4165381c6d04c823aa958eb3f5df43 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 22:45:46 -0500 Subject: [PATCH 07/30] txgraph: (tests) add internal sanity check function To make testing more powerful, expose a function to perform an internal sanity check on the state of a TxGraph. This is especially important as TxGraphImpl contains many redundantly represented pieces of information: * graph contains clusters, which refer to entries, but the entries refer back * graph maintains pointers to Ref objects, which point back to the graph. 
This lets us make sure they are always in sync. --- src/test/fuzz/txgraph.cpp | 8 +++ src/txgraph.cpp | 124 ++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 3 + 3 files changed, 135 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 8cf1616f16a..cec1cd56edc 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -376,6 +376,11 @@ FUZZ_TARGET(txgraph) } } } + + // After running all modifications, perform an internal sanity check (before invoking + // inspectors that may modify the internal state). + real->SanityCheck(); + // Compare simple properties of the graph with the simulation. assert(real->GetTransactionCount() == sim.GetTransactionCount()); @@ -432,6 +437,9 @@ FUZZ_TARGET(txgraph) } } + // Sanity check again (because invoking inspectors may modify internal unobservable state). + real->SanityCheck(); + // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be // addressed in a follow-up commit). for (auto i : sim.graph.Positions()) { diff --git a/src/txgraph.cpp b/src/txgraph.cpp index b56b536cc24..e4b97b59491 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -94,6 +95,8 @@ public: } /** Get the number of transactions in this Cluster. */ LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } + /** Given a DepGraphIndex into this Cluster, find the corresponding GraphIndex. */ + GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept { return m_mapping[index]; } /** Only called by Graph::SwapIndexes. */ void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ @@ -126,6 +129,10 @@ public: FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept; /** Modify the fee of a Cluster element. 
*/ void SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept; + + // Debugging functions. + + void SanityCheck(const TxGraphImpl& graph) const; }; /** The transaction graph. @@ -189,6 +196,8 @@ private: void SetMissing() noexcept { cluster = nullptr; index = 0; } /** Mark this Locator as present, in the specified Cluster. */ void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; } + /** Check if this Locator is missing. */ + bool IsMissing() const noexcept { return cluster == nullptr && index == 0; } /** Check if this Locator is present (in some Cluster). */ bool IsPresent() const noexcept { return cluster != nullptr; } }; @@ -289,6 +298,8 @@ public: std::vector GetAncestors(const Ref& arg) noexcept final; std::vector GetDescendants(const Ref& arg) noexcept final; GraphIndex GetTransactionCount() noexcept final; + + void SanityCheck() const final; }; void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -1137,6 +1148,119 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept } } +void Cluster::SanityCheck(const TxGraphImpl& graph) const +{ + // There must be an m_mapping for each m_depgraph position (including holes). + assert(m_depgraph.PositionRange() == m_mapping.size()); + // The linearization for this Cluster must contain every transaction once. + assert(m_depgraph.TxCount() == m_linearization.size()); + // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. + + // Compute the chunking of m_linearization. + LinearizationChunking linchunking(m_depgraph, m_linearization); + + // Verify m_linearization. + SetType m_done; + assert(m_depgraph.IsAcyclic()); + for (auto lin_pos : m_linearization) { + assert(lin_pos < m_mapping.size()); + const auto& entry = graph.m_entries[m_mapping[lin_pos]]; + // Check that the linearization is topological. 
+ m_done.Set(lin_pos); + assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); + // Check that the Entry has a locator pointing back to this Cluster & position within it. + assert(entry.m_locator.cluster == this); + assert(entry.m_locator.index == lin_pos); + // Check linearization position and chunk feerate. + if (!linchunking.GetChunk(0).transactions[lin_pos]) { + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + } + assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + // If this Cluster has an acceptable quality level, its chunks must be connected. + if (IsAcceptable()) { + assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); + } + } + // Verify that each element of m_depgraph occurred in m_linearization. + assert(m_done == m_depgraph.Positions()); +} + +void TxGraphImpl::SanityCheck() const +{ + /** Which GraphIndexes ought to occur in m_unlinked, based on m_entries. */ + std::set expected_unlinked; + /** Which Clusters ought to occur in m_clusters, based on m_entries. */ + std::set expected_clusters; + + // Go over all Entry objects in m_entries. + for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { + const auto& entry = m_entries[idx]; + if (entry.m_ref == nullptr) { + // Unlinked Entry must have indexes appear in m_unlinked. + expected_unlinked.insert(idx); + } else { + // Every non-unlinked Entry must have a Ref that points back to it. + assert(GetRefGraph(*entry.m_ref) == this); + assert(GetRefIndex(*entry.m_ref) == idx); + } + const auto& locator = entry.m_locator; + // Every Locator must be in exactly one of these 2 states. + assert(locator.IsMissing() + locator.IsPresent() == 1); + if (locator.IsPresent()) { + // Verify that the Cluster agrees with where the Locator claims the transaction is. + assert(locator.cluster->GetClusterEntry(locator.index) == idx); + // Remember that we expect said Cluster to appear in the m_clusters. 
+ expected_clusters.insert(locator.cluster); + } + + } + + std::set actual_clusters; + // For all quality levels... + for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { + QualityLevel quality{qual}; + const auto& quality_clusters = m_clusters[qual]; + // ... for all clusters in them ... + for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { + const auto& cluster = *quality_clusters[setindex]; + // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't + // expected to be referenced by the Entry vector). + if (cluster.GetTxCount() != 0) { + actual_clusters.insert(&cluster); + } + // Sanity check the cluster, according to the Cluster's internal rules. + cluster.SanityCheck(*this); + // Check that the cluster's quality and setindex matches its position in the quality list. + assert(cluster.m_quality == quality); + assert(cluster.m_setindex == setindex); + } + } + + // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. + for (GraphIndex idx : m_to_remove) { + assert(idx < m_entries.size()); + assert(m_entries[idx].m_locator.IsPresent()); + } + + // Verify that all to-be-added dependencies have valid identifiers. + for (auto [par_idx, chl_idx] : m_deps_to_add) { + assert(par_idx != chl_idx); + assert(par_idx < m_entries.size()); + assert(chl_idx < m_entries.size()); + } + + // Verify that the actually encountered clusters match the ones occurring in Entry vector. + assert(actual_clusters == expected_clusters); + + // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. + std::set actual_unlinked(m_unlinked.begin(), m_unlinked.end()); + assert(actual_unlinked == expected_unlinked); + + // If no to-be-removed transactions, or to-be-added dependencies remain, m_unlinked must be + // empty (to prevent memory leaks due to an ever-growing m_entries vector). 
+ if (m_to_remove.empty() && m_deps_to_add.empty()) assert(actual_unlinked.empty()); +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index 0fdecddbda0..e6aa07021a6 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -116,6 +116,9 @@ public: virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; /** Get the total number of transactions in the graph. */ virtual GraphIndex GetTransactionCount() noexcept = 0; + + /** Perform an internal consistency check on this object. */ + virtual void SanityCheck() const = 0; }; /** Construct a new TxGraph. */ From f62e4320eceacd958cf7560e5ecef8e814f27acd Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sat, 25 Jan 2025 23:23:28 -0500 Subject: [PATCH 08/30] txgraph: (optimization) avoid per-group vectors for clusters & dependencies Instead construct a single vector with the list of all clusters in all groups, and then store per-group offset/range in that list. For dependencies, reuse m_deps_to_add, and store offset/range into that. --- src/txgraph.cpp | 63 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index e4b97b59491..8dfe5c77bd5 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -164,11 +164,23 @@ private: /** Information about one group of Clusters to be merged. */ struct GroupEntry { - /** Which clusters are to be merged. */ - std::vector m_clusters; - /** Which dependencies are to be applied to those merged clusters, as (parent, child) - * pairs. */ - std::vector> m_deps; + /** Where the clusters to be merged start in m_group_clusters. */ + uint32_t m_cluster_offset; + /** How many clusters to merge. */ + uint32_t m_cluster_count; + /** Where the dependencies for this cluster group in m_deps_to_add start. */ + uint32_t m_deps_offset; + /** How many dependencies to add. */ + uint32_t m_deps_count; + }; + + /** Information about all groups of Clusters to be merged. 
*/ + struct GroupData + { + /** The groups of Clusters to be merged. */ + std::vector m_groups; + /** Which clusters are to be merged. GroupEntry::m_cluster_offset indexes into this. */ + std::vector m_group_clusters; }; /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ @@ -179,7 +191,7 @@ private: * into this. */ std::vector> m_deps_to_add; /** Information about the merges to be performed, if known. */ - std::optional> m_group_data = std::vector{}; + std::optional m_group_data = GroupData{}; /** Total number of transactions in this graph (sum of all transaction counts in all Clusters). * */ GraphIndex m_txcount{0}; @@ -818,24 +830,34 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); - // Translate the resulting cluster groups to the m_group_data structure. - m_group_data = std::vector{}; + // Translate the resulting cluster groups to the m_group_data structure, and the dependencies + // back to m_deps_to_add. + m_group_data = GroupData{}; + m_group_data->m_group_clusters.reserve(an_clusters.size()); + m_deps_to_add.clear(); + m_deps_to_add.reserve(an_deps.size()); auto an_deps_it = an_deps.begin(); auto an_clusters_it = an_clusters.begin(); while (an_clusters_it != an_clusters.end()) { // Process all clusters/dependencies belonging to the partition with representative rep. auto rep = an_clusters_it->second; // Create and initialize a new GroupData entry for the partition. - auto& new_entry = m_group_data->emplace_back(); - // Add all its clusters to it (copying those from an_clusters to m_clusters). 
+ auto& new_entry = m_group_data->m_groups.emplace_back(); + new_entry.m_cluster_offset = m_group_data->m_group_clusters.size(); + new_entry.m_cluster_count = 0; + new_entry.m_deps_offset = m_deps_to_add.size(); + new_entry.m_deps_count = 0; + // Add all its clusters to it (copying those from an_clusters to m_group_clusters). while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { - new_entry.m_clusters.push_back(an_clusters_it->first); + m_group_data->m_group_clusters.push_back(an_clusters_it->first); ++an_clusters_it; + ++new_entry.m_cluster_count; } - // Add all its dependencies to it (copying those back from an_deps to m_deps). + // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add). while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { - new_entry.m_deps.push_back(an_deps_it->first); + m_deps_to_add.push_back(an_deps_it->first); ++an_deps_it; + ++new_entry.m_deps_count; } } Assume(an_deps_it == an_deps.end()); @@ -879,14 +901,19 @@ void TxGraphImpl::ApplyDependencies() noexcept if (m_deps_to_add.empty()) return; // For each group of to-be-merged Clusters. - for (auto& group_data : *m_group_data) { + for (const auto& group_data : m_group_data->m_groups) { // Invoke Merge() to merge them into a single Cluster. - Merge(group_data.m_clusters); + auto cluster_span = std::span{m_group_data->m_group_clusters} + .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + Merge(cluster_span); // Actually apply all to-be-added dependencies (all parents and children from this grouping // belong to the same Cluster at this point because of the merging above). 
- const auto& loc = m_entries[group_data.m_deps[0].second].m_locator; + auto deps_span = std::span{m_deps_to_add} + .subspan(group_data.m_deps_offset, group_data.m_deps_count); + Assume(!deps_span.empty()); + const auto& loc = m_entries[deps_span[0].second].m_locator; Assume(loc.IsPresent()); - loc.cluster->ApplyDependencies(*this, group_data.m_deps); + loc.cluster->ApplyDependencies(*this, deps_span); } // Wipe the list of to-be-added dependencies now that they are applied. @@ -894,7 +921,7 @@ void TxGraphImpl::ApplyDependencies() noexcept Compact(); // Also no further Cluster mergings are needed (note that we clear, but don't set to // std::nullopt, as that would imply the groupings are unknown). - m_group_data = std::vector{}; + m_group_data = GroupData{}; } void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept From 3ceb877012a9bc82da641fbb4f27c981ed206808 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 08:53:50 -0500 Subject: [PATCH 09/30] txgraph: (feature) make max cluster count configurable and "oversize" state Instead of leaving the responsibility on higher layers to guarantee that no connected component within TxGraph (a barely exposed concept, except through GetCluster()) exceeds the cluster count limit, move this responsibility to TxGraph itself: * TxGraph retains a cluster count limit, but it becomes configurable at construction time (this primarily helps with testing that it is properly enforced). * It is always allowed to perform mutators on TxGraph, even if they would cause the cluster count limit to be exceeded. Instead, TxGraph exposes an IsOversized() function, which queries whether it is in a special "oversize" state. * During oversize state, many inspectors are unavailable, but mutators remain valid, so the higher layer can "fix" the oversize state before continuing. 
--- src/test/fuzz/txgraph.cpp | 169 +++++++++++++++++++++++--------------- src/txgraph.cpp | 51 ++++++++++-- src/txgraph.h | 31 ++++--- 3 files changed, 164 insertions(+), 87 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index cec1cd56edc..b6163ed5544 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -27,7 +27,7 @@ struct SimTxGraph /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's * cluster count, so we can exercise situations with more transactions than fit in one * cluster. */ - static constexpr unsigned MAX_TRANSACTIONS = CLUSTER_COUNT_LIMIT * 2; + static constexpr unsigned MAX_TRANSACTIONS = MAX_CLUSTER_COUNT_LIMIT * 2; /** Set type to use in the simulation. */ using SetType = BitSet; /** Data type for representing positions within SimTxGraph::graph. */ @@ -44,6 +44,31 @@ struct SimTxGraph std::map simrevmap; /** The set of TxGraph::Ref entries that have been removed, but not yet destroyed. */ std::vector> removed; + /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */ + std::optional oversized; + /** The configured maximum number of transactions per cluster. */ + DepGraphIndex max_cluster_count; + + /** Construct a new SimTxGraph with the specified maximum cluster count. */ + explicit SimTxGraph(DepGraphIndex max_cluster) : max_cluster_count(max_cluster) {} + + /** Check whether this graph is oversized (contains a connected component whose number of + * transactions exceeds max_cluster_count). */ + bool IsOversized() + { + if (!oversized.has_value()) { + // Only recompute when oversized isn't already known. + oversized = false; + auto todo = graph.Positions(); + // Iterate over all connected components of the graph.
+ while (todo.Any()) { + auto component = graph.FindConnectedComponent(todo); + if (component.Count() > max_cluster_count) oversized = true; + todo -= component; + } + } + return *oversized; + } /** Determine the number of (non-removed) transactions in the graph. */ DepGraphIndex GetTransactionCount() const { return graph.TxCount(); } @@ -84,6 +109,8 @@ struct SimTxGraph auto chl_pos = Find(child); if (chl_pos == MISSING) return; graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + // This may invalidate our cached oversized value. + if (oversized.has_value() && !*oversized) oversized = std::nullopt; } /** Modify the transaction fee of a ref, if it exists. */ @@ -105,6 +132,8 @@ struct SimTxGraph // invoked until the simulation explicitly decided to do so. removed.push_back(std::move(simmap[pos])); simmap[pos].reset(); + // This may invalidate our cached oversized value. + if (oversized.has_value() && *oversized) oversized = std::nullopt; } /** Construct the set with all positions in this graph corresponding to the specified @@ -170,9 +199,12 @@ FUZZ_TARGET(txgraph) /** Variable used whenever an empty TxGraph::Ref is needed. */ TxGraph::Ref empty_ref; + // Decide the maximum number of transactions per cluster we will use in this simulation. + auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); + // Construct a real and a simulated graph. - auto real = MakeTxGraph(); - SimTxGraph sim; + auto real = MakeTxGraph(max_count); + SimTxGraph sim(max_count); /** Function to pick any Ref (from sim.simmap or sim.removed, or the empty Ref). */ auto pick_fn = [&]() noexcept -> TxGraph::Ref* { @@ -245,17 +277,6 @@ FUZZ_TARGET(txgraph) // Determine if adding this would introduce a cycle (not allowed by TxGraph), // and if so, skip. if (sim.graph.Ancestors(pos_par)[pos_chl]) break; - // Determine if adding this would violate CLUSTER_COUNT_LIMIT, and if so, skip. 
- auto temp_depgraph = sim.graph; - temp_depgraph.AddDependencies(SimTxGraph::SetType::Singleton(pos_par), pos_chl); - auto todo = temp_depgraph.Positions(); - bool oversize{false}; - while (todo.Any()) { - auto component = temp_depgraph.FindConnectedComponent(todo); - if (component.Count() > CLUSTER_COUNT_LIMIT) oversize = true; - todo -= component; - } - if (oversize) break; } sim.AddDependency(par, chl); real->AddDependency(*par, *chl); @@ -310,6 +331,10 @@ FUZZ_TARGET(txgraph) bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; assert(exists == should_exist); break; + } else if (command-- == 0) { + // IsOversized. + assert(sim.IsOversized() == real->IsOversized()); + break; } else if (command-- == 0) { // GetIndividualFeerate. auto ref = pick_fn(); @@ -321,7 +346,7 @@ FUZZ_TARGET(txgraph) assert(feerate == sim.graph.FeeRate(simpos)); } break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetChunkFeerate. auto ref = pick_fn(); auto feerate = real->GetChunkFeerate(*ref); @@ -334,20 +359,22 @@ FUZZ_TARGET(txgraph) assert(feerate.size >= sim.graph.FeeRate(simpos).size); } break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetAncestors/GetDescendants. auto ref = pick_fn(); - auto result_set = sim.MakeSet(alt ? real->GetDescendants(*ref) : - real->GetAncestors(*ref)); + auto result = alt ? real->GetDescendants(*ref) : real->GetAncestors(*ref); + assert(result.size() <= max_count); + auto result_set = sim.MakeSet(result); + assert(result.size() == result_set.Count()); auto expect_set = sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; - } else if (command-- == 0) { + } else if (!sim.IsOversized() && command-- == 0) { // GetCluster. auto ref = pick_fn(); auto result = real->GetCluster(*ref); // Check cluster count limit. 
- assert(result.size() <= CLUSTER_COUNT_LIMIT); + assert(result.size() <= max_count); // Require the result to be topologically valid and not contain duplicates. auto left = sim.graph.Positions(); for (auto refptr : result) { @@ -382,56 +409,62 @@ FUZZ_TARGET(txgraph) real->SanityCheck(); // Compare simple properties of the graph with the simulation. + assert(real->IsOversized() == sim.IsOversized()); assert(real->GetTransactionCount() == sim.GetTransactionCount()); - // Perform a full comparison. - auto todo = sim.graph.Positions(); - // Iterate over all connected components of the resulting (simulated) graph, each of which - // should correspond to a cluster in the real one. - while (todo.Any()) { - auto component = sim.graph.FindConnectedComponent(todo); - todo -= component; - // Iterate over the transactions in that component. - for (auto i : component) { - // Check its individual feerate against simulation. - assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); - // Check its ancestors against simulation. - auto expect_anc = sim.graph.Ancestors(i); - auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); - assert(anc == expect_anc); - // Check its descendants against simulation. - auto expect_desc = sim.graph.Descendants(i); - auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); - assert(desc == expect_desc); - // Check the cluster the transaction is part of. - auto cluster = real->GetCluster(*sim.GetRef(i)); - assert(sim.MakeSet(cluster) == component); - // Check that the cluster is reported in a valid topological order (its - // linearization). 
- std::vector simlin; - SimTxGraph::SetType done; - for (TxGraph::Ref* ptr : cluster) { - auto simpos = sim.Find(ptr); - assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); - done.Set(simpos); - assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); - simlin.push_back(simpos); - } - // Construct a chunking object for the simulated graph, using the reported cluster - // linearization as ordering, and compare it against the reported chunk feerates. - cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); - DepGraphIndex idx{0}; - for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { - auto chunk = simlinchunk.GetChunk(chunknum); - // Require that the chunks of cluster linearizations are connected (this must - // be the case as all linearizations inside are PostLinearized). - assert(sim.graph.IsConnected(chunk.transactions)); - // Check the chunk feerates of all transactions in the cluster. - while (chunk.transactions.Any()) { - assert(chunk.transactions[simlin[idx]]); - chunk.transactions.Reset(simlin[idx]); - assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); - ++idx; + // If the graph (and the simulation) are not oversized, perform a full comparison. + if (!sim.IsOversized()) { + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); + // Check its ancestors against simulation. 
+ auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); + assert(anc.Count() <= max_count); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); + assert(desc.Count() <= max_count); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(*sim.GetRef(i)); + assert(cluster.size() <= max_count); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + DepGraphIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. 
+ while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); + ++idx; + } } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 8dfe5c77bd5..ee3c78aaacc 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -49,7 +49,7 @@ class Cluster { friend class TxGraphImpl; using GraphIndex = TxGraph::GraphIndex; - using SetType = BitSet; + using SetType = BitSet; /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */ DepGraph m_depgraph; /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. Values for @@ -160,6 +160,8 @@ class TxGraphImpl final : public TxGraph private: /** Internal RNG. */ FastRandomContext m_rng; + /** This TxGraphImpl's maximum cluster count limit. */ + const DepGraphIndex m_max_cluster_count; /** Information about one group of Clusters to be merged. */ struct GroupEntry @@ -181,6 +183,9 @@ private: std::vector m_groups; /** Which clusters are to be merged. GroupEntry::m_cluster_offset indexes into this. */ std::vector m_group_clusters; + /** Whether at least one of the groups cannot be applied because it would result in a + * Cluster that violates the cluster count limit. */ + bool m_group_oversized; }; /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ @@ -233,8 +238,13 @@ private: std::vector m_unlinked; public: - /** Construct a new TxGraphImpl. */ - explicit TxGraphImpl() noexcept {} + /** Construct a new TxGraphImpl with the specified maximum cluster count. */ + explicit TxGraphImpl(DepGraphIndex max_cluster_count) noexcept : + m_max_cluster_count(max_cluster_count) + { + Assume(max_cluster_count >= 1); + Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); + } // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). 
TxGraphImpl(const TxGraphImpl&) = delete; @@ -310,6 +320,7 @@ public: std::vector GetAncestors(const Ref& arg) noexcept final; std::vector GetDescendants(const Ref& arg) noexcept final; GraphIndex GetTransactionCount() noexcept final; + bool IsOversized() noexcept final; void SanityCheck() const final; }; @@ -697,7 +708,7 @@ void TxGraphImpl::GroupClusters() noexcept // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up - // with inefficient Clusters which just end up being split again anyway. + // with inefficient and/or oversized Clusters which just end up being split again anyway. SplitAll(); /** Annotated clusters: an entry for each Cluster, together with the representative for the @@ -834,6 +845,7 @@ void TxGraphImpl::GroupClusters() noexcept // back to m_deps_to_add. m_group_data = GroupData{}; m_group_data->m_group_clusters.reserve(an_clusters.size()); + m_group_data->m_group_oversized = false; m_deps_to_add.clear(); m_deps_to_add.reserve(an_deps.size()); auto an_deps_it = an_deps.begin(); @@ -847,9 +859,11 @@ void TxGraphImpl::GroupClusters() noexcept new_entry.m_cluster_count = 0; new_entry.m_deps_offset = m_deps_to_add.size(); new_entry.m_deps_count = 0; + uint32_t total_count{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { m_group_data->m_group_clusters.push_back(an_clusters_it->first); + total_count += an_clusters_it->first->GetTxCount(); ++an_clusters_it; ++new_entry.m_cluster_count; } @@ -859,6 +873,10 @@ void TxGraphImpl::GroupClusters() noexcept ++an_deps_it; ++new_entry.m_deps_count; } + // Detect oversizedness. 
+ if (total_count > m_max_cluster_count) { + m_group_data->m_group_oversized = true; + } } Assume(an_deps_it == an_deps.end()); Assume(an_clusters_it == an_clusters.end()); @@ -899,6 +917,8 @@ void TxGraphImpl::ApplyDependencies() noexcept Assume(m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. if (m_deps_to_add.empty()) return; + // Dependencies cannot be applied if it would result in oversized clusters. + if (m_group_data->m_group_oversized) return; // For each group of to-be-merged Clusters. for (const auto& group_data : m_group_data->m_groups) { @@ -1074,6 +1094,8 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1088,6 +1110,8 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1102,6 +1126,8 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); + // Cluster linearization cannot be known if unapplied dependencies remain. 
+ Assume(m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1137,6 +1163,8 @@ FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be inaccurate otherwise. ApplyDependencies(); + // Chunk feerates cannot be accurately known if unapplied dependencies remain. + Assume(m_deps_to_add.empty()); // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1147,6 +1175,15 @@ FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept return entry.m_chunk_feerate; } +bool TxGraphImpl::IsOversized() noexcept +{ + // Find which Clusters will need to be merged together, as that is where the oversize + // property is assessed. + GroupClusters(); + Assume(m_group_data.has_value()); + return m_group_data->m_group_oversized; +} + void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept { // Make sure the specified DepGraphIndex exists in this Cluster. @@ -1181,6 +1218,8 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(m_depgraph.PositionRange() == m_mapping.size()); // The linearization for this Cluster must contain every transaction once. assert(m_depgraph.TxCount() == m_linearization.size()); + // The number of transactions in a Cluster cannot exceed m_max_cluster_count. + assert(m_linearization.size() <= graph.m_max_cluster_count); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. // Compute the chunking of m_linearization. 
@@ -1322,7 +1361,7 @@ TxGraph::Ref::Ref(Ref&& other) noexcept std::swap(m_index, other.m_index); } -std::unique_ptr MakeTxGraph() noexcept +std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept { - return std::make_unique(); + return std::make_unique(max_cluster_count); } diff --git a/src/txgraph.h b/src/txgraph.h index e6aa07021a6..04663131611 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -12,8 +12,7 @@ #ifndef BITCOIN_TXGRAPH_H #define BITCOIN_TXGRAPH_H -/** No connected component within TxGraph is allowed to exceed this number of transactions. */ -static constexpr unsigned CLUSTER_COUNT_LIMIT{64}; +static constexpr unsigned MAX_CLUSTER_COUNT_LIMIT{64}; /** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. */ class TxGraph @@ -98,30 +97,36 @@ public: /** Determine whether arg exists in this graph (i.e., was not removed). */ virtual bool Exists(const Ref& arg) noexcept = 0; - /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeePerWeight if - * arg does not exist. */ + /** Determine whether the graph is oversized (contains a connected component of more than the + * configured maximum cluster count). Some of the functions below are not available + * for oversized graphs. The mutators above are always available. */ + virtual bool IsOversized() noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeePerWeight if + * arg does not exist. The graph must not be oversized. */ virtual FeePerWeight GetChunkFeerate(const Ref& arg) noexcept = 0; - /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight - * if arg does not exist. */ + /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight + * if arg does not exist. This is available even for oversized graphs.
*/ virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; /** Get pointers to all transactions in the connected component ("cluster") which arg is in. * The transactions will be returned in a topologically-valid order of acceptable quality. * Returns {} if arg does not exist in the queried graph. */ virtual std::vector GetCluster(const Ref& arg) noexcept = 0; - /** Get pointers to all ancestors of the specified transaction. Returns {} if arg does not - * exist. */ + /** Get pointers to all ancestors of the specified transaction. The queried graph must not be + * oversized. Returns {} if arg does not exist. */ virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; - /** Get pointers to all descendants of the specified transaction. Returns {} if arg does not - * exist in the graph. */ + /** Get pointers to all descendants of the specified transaction. The graph must not be + * oversized. Returns {} if arg does not exist in the graph. */ virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; - /** Get the total number of transactions in the graph. */ + /** Get the total number of transactions in the graph. This is available even for oversized + * graphs. */ virtual GraphIndex GetTransactionCount() noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; }; -/** Construct a new TxGraph. */ -std::unique_ptr MakeTxGraph() noexcept; +/** Construct a new TxGraph with the specified limit on transactions within a cluster. That + * number cannot exceed MAX_CLUSTER_COUNT_LIMIT. */ +std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept; #endif // BITCOIN_TXGRAPH_H From 1f35d5f46389ebfbaf76754de55334c85170564a Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 22 Jan 2025 14:53:32 -0500 Subject: [PATCH 10/30] txgraph: (optimization) avoid representative lookup for each dependency The m_deps_to_add vector is sorted by child Cluster*, which matches the order of an_clusters. 
This means we can walk through m_deps_to_add while doing the representative lookups for an_clusters, and reuse them. --- src/txgraph.cpp | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index ee3c78aaacc..fcea5836959 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -735,6 +735,15 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(an_clusters.begin(), an_clusters.end()); an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); + // Sort the dependencies by child Cluster. + std::sort(m_deps_to_add.begin(), m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { + auto [_a_par, a_chl] = a; + auto [_b_par, b_chl] = b; + auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; + auto b_chl_cluster = m_entries[b_chl].m_locator.cluster; + return std::less{}(a_chl_cluster, b_chl_cluster); + }); + // Run the union-find algorithm to to find partitions of the input Clusters which need to be // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure. { @@ -814,6 +823,8 @@ void TxGraphImpl::GroupClusters() noexcept // Populate the an_clusters and an_deps data structures with the list of input Clusters, // and the input dependencies, annotated with the representative of the Cluster partition // it applies to. 
+ an_deps.reserve(m_deps_to_add.size()); + auto deps_it = m_deps_to_add.begin(); for (size_t i = 0; i < partition_data.size(); ++i) { auto& data = partition_data[i]; // Find the representative of the partition Cluster i is in, and store it with the @@ -821,18 +832,20 @@ void TxGraphImpl::GroupClusters() noexcept auto rep = find_root_fn(&data)->cluster; Assume(an_clusters[i].second == nullptr); an_clusters[i].second = rep; - } - an_deps.reserve(m_deps_to_add.size()); - for (auto [par, chl] : m_deps_to_add) { - auto chl_cluster = m_entries[chl].m_locator.cluster; - auto par_cluster = m_entries[par].m_locator.cluster; - // Nothing to do if either parent or child transaction is removed already. - if (par_cluster == nullptr || chl_cluster == nullptr) continue; - // Find the representative of the partition which this dependency's child is in (which - // should be the same as the one for the parent). - auto rep = find_root_fn(locate_fn(chl_cluster))->cluster; - // Create an_deps entry. - an_deps.emplace_back(std::pair{par, chl}, rep); + // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. + while (deps_it != m_deps_to_add.end()) { + auto [par, chl] = *deps_it; + auto chl_cluster = m_entries[chl].m_locator.cluster; + if (std::greater{}(chl_cluster, data.cluster)) break; + // Skip dependencies that apply to earlier Clusters (those necessarily are for + // deleted transactions, as otherwise we'd have processed them already). + if (chl_cluster == data.cluster) { + auto par_cluster = m_entries[par].m_locator.cluster; + // Also filter out dependencies applying to a removed parent.
+ if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); + } + ++deps_it; + } } } From 05400bd33d7b5a0ef28704734bc71131debfe861 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 22 Jan 2025 14:36:00 -0500 Subject: [PATCH 11/30] txgraph: (optimization) avoid looking up the same child cluster repeatedly Since m_deps_to_add has been sorted by child Cluster* already, all dependencies with the same child will be processed consecutively. Take advantage of this by remembering the last partition merged with, and reusing that if applicable. --- src/txgraph.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index fcea5836959..8c8eb874e20 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -785,18 +785,20 @@ void TxGraphImpl::GroupClusters() noexcept return data; }; - /** Given two PartitionDatas, union the partitions they are in. */ + /** Given two PartitionDatas, union the partitions they are in, and return their + * representative. */ static constexpr auto union_fn = [](PartitionData* arg1, PartitionData* arg2) noexcept { // Find the roots of the trees, and bail out if they are already equal (which would // mean they are in the same partition already). auto rep1 = find_root_fn(arg1); auto rep2 = find_root_fn(arg2); - if (rep1 == rep2) return; + if (rep1 == rep2) return rep1; // Pick the lower-rank root to become a child of the higher-rank one. // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_rank. if (rep1->rank < rep2->rank) std::swap(rep1, rep2); rep2->parent = rep1; rep1->rank += (rep1->rank == rep2->rank); + return rep1; }; // Start by initializing every Cluster as its own singleton partition. @@ -809,6 +811,8 @@ void TxGraphImpl::GroupClusters() noexcept // Run through all parent/child pairs in m_deps_to_add, and union the // the partitions their Clusters are in.
+ Cluster* last_chl_cluster{nullptr}; + PartitionData* last_partition{nullptr}; for (const auto& [par, chl] : m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; @@ -817,7 +821,15 @@ void TxGraphImpl::GroupClusters() noexcept // Nothing to do if either parent or child transaction is removed already. if (par_cluster == nullptr || chl_cluster == nullptr) continue; Assume(par != chl); - union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + if (chl_cluster == last_chl_cluster) { + // If the child Clusters is the same as the previous iteration, union with the + // tree they were in, avoiding the need for another lookup. Note that m_deps_to_add + // is sorted by child Cluster, so batches with the same child are expected. + last_partition = union_fn(locate_fn(par_cluster), last_partition); + } else { + last_chl_cluster = chl_cluster; + last_partition = union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + } } // Populate the an_clusters and an_deps data structures with the list of input Clusters, From f027e2cd929d4a46115fab7b02d7efa5634039de Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Fri, 15 Nov 2024 13:31:23 -0500 Subject: [PATCH 12/30] txgraph: (optimization) delay chunking while sub-acceptable Chunk-based information (primarily, chunk feerates) is never accessed without first bringing the relevant Clusters to an "acceptable" quality level. Thus, while operations are ongoing and Clusters are not acceptable, we can omit computing the chunkings and chunk feerates for Clusters.
--- src/txgraph.cpp | 50 +++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 8c8eb874e20..e3d24d7fdc7 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -332,23 +332,27 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept auto& entry = graph.m_entries[m_mapping[idx]]; entry.m_locator.SetPresent(this, idx); } - - // Compute its chunking and store its information in the Entry's m_chunk_feerate. - LinearizationChunking chunking(m_depgraph, m_linearization); - LinearizationIndex lin_idx{0}; - // Iterate over the chunks. - for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { - auto chunk = chunking.GetChunk(chunk_idx); - Assume(chunk.transactions.Any()); - // Iterate over the transactions in the linearization, which must match those in chunk. - do { - DepGraphIndex idx = m_linearization[lin_idx++]; - GraphIndex graph_idx = m_mapping[idx]; - auto& entry = graph.m_entries[graph_idx]; - entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); - Assume(chunk.transactions[idx]); - chunk.transactions.Reset(idx); - } while(chunk.transactions.Any()); + // If the Cluster's quality is ACCEPTABLE or OPTIMAL, compute its chunking and store its + // information in the Entry's m_chunk_feerate. These fields are only accessed after making + // the entire graph ACCEPTABLE, so it is pointless to compute these if we haven't reached that + // quality level yet. + if (IsAcceptable()) { + LinearizationChunking chunking(m_depgraph, m_linearization); + LinearizationIndex lin_idx{0}; + // Iterate over the chunks. + for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { + auto chunk = chunking.GetChunk(chunk_idx); + Assume(chunk.transactions.Any()); + // Iterate over the transactions in the linearization, which must match those in chunk. 
+ do { + DepGraphIndex idx = m_linearization[lin_idx++]; + GraphIndex graph_idx = m_mapping[idx]; + auto& entry = graph.m_entries[graph_idx]; + entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); + Assume(chunk.transactions[idx]); + chunk.transactions.Reset(idx); + } while(chunk.transactions.Any()); + } } } @@ -410,8 +414,6 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept // The existing Cluster is an entire component. Leave it be, but update its quality. Assume(todo == m_depgraph.Positions()); graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); - // We need to recompute and cache its chunking. - Updated(graph); return false; } first = false; @@ -1263,12 +1265,12 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(entry.m_locator.cluster == this); assert(entry.m_locator.index == lin_pos); // Check linearization position and chunk feerate. - if (!linchunking.GetChunk(0).transactions[lin_pos]) { - linchunking.MarkDone(linchunking.GetChunk(0).transactions); - } - assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); - // If this Cluster has an acceptable quality level, its chunks must be connected. if (IsAcceptable()) { + if (!linchunking.GetChunk(0).transactions[lin_pos]) { + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + } + assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + // If this Cluster has an acceptable quality level, its chunks must be connected. 
assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } } From f8eff1bc0825091e236ecdbc7ad2036928b8f174 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 18:10:24 -0500 Subject: [PATCH 13/30] txgraph: (optimization) special-case removal of tail of cluster When transactions are removed from the tail of a cluster, we know the existing linearization remains acceptable (if it already was), but may just need splitting and postlinearization, so special case these into separate quality levels. --- src/txgraph.cpp | 69 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index e3d24d7fdc7..01443046455 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -33,6 +33,8 @@ enum class QualityLevel { /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ NEEDS_SPLIT, + /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. */ + NEEDS_SPLIT_ACCEPTABLE, /** This cluster has undergone changes that warrant re-linearization. */ NEEDS_RELINEARIZE, /** The minimal level of linearization has been performed, but it is not known to be optimal. */ @@ -79,9 +81,10 @@ public: // Generic helper functions. /** Whether the linearization of this Cluster can be exposed. */ - bool IsAcceptable() const noexcept + bool IsAcceptable(bool after_split = false) const noexcept { - return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL; + return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL || + (after_split && m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE); } /** Whether the linearization of this Cluster is optimal. */ bool IsOptimal() const noexcept @@ -91,7 +94,8 @@ public: /** Whether this cluster requires splitting. 
*/ bool NeedsSplitting() const noexcept { - return m_quality == QualityLevel::NEEDS_SPLIT; + return m_quality == QualityLevel::NEEDS_SPLIT || + m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE; } /** Get the number of transactions in this Cluster. */ LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } @@ -380,19 +384,35 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove --graph.m_txcount; } while(!to_remove.empty()); + auto quality = m_quality; Assume(todo.Any()); // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries // removed, so we benefit from batching all the removals). m_depgraph.RemoveTransactions(todo); m_mapping.resize(m_depgraph.PositionRange()); - // Filter removals out of m_linearization. - m_linearization.erase(std::remove_if( - m_linearization.begin(), - m_linearization.end(), - [&](auto pos) { return todo[pos]; }), m_linearization.end()); - - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + // First remove all removals at the end of the linearization. + while (!m_linearization.empty() && todo[m_linearization.back()]) { + todo.Reset(m_linearization.back()); + m_linearization.pop_back(); + } + if (todo.None()) { + // If no further removals remain, and thus all removals were at the end, we may be able + // to leave the cluster at a better quality level. + if (IsAcceptable(/*after_split=*/true)) { + quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; + } else { + quality = QualityLevel::NEEDS_SPLIT; + } + } else { + // If more removals remain, filter those out of m_linearization. 
+ m_linearization.erase(std::remove_if( + m_linearization.begin(), + m_linearization.end(), + [&](auto pos) { return todo[pos]; }), m_linearization.end()); + quality = QualityLevel::NEEDS_SPLIT; + } + graph.SetClusterQuality(m_quality, m_setindex, quality); Updated(graph); } @@ -400,6 +420,18 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept { // This function can only be called when the Cluster needs splitting. Assume(NeedsSplitting()); + // Determine the new quality the split-off Clusters will have. + QualityLevel new_quality = IsAcceptable(/*after_split=*/true) ? QualityLevel::ACCEPTABLE + : QualityLevel::NEEDS_RELINEARIZE; + // If we're going to produce ACCEPTABLE clusters (i.e., when in NEEDS_SPLIT_ACCEPTABLE), we + // need to post-linearize to make sure the split-out versions are all connected (as + // connectivity may have changed by removing part of the cluster). This could be done on each + // resulting split-out cluster separately, but it is simpler to do it once up front before + // splitting. This step is not necessary if the resulting clusters are NEEDS_RELINEARIZE, as + // they will be post-linearized anyway in MakeAcceptable(). + if (new_quality == QualityLevel::ACCEPTABLE) { + PostLinearize(m_depgraph, m_linearization); + } /** Which positions are still left in this Cluster. */ auto todo = m_depgraph.Positions(); /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and @@ -413,7 +445,10 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. Assume(todo == m_depgraph.Positions()); - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + graph.SetClusterQuality(m_quality, m_setindex, new_quality); + // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its + // chunking. 
+ Updated(graph); return false; } first = false; @@ -425,7 +460,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept for (auto i : component) { remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; } - graph.InsertCluster(std::move(new_cluster), QualityLevel::NEEDS_RELINEARIZE); + graph.InsertCluster(std::move(new_cluster), new_quality); todo -= component; } // Redistribute the transactions. @@ -697,9 +732,11 @@ void TxGraphImpl::SplitAll() noexcept { // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(); - auto& queue = m_clusters[int(QualityLevel::NEEDS_SPLIT)]; - while (!queue.empty()) { - Split(*queue.back().get()); + for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { + auto& queue = m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } } } @@ -1222,6 +1259,8 @@ void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcep m_depgraph.FeeRate(idx).fee = fee; if (!NeedsSplitting()) { graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + } else { + graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); } Updated(graph); } From 11f303faa32deaa616e72f30e3da1e1151494259 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 08:37:53 -0500 Subject: [PATCH 14/30] txgraph: (refactor) group per-graph data in ClusterSet This is a preparation for a next commit where a TxGraph will start representing potentially two distinct graphs (a main one, and a staging one with proposed changes). --- src/txgraph.cpp | 153 ++++++++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 70 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 01443046455..044eba33c3e 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -25,7 +25,7 @@ class TxGraphImpl; /** Position of a DepGraphIndex within a Cluster::m_linearization. 
*/ using LinearizationIndex = uint32_t; -/** Position of a Cluster within Graph::m_clusters. */ +/** Position of a Cluster within Graph::ClusterSet::m_clusters. */ using ClusterSetIndex = uint32_t; /** Quality levels for cached cluster linearizations. */ @@ -41,12 +41,12 @@ enum class QualityLevel ACCEPTABLE, /** The linearization is known to be optimal. */ OPTIMAL, - /** This cluster is not registered in any m_clusters. - * This must be the last entry in QualityLevel as m_clusters is sized using it. */ + /** This cluster is not registered in any ClusterSet::m_clusters. + * This must be the last entry in QualityLevel as ClusterSet::m_clusters is sized using it. */ NONE, }; -/** A grouping of connected transactions inside a TxGraphImpl. */ +/** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */ class Cluster { friend class TxGraphImpl; @@ -63,7 +63,7 @@ class Cluster std::vector m_linearization; /** The quality level of m_linearization. */ QualityLevel m_quality{QualityLevel::NONE}; - /** Which position this Cluster has in Graph::m_clusters[m_quality]. */ + /** Which position this Cluster has in Graph::ClusterSet::m_clusters[m_quality]. */ ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; public: @@ -72,7 +72,7 @@ public: /** Construct a singleton Cluster. */ explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; - // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl). */ + // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl::ClusterSet). */ Cluster(const Cluster&) = delete; Cluster& operator=(const Cluster&) = delete; Cluster(Cluster&&) = delete; @@ -192,18 +192,25 @@ private: bool m_group_oversized; }; - /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ - std::vector> m_clusters[int(QualityLevel::NONE)]; - /** Which removals have yet to be applied. 
*/ - std::vector m_to_remove; - /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes - * into this. */ - std::vector> m_deps_to_add; - /** Information about the merges to be performed, if known. */ - std::optional m_group_data = GroupData{}; - /** Total number of transactions in this graph (sum of all transaction counts in all Clusters). - * */ - GraphIndex m_txcount{0}; + /** The collection of all Clusters in main or staged. */ + struct ClusterSet + { + /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ + std::vector> m_clusters[int(QualityLevel::NONE)]; + /** Which removals have yet to be applied. */ + std::vector m_to_remove; + /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes + * into this. */ + std::vector> m_deps_to_add; + /** Information about the merges to be performed, if known. */ + std::optional m_group_data = GroupData{}; + /** Total number of transactions in this graph (sum of all transaction counts in all + * Clusters). */ + GraphIndex m_txcount{0}; + }; + + /** The ClusterSet for this TxGraphImpl. */ + ClusterSet m_clusterset; /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ struct Locator @@ -381,7 +388,7 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // - Mark it as removed in the Entry's locator. locator.SetMissing(); to_remove = to_remove.subspan(1); - --graph.m_txcount; + --graph.m_clusterset.m_txcount; } while(!to_remove.empty()); auto quality = m_quality; @@ -577,7 +584,7 @@ std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, Clust { Assume(quality != QualityLevel::NONE); - auto& quality_clusters = m_clusters[int(quality)]; + auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; Assume(setindex < quality_clusters.size()); // Extract the Cluster-owning unique_ptr. 
@@ -606,7 +613,7 @@ ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, Q Assume(cluster->m_quality == QualityLevel::NONE); // Append it at the end of the relevant TxGraphImpl::m_cluster. - auto& quality_clusters = m_clusters[int(quality)]; + auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; ClusterSetIndex ret = quality_clusters.size(); cluster->m_quality = quality; cluster->m_setindex = ret; @@ -636,15 +643,16 @@ void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept void TxGraphImpl::ApplyRemovals() noexcept { - auto& to_remove = m_to_remove; + auto& clusterset = m_clusterset; + auto& to_remove = clusterset.m_to_remove; // Skip if there is nothing to remove. if (to_remove.empty()) return; // Group the set of to-be-removed entries by Cluster*. - std::sort(m_to_remove.begin(), m_to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); }); // Process per Cluster. - std::span to_remove_span{m_to_remove}; + std::span to_remove_span{to_remove}; while (!to_remove_span.empty()) { Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; if (cluster != nullptr) { @@ -657,7 +665,7 @@ void TxGraphImpl::ApplyRemovals() noexcept to_remove_span = to_remove_span.subspan(1); } } - m_to_remove.clear(); + to_remove.clear(); Compact(); } @@ -686,8 +694,8 @@ void TxGraphImpl::Compact() noexcept { // We cannot compact while any to-be-applied operations remain, as we'd need to rewrite them. // It is easier to delay the compaction until they have been applied. - if (!m_deps_to_add.empty()) return; - if (!m_to_remove.empty()) return; + if (!m_clusterset.m_deps_to_add.empty()) return; + if (!m_clusterset.m_to_remove.empty()) return; // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last // ones get processed first. 
This means earlier-processed GraphIndexes will not cause moving of @@ -733,7 +741,7 @@ void TxGraphImpl::SplitAll() noexcept // Before splitting all Cluster, first make sure all removals are applied. ApplyRemovals(); for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { - auto& queue = m_clusters[int(quality)]; + auto& queue = m_clusterset.m_clusters[int(quality)]; while (!queue.empty()) { Split(*queue.back().get()); } @@ -742,8 +750,9 @@ void TxGraphImpl::SplitAll() noexcept void TxGraphImpl::GroupClusters() noexcept { + auto& clusterset = m_clusterset; // If the groupings have been computed already, nothing is left to be done. - if (m_group_data.has_value()) return; + if (clusterset.m_group_data.has_value()) return; // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up @@ -759,7 +768,7 @@ void TxGraphImpl::GroupClusters() noexcept std::vector, Cluster*>> an_deps; // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies. - for (const auto& [par, chl] : m_deps_to_add) { + for (const auto& [par, chl] : clusterset.m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; // Skip dependencies for which the parent or child transaction is removed. @@ -775,7 +784,7 @@ void TxGraphImpl::GroupClusters() noexcept an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); // Sort the dependencies by child Cluster. 
- std::sort(m_deps_to_add.begin(), m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { + std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { auto [_a_par, a_chl] = a; auto [_b_par, b_chl] = b; auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; @@ -852,7 +861,7 @@ void TxGraphImpl::GroupClusters() noexcept // the partitions their Clusters are in. Cluster* last_chl_cluster{nullptr}; PartitionData* last_partition{nullptr}; - for (const auto& [par, chl] : m_deps_to_add) { + for (const auto& [par, chl] : clusterset.m_deps_to_add) { auto par_cluster = m_entries[par].m_locator.cluster; auto chl_cluster = m_entries[chl].m_locator.cluster; // Nothing to do if parent and child are in the same Cluster. @@ -874,8 +883,8 @@ void TxGraphImpl::GroupClusters() noexcept // Populate the an_clusters and an_deps data structures with the list of input Clusters, // and the input dependencies, annotated with the representative of the Cluster partition // it applies to. - an_deps.reserve(m_deps_to_add.size()); - auto deps_it = m_deps_to_add.begin(); + an_deps.reserve(clusterset.m_deps_to_add.size()); + auto deps_it = clusterset.m_deps_to_add.begin(); for (size_t i = 0; i < partition_data.size(); ++i) { auto& data = partition_data[i]; // Find the representative of the partition Cluster i is in, and store it with the @@ -884,7 +893,7 @@ void TxGraphImpl::GroupClusters() noexcept Assume(an_clusters[i].second == nullptr); an_clusters[i].second = rep; // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. 
- while (deps_it != m_deps_to_add.end()) { + while (deps_it != clusterset.m_deps_to_add.end()) { auto [par, chl] = *deps_it; auto chl_cluster = m_entries[chl].m_locator.cluster; if (std::greater{}(chl_cluster, data.cluster)) break; @@ -907,39 +916,39 @@ void TxGraphImpl::GroupClusters() noexcept // Translate the resulting cluster groups to the m_group_data structure, and the dependencies // back to m_deps_to_add. - m_group_data = GroupData{}; - m_group_data->m_group_clusters.reserve(an_clusters.size()); - m_group_data->m_group_oversized = false; - m_deps_to_add.clear(); - m_deps_to_add.reserve(an_deps.size()); + clusterset.m_group_data = GroupData{}; + clusterset.m_group_data->m_group_clusters.reserve(an_clusters.size()); + clusterset.m_group_data->m_group_oversized = false; + clusterset.m_deps_to_add.clear(); + clusterset.m_deps_to_add.reserve(an_deps.size()); auto an_deps_it = an_deps.begin(); auto an_clusters_it = an_clusters.begin(); while (an_clusters_it != an_clusters.end()) { // Process all clusters/dependencies belonging to the partition with representative rep. auto rep = an_clusters_it->second; // Create and initialize a new GroupData entry for the partition. - auto& new_entry = m_group_data->m_groups.emplace_back(); - new_entry.m_cluster_offset = m_group_data->m_group_clusters.size(); + auto& new_entry = clusterset.m_group_data->m_groups.emplace_back(); + new_entry.m_cluster_offset = clusterset.m_group_data->m_group_clusters.size(); new_entry.m_cluster_count = 0; - new_entry.m_deps_offset = m_deps_to_add.size(); + new_entry.m_deps_offset = clusterset.m_deps_to_add.size(); new_entry.m_deps_count = 0; uint32_t total_count{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). 
while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { - m_group_data->m_group_clusters.push_back(an_clusters_it->first); + clusterset.m_group_data->m_group_clusters.push_back(an_clusters_it->first); total_count += an_clusters_it->first->GetTxCount(); ++an_clusters_it; ++new_entry.m_cluster_count; } // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add). while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { - m_deps_to_add.push_back(an_deps_it->first); + clusterset.m_deps_to_add.push_back(an_deps_it->first); ++an_deps_it; ++new_entry.m_deps_count; } // Detect oversizedness. if (total_count > m_max_cluster_count) { - m_group_data->m_group_oversized = true; + clusterset.m_group_data->m_group_oversized = true; } } Assume(an_deps_it == an_deps.end()); @@ -976,23 +985,24 @@ void TxGraphImpl::Merge(std::span to_merge) noexcept void TxGraphImpl::ApplyDependencies() noexcept { + auto& clusterset = m_clusterset; // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). GroupClusters(); - Assume(m_group_data.has_value()); + Assume(clusterset.m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. - if (m_deps_to_add.empty()) return; + if (clusterset.m_deps_to_add.empty()) return; // Dependencies cannot be applied if it would result in oversized clusters. - if (m_group_data->m_group_oversized) return; + if (clusterset.m_group_data->m_group_oversized) return; // For each group of to-be-merged Clusters. - for (const auto& group_data : m_group_data->m_groups) { + for (const auto& group_data : clusterset.m_group_data->m_groups) { // Invoke Merge() to merge them into a single Cluster. 
- auto cluster_span = std::span{m_group_data->m_group_clusters} + auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); Merge(cluster_span); // Actually apply all to-be-added dependencies (all parents and children from this grouping // belong to the same Cluster at this point because of the merging above). - auto deps_span = std::span{m_deps_to_add} + auto deps_span = std::span{clusterset.m_deps_to_add} .subspan(group_data.m_deps_offset, group_data.m_deps_count); Assume(!deps_span.empty()); const auto& loc = m_entries[deps_span[0].second].m_locator; @@ -1001,11 +1011,11 @@ void TxGraphImpl::ApplyDependencies() noexcept } // Wipe the list of to-be-added dependencies now that they are applied. - m_deps_to_add.clear(); + clusterset.m_deps_to_add.clear(); Compact(); // Also no further Cluster mergings are needed (note that we clear, but don't set to // std::nullopt, as that would imply the groupings are unknown). - m_group_data = GroupData{}; + clusterset.m_group_data = GroupData{}; } void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept @@ -1061,7 +1071,7 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept auto cluster_ptr = cluster.get(); InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); - ++m_txcount; + ++m_clusterset.m_txcount; // Return the Ref. return ret; } @@ -1076,9 +1086,9 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return; // Remember that the transaction is to be removed. - m_to_remove.push_back(GetRefIndex(arg)); + m_clusterset.m_to_remove.push_back(GetRefIndex(arg)); // Wipe m_group_data (as it will need to be recomputed). 
- m_group_data.reset(); + m_clusterset.m_group_data.reset(); } void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept @@ -1096,9 +1106,9 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; if (chl_cluster == nullptr) return; // Remember that this dependency is to be applied. - m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + m_clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); // Wipe m_group_data (as it will need to be recomputed). - m_group_data.reset(); + m_clusterset.m_group_data.reset(); } bool TxGraphImpl::Exists(const Ref& arg) noexcept @@ -1159,7 +1169,7 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1175,7 +1185,7 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be incorrect otherwise. ApplyDependencies(); // Ancestry cannot be known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1191,7 +1201,7 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be incorrect otherwise. 
ApplyDependencies(); // Cluster linearization cannot be known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1203,7 +1213,7 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept { ApplyRemovals(); - return m_txcount; + return m_clusterset.m_txcount; } FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept @@ -1228,7 +1238,7 @@ FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept // Apply all removals and dependencies, as the result might be inaccurate otherwise. ApplyDependencies(); // Chunk feerates cannot be accurately known if unapplied dependencies remain. - Assume(m_deps_to_add.empty()); + Assume(m_clusterset.m_deps_to_add.empty()); // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; if (cluster == nullptr) return {}; @@ -1244,8 +1254,8 @@ bool TxGraphImpl::IsOversized() noexcept // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. GroupClusters(); - Assume(m_group_data.has_value()); - return m_group_data->m_group_oversized; + Assume(m_clusterset.m_group_data.has_value()); + return m_clusterset.m_group_data->m_group_oversized; } void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept @@ -1347,11 +1357,12 @@ void TxGraphImpl::SanityCheck() const } + auto& clusterset = m_clusterset; std::set actual_clusters; // For all quality levels... 
for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { QualityLevel quality{qual}; - const auto& quality_clusters = m_clusters[qual]; + const auto& quality_clusters = clusterset.m_clusters[qual]; // ... for all clusters in them ... for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { const auto& cluster = *quality_clusters[setindex]; @@ -1369,13 +1380,13 @@ void TxGraphImpl::SanityCheck() const } // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. - for (GraphIndex idx : m_to_remove) { + for (GraphIndex idx : m_clusterset.m_to_remove) { assert(idx < m_entries.size()); assert(m_entries[idx].m_locator.IsPresent()); } // Verify that all to-be-added dependencies have valid identifiers. - for (auto [par_idx, chl_idx] : m_deps_to_add) { + for (auto [par_idx, chl_idx] : m_clusterset.m_deps_to_add) { assert(par_idx != chl_idx); assert(par_idx < m_entries.size()); assert(chl_idx < m_entries.size()); @@ -1390,7 +1401,9 @@ void TxGraphImpl::SanityCheck() const // If no to-be-removed transactions, or to-be-added dependencies remain, m_unlinked must be // empty (to prevent memory leaks due to an ever-growing m_entries vector). - if (m_to_remove.empty() && m_deps_to_add.empty()) assert(actual_unlinked.empty()); + if (clusterset.m_to_remove.empty() && clusterset.m_deps_to_add.empty()) { + assert(actual_unlinked.empty()); + } } } // namespace From d86294aed17b0f799b9c60adc49584447dc20a7c Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sun, 24 Nov 2024 10:00:59 -0500 Subject: [PATCH 15/30] txgraph: (refactor) abstract out ClearLocator Move a number of related modifications to TxGraphImpl into a separate function for removal of transactions. This is preparation for a later commit where this will be useful in more than one place. 
--- src/txgraph.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 044eba33c3e..04f9f7fed7d 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -275,6 +275,8 @@ public: ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + /** Make a transaction not exist. It must currently exist. */ + void ClearLocator(GraphIndex index) noexcept; // Functions for handling Refs. @@ -336,6 +338,16 @@ public: void SanityCheck() const final; }; +void TxGraphImpl::ClearLocator(GraphIndex idx) noexcept +{ + auto& entry = m_entries[idx]; + Assume(entry.m_locator.IsPresent()); + // Change the locator from Present to Missing. + entry.m_locator.SetMissing(); + // Update the transaction count. + --m_clusterset.m_txcount; +} + void Cluster::Updated(TxGraphImpl& graph) noexcept { // Update all the Locators for this Cluster's Entrys. @@ -386,9 +398,8 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // that causes it to be accessed regardless. m_mapping[locator.index] = GraphIndex(-1); // - Mark it as removed in the Entry's locator. - locator.SetMissing(); + graph.ClearLocator(idx); to_remove = to_remove.subspan(1); - --graph.m_clusterset.m_txcount; } while(!to_remove.empty()); auto quality = m_quality; From 247b8e42fb40362151e41ed5b8f106fadd19352f Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 4 Dec 2024 09:40:53 -0500 Subject: [PATCH 16/30] txgraph: (feature) add staging support In order to make it easy to evaluate proposed changes to a TxGraph, introduce a "staging" mode, where mutators (AddTransaction, AddDependency, RemoveTransaction) do not modify the actual graph, but just a staging version of it. 
That staging graph can then be committed (replacing the main one with it), or aborted (discarding the staging). --- src/test/fuzz/txgraph.cpp | 333 +++++++++------- src/txgraph.cpp | 787 +++++++++++++++++++++++++++----------- src/txgraph.h | 78 ++-- 3 files changed, 825 insertions(+), 373 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index b6163ed5544..cc20f9e3c4f 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,8 @@ using namespace cluster_linearize; namespace { /** Data type representing a naive simulated TxGraph, keeping all transactions (even from - * disconnected components) in a single DepGraph. */ + * disconnected components) in a single DepGraph. Unlike the real TxGraph, this only models + * a single graph, and multiple instances are used to simulate main/staging. */ struct SimTxGraph { /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's @@ -38,20 +40,28 @@ struct SimTxGraph /** The dependency graph (for all transactions in the simulation, regardless of * connectivity/clustering). */ DepGraph graph; - /** For each position in graph, which TxGraph::Ref it corresponds with (if any). */ - std::array, MAX_TRANSACTIONS> simmap; + /** For each position in graph, which TxGraph::Ref it corresponds with (if any). Use shared_ptr + * so that a SimTxGraph can be copied to create a staging one, while sharing Refs with + * the main graph. */ + std::array, MAX_TRANSACTIONS> simmap; /** For each TxGraph::Ref in graph, the position it corresponds with. */ std::map simrevmap; /** The set of TxGraph::Ref entries that have been removed, but not yet destroyed. */ - std::vector> removed; + std::vector> removed; /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */ std::optional oversized; /** The configured maximum number of transactions per cluster. 
*/ DepGraphIndex max_cluster_count; - /** Construct a new SimData with the specified maximum cluster count. */ + /** Construct a new SimTxGraph with the specified maximum cluster count. */ explicit SimTxGraph(DepGraphIndex max_cluster) : max_cluster_count(max_cluster) {} + // Permit copying and moving. + SimTxGraph(const SimTxGraph&) noexcept = default; + SimTxGraph& operator=(const SimTxGraph&) noexcept = default; + SimTxGraph(SimTxGraph&&) noexcept = default; + SimTxGraph& operator=(SimTxGraph&&) noexcept = default; + /** Check whether this graph is oversized (contains a connected component whose number of * transactions exceeds max_cluster_count. */ bool IsOversized() @@ -95,7 +105,7 @@ struct SimTxGraph assert(graph.TxCount() < MAX_TRANSACTIONS); auto simpos = graph.AddTransaction(feerate); assert(graph.Positions()[simpos]); - simmap[simpos] = std::make_unique(); + simmap[simpos] = std::make_shared(); auto ptr = simmap[simpos].get(); simrevmap[ptr] = simpos; return ptr; @@ -202,32 +212,43 @@ FUZZ_TARGET(txgraph) // Decide the maximum number of transactions per cluster we will use in this simulation. auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); - // Construct a real and a simulated graph. + // Construct a real graph, and a vector of simulated graphs (main, and possibly staging). auto real = MakeTxGraph(max_count); - SimTxGraph sim(max_count); + std::vector sims; + sims.reserve(2); + sims.emplace_back(max_count); - /** Function to pick any Ref (from sim.simmap or sim.removed, or the empty Ref). */ + /** Function to pick any Ref (for either sim in sims: from sim.simmap or sim.removed, or the + * empty Ref). */ auto pick_fn = [&]() noexcept -> TxGraph::Ref* { - auto tx_count = sim.GetTransactionCount(); + size_t tx_count[2] = {sims[0].GetTransactionCount(), 0}; /** The number of possible choices. 
*/ - size_t choices = tx_count + sim.removed.size() + 1; + size_t choices = tx_count[0] + sims[0].removed.size() + 1; + if (sims.size() == 2) { + tx_count[1] = sims[1].GetTransactionCount(); + choices += tx_count[1] + sims[1].removed.size(); + } /** Pick one of them. */ auto choice = provider.ConsumeIntegralInRange(0, choices - 1); - if (choice < tx_count) { - // Return from real. - for (auto i : sim.graph.Positions()) { - if (choice == 0) return sim.GetRef(i); - --choice; + // Consider both main and (if it exists) staging. + for (size_t level = 0; level < sims.size(); ++level) { + auto& sim = sims[level]; + if (choice < tx_count[level]) { + // Return from graph. + for (auto i : sim.graph.Positions()) { + if (choice == 0) return sim.GetRef(i); + --choice; + } + assert(false); + } else { + choice -= tx_count[level]; + } + if (choice < sim.removed.size()) { + // Return from removed. + return sim.removed[choice].get(); + } else { + choice -= sim.removed.size(); } - assert(false); - } else { - choice -= tx_count; - } - if (choice < sim.removed.size()) { - // Return from removed. - return sim.removed[choice].get(); - } else { - choice -= sim.removed.size(); } // Return empty. assert(choice == 0); @@ -237,15 +258,24 @@ FUZZ_TARGET(txgraph) LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { // Read a one-byte command. int command = provider.ConsumeIntegral(); - // Treat it lowest bit as a flag (which selects a variant of some of the operations), and - // leave the rest of the bits in command. + // Treat the lowest bit of a command as a flag (which selects a variant of some of the + // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave + // the rest of the bits in command. 
bool alt = command & 1; - command >>= 1; + bool use_main = command & 2; + command >>= 2; + + // Provide convenient aliases for the top simulated graph (main, or staging if it exists), + // one for the simulated graph selected based on use_main (for operations that can operate + // on both graphs), and one that always refers to the main graph. + auto& top_sim = sims.back(); + auto& sel_sim = use_main ? sims[0] : top_sim; + auto& main_sim = sims[0]; // Keep decrementing command for each applicable operation, until one is hit. Multiple // iterations may be necessary. while (true) { - if (sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + if (top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { // AddTransaction. int64_t fee; int32_t size; @@ -262,51 +292,54 @@ FUZZ_TARGET(txgraph) FeePerWeight feerate{fee, size}; // Create a real TxGraph::Ref. auto ref = real->AddTransaction(feerate); - // Create a unique_ptr place in the simulation to put the Ref in. - auto ref_loc = sim.AddTransaction(feerate); + // Create a shared_ptr place in the simulation to put the Ref in. + auto ref_loc = top_sim.AddTransaction(feerate); // Move it in place. *ref_loc = std::move(ref); break; - } else if (sim.GetTransactionCount() + sim.removed.size() > 1 && command-- == 0) { + } else if (top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { // AddDependency. auto par = pick_fn(); auto chl = pick_fn(); - auto pos_par = sim.Find(par); - auto pos_chl = sim.Find(chl); + auto pos_par = top_sim.Find(par); + auto pos_chl = top_sim.Find(chl); if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) { // Determine if adding this would introduce a cycle (not allowed by TxGraph), // and if so, skip. 
- if (sim.graph.Ancestors(pos_par)[pos_chl]) break; + if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break; } - sim.AddDependency(par, chl); + top_sim.AddDependency(par, chl); real->AddDependency(*par, *chl); break; - } else if (sim.removed.size() < 100 && command-- == 0) { + } else if (top_sim.removed.size() < 100 && command-- == 0) { // RemoveTransaction. Either all its ancestors or all its descendants are also // removed (if any), to make sure TxGraph's reordering of removals and dependencies // has no effect. std::vector to_remove; to_remove.push_back(pick_fn()); - sim.IncludeAncDesc(to_remove, alt); + top_sim.IncludeAncDesc(to_remove, alt); // The order in which these ancestors/descendants are removed should not matter; // randomly shuffle them. std::shuffle(to_remove.begin(), to_remove.end(), rng); for (TxGraph::Ref* ptr : to_remove) { real->RemoveTransaction(*ptr); - sim.RemoveTransaction(ptr); + top_sim.RemoveTransaction(ptr); } break; - } else if (sim.removed.size() > 0 && command-- == 0) { + } else if (sel_sim.removed.size() > 0 && command-- == 0) { // ~Ref. Destroying a TxGraph::Ref has an observable effect on the TxGraph it // refers to, so this simulation permits doing so separately from other actions on // TxGraph. - // Pick a Ref of sim.removed to destroy. - auto removed_pos = provider.ConsumeIntegralInRange(0, sim.removed.size() - 1); - if (removed_pos != sim.removed.size() - 1) { - std::swap(sim.removed[removed_pos], sim.removed.back()); + // Pick a Ref of sel_sim.removed to destroy. Note that the same Ref may still occur + // in the other graph, and thus not actually trigger ~Ref yet (which is exactly + // what we want, as destroying Refs is only allowed when it does not refer to an + // existing transaction in either graph). 
+ auto removed_pos = provider.ConsumeIntegralInRange(0, sel_sim.removed.size() - 1); + if (removed_pos != sel_sim.removed.size() - 1) { + std::swap(sel_sim.removed[removed_pos], sel_sim.removed.back()); } - sim.removed.pop_back(); + sel_sim.removed.pop_back(); break; } else if (command-- == 0) { // SetTransactionFee. @@ -318,77 +351,83 @@ FUZZ_TARGET(txgraph) } auto ref = pick_fn(); real->SetTransactionFee(*ref, fee); - sim.SetTransactionFee(ref, fee); + for (auto& sim : sims) { + sim.SetTransactionFee(ref, fee); + } break; } else if (command-- == 0) { // GetTransactionCount. - assert(real->GetTransactionCount() == sim.GetTransactionCount()); + assert(real->GetTransactionCount(use_main) == sel_sim.GetTransactionCount()); break; } else if (command-- == 0) { // Exists. auto ref = pick_fn(); - bool exists = real->Exists(*ref); - bool should_exist = sim.Find(ref) != SimTxGraph::MISSING; + bool exists = real->Exists(*ref, use_main); + bool should_exist = sel_sim.Find(ref) != SimTxGraph::MISSING; assert(exists == should_exist); break; } else if (command-- == 0) { // IsOversized. - assert(sim.IsOversized() == real->IsOversized()); + assert(sel_sim.IsOversized() == real->IsOversized(use_main)); break; } else if (command-- == 0) { // GetIndividualFeerate. auto ref = pick_fn(); auto feerate = real->GetIndividualFeerate(*ref); - auto simpos = sim.Find(ref); - if (simpos == SimTxGraph::MISSING) { - assert(feerate.IsEmpty()); - } else { - assert(feerate == sim.graph.FeeRate(simpos)); + bool found{false}; + for (auto& sim : sims) { + auto simpos = sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + found = true; + assert(feerate == sim.graph.FeeRate(simpos)); + } } + if (!found) assert(feerate.IsEmpty()); break; - } else if (!sim.IsOversized() && command-- == 0) { - // GetChunkFeerate. + } else if (!main_sim.IsOversized() && command-- == 0) { + // GetMainChunkFeerate. 
auto ref = pick_fn(); - auto feerate = real->GetChunkFeerate(*ref); - auto simpos = sim.Find(ref); + auto feerate = real->GetMainChunkFeerate(*ref); + auto simpos = main_sim.Find(ref); if (simpos == SimTxGraph::MISSING) { assert(feerate.IsEmpty()); } else { // Just do some quick checks that the reported value is in range. A full // recomputation of expected chunk feerates is done at the end. - assert(feerate.size >= sim.graph.FeeRate(simpos).size); + assert(feerate.size >= main_sim.graph.FeeRate(simpos).size); } break; - } else if (!sim.IsOversized() && command-- == 0) { + } else if (!sel_sim.IsOversized() && command-- == 0) { // GetAncestors/GetDescendants. auto ref = pick_fn(); - auto result = alt ? real->GetDescendants(*ref) : real->GetAncestors(*ref); + auto result = alt ? real->GetDescendants(*ref, use_main) + : real->GetAncestors(*ref, use_main); assert(result.size() <= max_count); - auto result_set = sim.MakeSet(result); + auto result_set = sel_sim.MakeSet(result); assert(result.size() == result_set.Count()); - auto expect_set = sim.GetAncDesc(ref, alt); + auto expect_set = sel_sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; - } else if (!sim.IsOversized() && command-- == 0) { + } else if (!sel_sim.IsOversized() && command-- == 0) { // GetCluster. auto ref = pick_fn(); - auto result = real->GetCluster(*ref); + auto result = real->GetCluster(*ref, use_main); // Check cluster count limit. assert(result.size() <= max_count); // Require the result to be topologically valid and not contain duplicates. - auto left = sim.graph.Positions(); + auto left = sel_sim.graph.Positions(); for (auto refptr : result) { - auto simpos = sim.Find(refptr); + auto simpos = sel_sim.Find(refptr); assert(simpos != SimTxGraph::MISSING); assert(left[simpos]); left.Reset(simpos); - assert(!sim.graph.Ancestors(simpos).Overlaps(left)); + assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left)); } // Require the set to be connected. 
- auto result_set = sim.MakeSet(result); - assert(sim.graph.IsConnected(result_set)); + auto result_set = sel_sim.MakeSet(result); + assert(sel_sim.graph.IsConnected(result_set)); // If ref exists, the result must contain it. If not, it must be empty. - auto simpos = sim.Find(ref); + auto simpos = sel_sim.Find(ref); if (simpos != SimTxGraph::MISSING) { assert(result_set[simpos]); } else { @@ -396,10 +435,29 @@ FUZZ_TARGET(txgraph) } // Require the set not to have ancestors or descendants outside of it. for (auto i : result_set) { - assert(sim.graph.Ancestors(i).IsSubsetOf(result_set)); - assert(sim.graph.Descendants(i).IsSubsetOf(result_set)); + assert(sel_sim.graph.Ancestors(i).IsSubsetOf(result_set)); + assert(sel_sim.graph.Descendants(i).IsSubsetOf(result_set)); } break; + } else if (command-- == 0) { + // HaveStaging. + assert((sims.size() == 2) == real->HaveStaging()); + break; + } else if (sims.size() < 2 && command-- == 0) { + // StartStaging. + sims.emplace_back(sims.back()); + real->StartStaging(); + break; + } else if (sims.size() > 1 && command-- == 0) { + // CommitStaging. + real->CommitStaging(); + sims.erase(sims.begin()); + break; + } else if (sims.size() > 1 && command-- == 0) { + // AbortStaging. + real->AbortStaging(); + sims.pop_back(); + break; } } } @@ -407,63 +465,70 @@ FUZZ_TARGET(txgraph) // After running all modifications, perform an internal sanity check (before invoking // inspectors that may modify the internal state). real->SanityCheck(); + assert(real->HaveStaging() == (sims.size() > 1)); - // Compare simple properties of the graph with the simulation. - assert(real->IsOversized() == sim.IsOversized()); - assert(real->GetTransactionCount() == sim.GetTransactionCount()); - - // If the graph (and the simulation) are not oversized, perform a full comparison. 
- if (!sim.IsOversized()) { - auto todo = sim.graph.Positions(); - // Iterate over all connected components of the resulting (simulated) graph, each of which - // should correspond to a cluster in the real one. - while (todo.Any()) { - auto component = sim.graph.FindConnectedComponent(todo); - todo -= component; - // Iterate over the transactions in that component. - for (auto i : component) { - // Check its individual feerate against simulation. - assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); - // Check its ancestors against simulation. - auto expect_anc = sim.graph.Ancestors(i); - auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i))); - assert(anc.Count() <= max_count); - assert(anc == expect_anc); - // Check its descendants against simulation. - auto expect_desc = sim.graph.Descendants(i); - auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i))); - assert(desc.Count() <= max_count); - assert(desc == expect_desc); - // Check the cluster the transaction is part of. - auto cluster = real->GetCluster(*sim.GetRef(i)); - assert(cluster.size() <= max_count); - assert(sim.MakeSet(cluster) == component); - // Check that the cluster is reported in a valid topological order (its - // linearization). - std::vector simlin; - SimTxGraph::SetType done; - for (TxGraph::Ref* ptr : cluster) { - auto simpos = sim.Find(ptr); - assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); - done.Set(simpos); - assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); - simlin.push_back(simpos); - } - // Construct a chunking object for the simulated graph, using the reported cluster - // linearization as ordering, and compare it against the reported chunk feerates. 
- cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); - DepGraphIndex idx{0}; - for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { - auto chunk = simlinchunk.GetChunk(chunknum); - // Require that the chunks of cluster linearizations are connected (this must - // be the case as all linearizations inside are PostLinearized). - assert(sim.graph.IsConnected(chunk.transactions)); - // Check the chunk feerates of all transactions in the cluster. - while (chunk.transactions.Any()) { - assert(chunk.transactions[simlin[idx]]); - chunk.transactions.Reset(simlin[idx]); - assert(chunk.feerate == real->GetChunkFeerate(*cluster[idx])); - ++idx; + // Try to run a full comparison, for both main_only=false and main_only=true in TxGraph + // inspector functions that support both. + for (int main_only = 0; main_only < 2; ++main_only) { + auto& sim = main_only ? sims[0] : sims.back(); + // Compare simple properties of the graph with the simulation. + assert(real->IsOversized(main_only) == sim.IsOversized()); + assert(real->GetTransactionCount(main_only) == sim.GetTransactionCount()); + // If the graph (and the simulation) are not oversized, perform a full comparison. + if (!sim.IsOversized()) { + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); + // Check its ancestors against simulation. 
+ auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i), main_only)); + assert(anc.Count() <= max_count); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i), main_only)); + assert(desc.Count() <= max_count); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(*sim.GetRef(i), main_only); + assert(cluster.size() <= max_count); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). + std::vector simlin; + SimTxGraph::SetType done; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + } + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + if (sims.size() == 1 || main_only) { + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + DepGraphIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. 
+ while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetMainChunkFeerate(*cluster[idx])); + ++idx; + } + } } } } @@ -475,8 +540,10 @@ FUZZ_TARGET(txgraph) // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be // addressed in a follow-up commit). - for (auto i : sim.graph.Positions()) { - auto ref = sim.GetRef(i); - real->RemoveTransaction(*ref); + for (auto& sim : sims) { + for (auto i : sim.graph.Positions()) { + auto ref = sim.GetRef(i); + real->RemoveTransaction(*ref); + } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 04f9f7fed7d..7d0fd4eb6fe 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -20,6 +20,9 @@ namespace { using namespace cluster_linearize; +/** The maximum number of levels a TxGraph can have (0 = main, 1 = staging). */ +static constexpr int MAX_LEVELS{2}; + // Forward declare the TxGraph implementation class. class TxGraphImpl; @@ -65,6 +68,8 @@ class Cluster QualityLevel m_quality{QualityLevel::NONE}; /** Which position this Cluster has in Graph::ClusterSet::m_clusters[m_quality]. */ ClusterSetIndex m_setindex{ClusterSetIndex(-1)}; + /** Which level this Cluster is at in the graph (-1=not inserted, 0=main, 1=staging). */ + int m_level{-1}; public: /** Construct an empty Cluster. */ @@ -72,7 +77,7 @@ public: /** Construct a singleton Cluster. */ explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept; - // Cannot move or copy (would invalidate Cluster* in Locator and TxGraphImpl::ClusterSet). */ + // Cannot move or copy (would invalidate Cluster* in Locator and ClusterSet). 
*/ Cluster(const Cluster&) = delete; Cluster& operator=(const Cluster&) = delete; Cluster(Cluster&&) = delete; @@ -105,6 +110,17 @@ public: void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; } /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */ void Updated(TxGraphImpl& graph) noexcept; + /** Create a copy of this Cluster, returning a pointer to it (used by PullIn). */ + Cluster* CopyTo(TxGraphImpl& graph, int to_level) const noexcept; + /** Get the list of Clusters that conflict with this one (at the level below this Cluster). */ + void GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept; + /** Mark all the Entry objects belonging to this Cluster as missing. The Cluster must be + * deleted immediately after. */ + void MakeTransactionsMissing(TxGraphImpl& graph) noexcept; + /** Remove all transactions from a Cluster. */ + void Clear(TxGraphImpl& graph) noexcept; + /** Change a Cluster's level from level to level-1. */ + void LevelDown(TxGraphImpl& graph) noexcept; // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations. @@ -136,16 +152,20 @@ public: // Debugging functions. - void SanityCheck(const TxGraphImpl& graph) const; + void SanityCheck(const TxGraphImpl& graph, int level) const; }; -/** The transaction graph. +/** The transaction graph, including staged changes. * * The overall design of the data structure consists of 3 interlinked representations: * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl). - * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl). + * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl::ClusterSet). 
* - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class) * + * The Clusters are kept in one or two ClusterSet objects, one for the "main" graph, and one for + * the proposed changes ("staging"). If a transaction occurs in both, they share the same Entry, + * but there will be a separate Cluster per graph. + * * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects * refer back to the Clusters and Refs the corresponding transaction is contained in. * @@ -204,28 +224,63 @@ private: std::vector> m_deps_to_add; /** Information about the merges to be performed, if known. */ std::optional m_group_data = GroupData{}; + /** Which entries were removed in this ClusterSet (so they can be wiped on abort). */ + std::vector m_removed; /** Total number of transactions in this graph (sum of all transaction counts in all - * Clusters). */ + * Clusters, and for staging also those inherited from the main ClusterSet). */ GraphIndex m_txcount{0}; }; - /** The ClusterSet for this TxGraphImpl. */ - ClusterSet m_clusterset; + /** The ClusterSets in this TxGraphImpl. Has exactly 1 (main) or exactly 2 elements (main and staged). */ + std::vector m_clustersets; - /** A Locator that describes whether, where, and in which Cluster an Entry appears. */ + /** A Locator that describes whether, where, and in which Cluster an Entry appears. + * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. + * + * Each level of a Locator is in one of three states: + * + * - (P)resent: actually occurs in a Cluster at that level. + * + * - (M)issing: + * - In the main graph: the transaction does not exist in main. + * - In the staging graph: the transaction's existence is the same as in main. If it doesn't + * exist in main, (M) in staging means it does not exist there + * either. 
If it does exist in main, (M) in staging means the + * cluster it is in has not been modified in staging, and thus the + * transaction implicitly exists in staging too (without explicit + * Cluster object; see PullIn() to create it in staging too). + * + * - (R)emoved: only possible in staging; it means the transaction exists in main, but is + * removed in staging. + * + * The following combinations are possible: + * - (M,M): the transaction doesn't exist in either graph. + * - (P,M): the transaction exists in both, but only exists explicitly in a Cluster object in + * main. Its existence in staging is inherited from main. + * - (P,P): the transaction exists in both, and is materialized in both. Thus, the clusters + * and/or their linearizations may be different in main and staging. + * - (M,P): the transaction is added in staging, and does not exist in main. + * - (P,R): the transaction exists in main, but is removed in staging. + * + * When staging does not exist, only (M,M) and (P,M) are possible. + */ struct Locator { /** Which Cluster the Entry appears in (nullptr = missing). */ Cluster* cluster{nullptr}; - /** Where in the Cluster it appears (only if cluster != nullptr). */ + /** Where in the Cluster it appears (if cluster == nullptr: 0 = missing, -1 = removed). */ DepGraphIndex index{0}; - /** Mark this Locator as missing. */ + /** Mark this Locator as missing (= same as lower level, or non-existing if level 0). */ void SetMissing() noexcept { cluster = nullptr; index = 0; } + /** Mark this Locator as removed (not allowed in level 0). */ + void SetRemoved() noexcept { cluster = nullptr; index = DepGraphIndex(-1); } /** Mark this Locator as present, in the specified Cluster. */ void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; } /** Check if this Locator is missing. */ bool IsMissing() const noexcept { return cluster == nullptr && index == 0; } + /** Check if this Locator is removed. 
*/ + bool IsRemoved() const noexcept { return cluster == nullptr && index == DepGraphIndex(-1); } /** Check if this Locator is present (in some Cluster). */ bool IsPresent() const noexcept { return cluster != nullptr; } }; @@ -236,13 +291,13 @@ private: { /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */ Ref* m_ref{nullptr}; - /** Which Cluster and position therein this Entry appears in. */ - Locator m_locator; - /** The chunk feerate of this transaction (if not missing). */ - FeePerWeight m_chunk_feerate; + /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ + Locator m_locator[MAX_LEVELS]; + /** The chunk feerate of this transaction in main (if present in m_locator[0]). */ + FeePerWeight m_main_chunk_feerate; }; - /** The set of all transactions. GraphIndex values index into this. */ + /** The set of all transactions (in all levels combined). GraphIndex values index into this. */ std::vector m_entries; /** Set of Entries which have no linked Ref anymore. */ @@ -255,6 +310,8 @@ public: { Assume(max_cluster_count >= 1); Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); + m_clustersets.reserve(MAX_LEVELS); + m_clustersets.emplace_back(); } // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). @@ -267,16 +324,21 @@ public: /** Swap the Entrys referred to by a and b. */ void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; - /** Extract a Cluster. */ - std::unique_ptr ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept; + /** If idx exists in the specified level ClusterSet (explicitly, or in the level below and not + * removed), return the Cluster it is in. Otherwise, return nullptr. */ + Cluster* FindCluster(GraphIndex idx, int level) const noexcept; + /** Extract a Cluster from its ClusterSet. */ + std::unique_ptr ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept; /** Delete a Cluster. 
*/ void DeleteCluster(Cluster& cluster) noexcept; - /** Insert a Cluster. */ - ClusterSetIndex InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept; + /** Insert a Cluster into its ClusterSet. */ + ClusterSetIndex InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept; /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). */ - void SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; - /** Make a transaction not exist. It must currently exist. */ - void ClearLocator(GraphIndex index) noexcept; + void SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + /** Make a transaction not exist at a specified level. It must currently exist there. */ + void ClearLocator(int level, GraphIndex index) noexcept; + /** Find which Clusters conflict with the top level. */ + std::vector GetConflicts() const noexcept; // Functions for handling Refs. @@ -301,21 +363,25 @@ public: // Functions related to various normalization/application steps. /** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex * values for remaining Entrys, so this only does something when no to-be-applied operations - * referring to GraphIndexes remain). */ + * or staged removals referring to GraphIndexes remain). */ void Compact() noexcept; + /** If cluster is not in the top level, copy it there, and return a pointer to it. This has no + * effect if only a main graph exists, but if staging exists this modifies the locators of its + * transactions from inherited (P,M) to explicit (P,P). */ + Cluster* PullIn(Cluster* cluster) noexcept; /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a - * NEEDS_SPLIT* QualityLevel). */ - void ApplyRemovals() noexcept; - /** Split an individual cluster. */ + * NEEDS_SPLIT* QualityLevel) in the specified level. 
*/ + void ApplyRemovals(int level) noexcept; + /** Split an individual cluster (which must be in the top-level ClusterSet). */ void Split(Cluster& cluster) noexcept; - /** Split all clusters that need splitting. */ - void SplitAll() noexcept; - /** Populate m_group_data based on m_deps_to_add. */ - void GroupClusters() noexcept; + /** Split all clusters that need splitting in the specified level. */ + void SplitAll(int level) noexcept; + /** Populate m_group_data based on m_deps_to_add in the specified level. */ + void GroupClusters(int level) noexcept; /** Merge the specified clusters. */ void Merge(std::span to_merge) noexcept; - /** Apply all m_deps_to_add to the relevant Clusters. */ - void ApplyDependencies() noexcept; + /** Apply all m_deps_to_add to the relevant Clusters in the specified level. */ + void ApplyDependencies(int level) noexcept; /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ void MakeAcceptable(Cluster& cluster) noexcept; @@ -326,26 +392,36 @@ public: void AddDependency(const Ref& parent, const Ref& child) noexcept final; void SetTransactionFee(const Ref&, int64_t fee) noexcept final; - bool Exists(const Ref& arg) noexcept final; - FeePerWeight GetChunkFeerate(const Ref& arg) noexcept final; + void StartStaging() noexcept final; + void CommitStaging() noexcept final; + void AbortStaging() noexcept final; + bool HaveStaging() const noexcept final { return m_clustersets.size() > 1; } + + bool Exists(const Ref& arg, bool main_only = false) noexcept final; + FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept final; FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept final; - std::vector GetCluster(const Ref& arg) noexcept final; - std::vector GetAncestors(const Ref& arg) noexcept final; - std::vector GetDescendants(const Ref& arg) noexcept final; - GraphIndex GetTransactionCount() noexcept final; - bool IsOversized() noexcept final; + std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept final; + 
std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; + GraphIndex GetTransactionCount(bool main_only = false) noexcept final; + bool IsOversized(bool main_only = false) noexcept final; void SanityCheck() const final; }; -void TxGraphImpl::ClearLocator(GraphIndex idx) noexcept +void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { auto& entry = m_entries[idx]; - Assume(entry.m_locator.IsPresent()); - // Change the locator from Present to Missing. - entry.m_locator.SetMissing(); + Assume(entry.m_locator[level].IsPresent()); + // Change the locator from Present to Missing or Removed. + if (level == 0 || !entry.m_locator[level - 1].IsPresent()) { + entry.m_locator[level].SetMissing(); + } else { + entry.m_locator[level].SetRemoved(); + m_clustersets[level].m_removed.push_back(idx); + } // Update the transaction count. - --m_clusterset.m_txcount; + --m_clustersets[level].m_txcount; } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -353,13 +429,13 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entrys. for (DepGraphIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; - entry.m_locator.SetPresent(this, idx); + entry.m_locator[m_level].SetPresent(this, idx); } - // If the Cluster's quality is ACCEPTABLE or OPTIMAL, compute its chunking and store its - // information in the Entry's m_chunk_feerate. These fields are only accessed after making - // the entire graph ACCEPTABLE, so it is pointless to compute these if we haven't reached that - // quality level yet. - if (IsAcceptable()) { + // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or + // OPTIMAL, compute its chunking and store its information in the Entry's m_main_chunk_feerate. 
+ // These fields are only accessed after making the entire graph ACCEPTABLE, so it is pointless + // to compute these if we haven't reached that quality level yet. + if (m_level == 0 && IsAcceptable()) { LinearizationChunking chunking(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; // Iterate over the chunks. @@ -371,7 +447,7 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept DepGraphIndex idx = m_linearization[lin_idx++]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; - entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); + entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); Assume(chunk.transactions[idx]); chunk.transactions.Reset(idx); } while(chunk.transactions.Any()); @@ -379,6 +455,57 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept } } +void Cluster::GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept +{ + for (auto i : m_linearization) { + auto& entry = graph.m_entries[m_mapping[i]]; + // For every transaction Entry in this Cluster, if it also exists in a lower-level Cluster, + // then that Cluster conflicts. + if (entry.m_locator[m_level - 1].IsPresent()) { + out.push_back(entry.m_locator[m_level - 1].cluster); + } + } +} + +std::vector TxGraphImpl::GetConflicts() const noexcept +{ + int level = m_clustersets.size() - 1; + std::vector ret; + // All Clusters at level-1 containing transactions in m_removed (so (P,R) ones) are conflicts. + for (auto i : m_clustersets[level].m_removed) { + auto& entry = m_entries[i]; + Assume(entry.m_locator[level - 1].IsPresent()); + ret.push_back(entry.m_locator[level - 1].cluster); + } + // Then go over all Clusters at this level, and find their conflicts (the (P,P) ones). 
+ for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& clusters = m_clustersets[level].m_clusters[quality]; + for (const auto& cluster : clusters) { + cluster->GetConflicts(*this, ret); + } + } + // Deduplicate the result (the same Cluster may appear multiple times). + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + return ret; +} + +Cluster* Cluster::CopyTo(TxGraphImpl& graph, int to_level) const noexcept +{ + // Construct an empty Cluster. + auto ret = std::make_unique(); + auto ptr = ret.get(); + // Copy depgraph, mapping, and linearization. + ptr->m_depgraph = m_depgraph; + ptr->m_mapping = m_mapping; + ptr->m_linearization = m_linearization; + // Insert the new Cluster into the graph. + graph.InsertCluster(to_level, std::move(ret), m_quality); + // Update its Locators. + ptr->Updated(graph); + return ptr; +} + void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept { // Iterate over the prefix of to_remove that applies to this cluster. @@ -388,7 +515,7 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove GraphIndex idx = to_remove.front(); Assume(idx < graph.m_entries.size()); auto& entry = graph.m_entries[idx]; - auto& locator = entry.m_locator; + auto& locator = entry.m_locator[m_level]; // Stop once we hit an entry that applies to another Cluster. if (locator.cluster != this) break; // - Remember it in a set of to-remove DepGraphIndexes. @@ -397,8 +524,8 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // are just never accessed, but set it to -1 here to increase the ability to detect a bug // that causes it to be accessed regardless. m_mapping[locator.index] = GraphIndex(-1); - // - Mark it as removed in the Entry's locator. - graph.ClearLocator(idx); + // - Mark it as missing/removed in the Entry's locator.
+ graph.ClearLocator(m_level, idx); to_remove = to_remove.subspan(1); } while(!to_remove.empty()); @@ -430,7 +557,32 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove [&](auto pos) { return todo[pos]; }), m_linearization.end()); quality = QualityLevel::NEEDS_SPLIT; } - graph.SetClusterQuality(m_quality, m_setindex, quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, quality); + Updated(graph); +} + +void Cluster::Clear(TxGraphImpl& graph) noexcept +{ + for (auto i : m_linearization) { + graph.ClearLocator(m_level, m_mapping[i]); + } + m_depgraph = {}; + m_linearization.clear(); + m_mapping.clear(); +} + +void Cluster::LevelDown(TxGraphImpl& graph) noexcept +{ + int level = m_level; + Assume(level > 0); + for (auto i : m_linearization) { + GraphIndex idx = m_mapping[i]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[level].SetMissing(); + } + auto quality = m_quality; + auto cluster = graph.ExtractCluster(level, quality, m_setindex); + graph.InsertCluster(level - 1, std::move(cluster), quality); Updated(graph); } @@ -463,7 +615,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept if (first && component == todo) { // The existing Cluster is an entire component. Leave it be, but update its quality. Assume(todo == m_depgraph.Positions()); - graph.SetClusterQuality(m_quality, m_setindex, new_quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its // chunking. Updated(graph); @@ -478,7 +630,7 @@ bool Cluster::Split(TxGraphImpl& graph) noexcept for (auto i : component) { remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; } - graph.InsertCluster(std::move(new_cluster), new_quality); + graph.InsertCluster(m_level, std::move(new_cluster), new_quality); todo -= component; } // Redistribute the transactions. 
@@ -540,7 +692,7 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // Update the transaction's Locator. There is no need to call Updated() to update chunk // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). - graph.m_entries[idx].m_locator.SetPresent(this, new_pos); + graph.m_entries[idx].m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. other.m_depgraph = DepGraph{}; @@ -563,14 +715,14 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::spansecond].m_locator; + auto& first_child = graph.m_entries[it->second].m_locator[m_level]; const auto child_idx = first_child.index; // Iterate over all to-be-added dependencies within that same child, gather the relevant // parents. SetType parents; while (it != to_apply.end()) { - auto& child = graph.m_entries[it->second].m_locator; - auto& parent = graph.m_entries[it->first].m_locator; + auto& child = graph.m_entries[it->second].m_locator[m_level]; + auto& parent = graph.m_entries[it->first].m_locator[m_level]; Assume(child.cluster == this && parent.cluster == this); if (child.index != child_idx) break; parents.Set(parent.index); @@ -591,23 +743,27 @@ void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span TxGraphImpl::ExtractCluster(QualityLevel quality, ClusterSetIndex setindex) noexcept +std::unique_ptr TxGraphImpl::ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept { Assume(quality != QualityLevel::NONE); + Assume(level >= 0 && size_t(level) < m_clustersets.size()); - auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; + auto& clusterset = m_clustersets[level]; + auto& quality_clusters = clusterset.m_clusters[int(quality)]; Assume(setindex < quality_clusters.size()); // Extract the Cluster-owning unique_ptr. 
std::unique_ptr ret = std::move(quality_clusters[setindex]); ret->m_quality = QualityLevel::NONE; ret->m_setindex = ClusterSetIndex(-1); + ret->m_level = -1; // Clean up space in quality_cluster. auto max_setindex = quality_clusters.size() - 1; if (setindex != max_setindex) { // If the cluster was not the last element of quality_clusters, move that to take its place. quality_clusters.back()->m_setindex = setindex; + quality_clusters.back()->m_level = level; quality_clusters[setindex] = std::move(quality_clusters.back()); } // The last element of quality_clusters is now unused; drop it. @@ -616,56 +772,105 @@ std::unique_ptr TxGraphImpl::ExtractCluster(QualityLevel quality, Clust return ret; } -ClusterSetIndex TxGraphImpl::InsertCluster(std::unique_ptr&& cluster, QualityLevel quality) noexcept +ClusterSetIndex TxGraphImpl::InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept { // Cannot insert with quality level NONE (as that would mean not inserted). Assume(quality != QualityLevel::NONE); // The passed-in Cluster must not currently be in the TxGraphImpl. Assume(cluster->m_quality == QualityLevel::NONE); + // The specified level must exist. + Assume(level >= 0 && size_t(level) < m_clustersets.size()); // Append it at the end of the relevant TxGraphImpl::m_cluster. 
- auto& quality_clusters = m_clusterset.m_clusters[int(quality)]; + auto& clusterset = m_clustersets[level]; + auto& quality_clusters = clusterset.m_clusters[int(quality)]; ClusterSetIndex ret = quality_clusters.size(); cluster->m_quality = quality; cluster->m_setindex = ret; + cluster->m_level = level; quality_clusters.push_back(std::move(cluster)); return ret; } -void TxGraphImpl::SetClusterQuality(QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept +void TxGraphImpl::SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept { Assume(new_quality != QualityLevel::NONE); + Assume(level >= 0 && size_t(level) < m_clustersets.size()); // Don't do anything if the quality did not change. if (old_quality == new_quality) return; // Extract the cluster from where it currently resides. - auto cluster_ptr = ExtractCluster(old_quality, old_index); + auto cluster_ptr = ExtractCluster(level, old_quality, old_index); // And re-insert it where it belongs. - InsertCluster(std::move(cluster_ptr), new_quality); + InsertCluster(level, std::move(cluster_ptr), new_quality); } void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept { // Extract the cluster from where it currently resides. - auto cluster_ptr = ExtractCluster(cluster.m_quality, cluster.m_setindex); + auto cluster_ptr = ExtractCluster(cluster.m_level, cluster.m_quality, cluster.m_setindex); // And throw it away. cluster_ptr.reset(); } -void TxGraphImpl::ApplyRemovals() noexcept +Cluster* TxGraphImpl::FindCluster(GraphIndex idx, int level) const noexcept { - auto& clusterset = m_clusterset; + Assume(level >= 0 && size_t(level) < m_clustersets.size()); + auto& entry = m_entries[idx]; + // Search the entry's locators from top to bottom. + for (int l = level; l >= 0; --l) { + // If the locator is missing, dig deeper; it may exist at a lower level and therefore be + // implicitly existing at this level too. 
+ if (entry.m_locator[l].IsMissing()) continue; + // If the locator has the entry marked as explicitly removed, stop. + if (entry.m_locator[l].IsRemoved()) break; + // Otherwise, we have found the topmost ClusterSet that contains this entry. + return entry.m_locator[l].cluster; + } + // If no non-empty locator was found, or an explicitly removed was hit, return nothing. + return nullptr; +} + +Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept +{ + int to_level = m_clustersets.size() - 1; + if (to_level == 0) return cluster; + int level = cluster->m_level; + Assume(level <= to_level); + // Copy the Cluster from the level it was found at to higher levels, if any. + while (level < to_level) { + // Make the Cluster Acceptable before copying. This isn't strictly necessary, but doing it + // now avoids doing double work later. + MakeAcceptable(*cluster); + ++level; + auto new_cluster = cluster->CopyTo(*this, level); + cluster = new_cluster; + } + return cluster; +} + +void TxGraphImpl::ApplyRemovals(int level) noexcept +{ + auto& clusterset = m_clustersets[level]; auto& to_remove = clusterset.m_to_remove; // Skip if there is nothing to remove. if (to_remove.empty()) return; + // There cannot be removals to be applied in main when staging exists (they should have been + // applied in StartStaging already, and none can be added to main while staging exists). + Assume(size_t(level) == m_clustersets.size() - 1); + // Pull in all Clusters that are not in the top ClusterSet. + for (GraphIndex index : to_remove) { + auto cluster = FindCluster(index, level); + PullIn(cluster); + } // Group the set of to-be-removed entries by Cluster*. std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { - return std::less{}(m_entries[a].m_locator.cluster, m_entries[b].m_locator.cluster); + return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); }); // Process per Cluster. 
std::span to_remove_span{to_remove}; while (!to_remove_span.empty()) { - Cluster* cluster = m_entries[to_remove_span.front()].m_locator.cluster; + Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; if (cluster != nullptr) { // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it // can pop off whatever applies to it. @@ -692,21 +897,26 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept Entry& entry = m_entries[idx]; // Update linked Ref. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; - // Update the locator. The rest of the Entry information will not change, so no need to - // invoke Cluster::Updated(). - Locator& locator = entry.m_locator; - if (locator.IsPresent()) { - locator.cluster->UpdateMapping(locator.index, idx); + // Update the locators for both levels. The rest of the Entry information will not change, + // so no need to invoke Cluster::Updated(). + for (int level = 0; level < MAX_LEVELS; ++level) { + Locator& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->UpdateMapping(locator.index, idx); + } } } } void TxGraphImpl::Compact() noexcept { - // We cannot compact while any to-be-applied operations remain, as we'd need to rewrite them. - // It is easier to delay the compaction until they have been applied. - if (!m_clusterset.m_deps_to_add.empty()) return; - if (!m_clusterset.m_to_remove.empty()) return; + // We cannot compact while any to-be-applied operations or staged removals remain as we'd need + // to rewrite them. It is easier to delay the compaction until they have been applied. + for (auto& clusterset : m_clustersets) { + if (!clusterset.m_deps_to_add.empty()) return; + if (!clusterset.m_to_remove.empty()) return; + if (!clusterset.m_removed.empty()) return; + } // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last // ones get processed first. 
This means earlier-processed GraphIndexes will not cause moving of @@ -725,7 +935,9 @@ void TxGraphImpl::Compact() noexcept Entry& entry = m_entries[idx]; Assume(entry.m_ref == nullptr); // Make sure the entry does not occur in the graph. - Assume(!entry.m_locator.IsPresent()); + for (int level = 0; level < MAX_LEVELS; ++level) { + Assume(!entry.m_locator[level].IsPresent()); + } // Move the entry to the end. if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); @@ -739,7 +951,7 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept { // To split a Cluster, first make sure all removals are applied (as we might need to split // again afterwards otherwise). - ApplyRemovals(); + ApplyRemovals(cluster.m_level); bool del = cluster.Split(*this); if (del) { // Cluster::Split reports whether the Cluster is to be deleted. @@ -747,28 +959,32 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept } } -void TxGraphImpl::SplitAll() noexcept +void TxGraphImpl::SplitAll(int level) noexcept { // Before splitting all Cluster, first make sure all removals are applied. - ApplyRemovals(); + ApplyRemovals(level); for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { - auto& queue = m_clusterset.m_clusters[int(quality)]; + auto& queue = m_clustersets[level].m_clusters[int(quality)]; while (!queue.empty()) { Split(*queue.back().get()); } } } -void TxGraphImpl::GroupClusters() noexcept +void TxGraphImpl::GroupClusters(int level) noexcept { - auto& clusterset = m_clusterset; + auto& clusterset = m_clustersets[level]; // If the groupings have been computed already, nothing is left to be done. if (clusterset.m_group_data.has_value()) return; + // We should never need to compute main grouping while staging exists (it should have already + // been computed in StartStaging, and no modifications that invalidate it can be made while + // staging exists).
+ Assume(size_t(level) == m_clustersets.size() - 1); // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up // with inefficient and/or oversized Clusters which just end up being split again anyway. - SplitAll(); + SplitAll(level); /** Annotated clusters: an entry for each Cluster, together with the representative for the * partition it is in if known, or with nullptr if not yet known. */ @@ -780,8 +996,8 @@ void TxGraphImpl::GroupClusters() noexcept // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies. for (const auto& [par, chl] : clusterset.m_deps_to_add) { - auto par_cluster = m_entries[par].m_locator.cluster; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); // Skip dependencies for which the parent or child transaction is removed. 
if (par_cluster == nullptr || chl_cluster == nullptr) continue; an_clusters.emplace_back(par_cluster, nullptr); @@ -798,8 +1014,8 @@ void TxGraphImpl::GroupClusters() noexcept std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { auto [_a_par, a_chl] = a; auto [_b_par, b_chl] = b; - auto a_chl_cluster = m_entries[a_chl].m_locator.cluster; - auto b_chl_cluster = m_entries[b_chl].m_locator.cluster; + auto a_chl_cluster = FindCluster(a_chl, level); + auto b_chl_cluster = FindCluster(b_chl, level); return std::less{}(a_chl_cluster, b_chl_cluster); }); @@ -873,8 +1089,8 @@ void TxGraphImpl::GroupClusters() noexcept Cluster* last_chl_cluster{nullptr}; PartitionData* last_partition{nullptr}; for (const auto& [par, chl] : clusterset.m_deps_to_add) { - auto par_cluster = m_entries[par].m_locator.cluster; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); // Nothing to do if parent and child are in the same Cluster. if (par_cluster == chl_cluster) continue; // Nothing to do if either parent or child transaction is removed already. @@ -906,12 +1122,12 @@ void TxGraphImpl::GroupClusters() noexcept // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. while (deps_it != clusterset.m_deps_to_add.end()) { auto [par, chl] = *deps_it; - auto chl_cluster = m_entries[chl].m_locator.cluster; + auto chl_cluster = FindCluster(chl, level); if (std::greater{}(chl_cluster, data.cluster)) break; // Skip dependencies that apply to earlier Clusters (those necessary are for // deleted transactions, as otherwise we'd have processed them already). if (chl_cluster == data.cluster) { - auto par_cluster = m_entries[par].m_locator.cluster; + auto par_cluster = FindCluster(par, level); // Also filter out dependencies applying to a removed parent. 
if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); } @@ -994,11 +1210,11 @@ void TxGraphImpl::Merge(std::span to_merge) noexcept } } -void TxGraphImpl::ApplyDependencies() noexcept +void TxGraphImpl::ApplyDependencies(int level) noexcept { - auto& clusterset = m_clusterset; + auto& clusterset = m_clustersets[level]; // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). - GroupClusters(); + GroupClusters(level); Assume(clusterset.m_group_data.has_value()); // Nothing to do if there are no dependencies to be added. if (clusterset.m_deps_to_add.empty()) return; @@ -1007,16 +1223,20 @@ void TxGraphImpl::ApplyDependencies() noexcept // For each group of to-be-merged Clusters. for (const auto& group_data : clusterset.m_group_data->m_groups) { - // Invoke Merge() to merge them into a single Cluster. auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + // Pull in all the Clusters that contain dependencies. + for (Cluster*& cluster : cluster_span) { + cluster = PullIn(cluster); + } + // Invoke Merge() to merge them into a single Cluster. Merge(cluster_span); // Actually apply all to-be-added dependencies (all parents and children from this grouping // belong to the same Cluster at this point because of the merging above). auto deps_span = std::span{clusterset.m_deps_to_add} .subspan(group_data.m_deps_offset, group_data.m_deps_count); Assume(!deps_span.empty()); - const auto& loc = m_entries[deps_span[0].second].m_locator; + const auto& loc = m_entries[deps_span[0].second].m_locator[level]; Assume(loc.IsPresent()); loc.cluster->ApplyDependencies(*this, deps_span); } @@ -1045,7 +1265,7 @@ void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept m_linearization = std::move(linearization); // Update the Cluster's quality. auto new_quality = optimal ? 
QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE; - graph.SetClusterQuality(m_quality, m_setindex, new_quality); + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); // Update the Entry objects. Updated(graph); } @@ -1080,9 +1300,10 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept // Construct a new singleton Cluster (which is necessarily optimally linearized). auto cluster = std::make_unique(*this, feerate, idx); auto cluster_ptr = cluster.get(); - InsertCluster(std::move(cluster), QualityLevel::OPTIMAL); + int level = m_clustersets.size() - 1; + InsertCluster(level, std::move(cluster), QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); - ++m_clusterset.m_txcount; + ++m_clustersets[level].m_txcount; // Return the Ref. return ret; } @@ -1094,12 +1315,13 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept if (GetRefGraph(arg) == nullptr) return; Assume(GetRefGraph(arg) == this); // Find the Cluster the transaction is in, and stop if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), m_clustersets.size() - 1); if (cluster == nullptr) return; // Remember that the transaction is to be removed. - m_clusterset.m_to_remove.push_back(GetRefIndex(arg)); + auto& clusterset = m_clustersets.back(); + clusterset.m_to_remove.push_back(GetRefIndex(arg)); // Wipe m_group_data (as it will need to be recomputed). - m_clusterset.m_group_data.reset(); + clusterset.m_group_data.reset(); } void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept @@ -1112,23 +1334,26 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept if (GetRefIndex(parent) == GetRefIndex(child)) return; // Find the Cluster the parent and child transaction are in, and stop if either appears to be // already removed. 
- auto par_cluster = m_entries[GetRefIndex(parent)].m_locator.cluster; + auto par_cluster = FindCluster(GetRefIndex(parent), m_clustersets.size() - 1); if (par_cluster == nullptr) return; - auto chl_cluster = m_entries[GetRefIndex(child)].m_locator.cluster; + auto chl_cluster = FindCluster(GetRefIndex(child), m_clustersets.size() - 1); if (chl_cluster == nullptr) return; // Remember that this dependency is to be applied. - m_clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + auto& clusterset = m_clustersets.back(); + clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); // Wipe m_group_data (as it will need to be recomputed). - m_clusterset.m_group_data.reset(); + clusterset.m_group_data.reset(); } -bool TxGraphImpl::Exists(const Ref& arg) noexcept +bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept { if (GetRefGraph(arg) == nullptr) return false; Assume(GetRefGraph(arg) == this); + size_t level = main_only ? 0 : m_clustersets.size() - 1; // Make sure the transaction isn't scheduled for removal. - ApplyRemovals(); - return m_entries[GetRefIndex(arg)].m_locator.IsPresent(); + ApplyRemovals(level); + auto cluster = FindCluster(GetRefIndex(arg), level); + return cluster != nullptr; } std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept @@ -1172,59 +1397,75 @@ FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept return FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(idx)); } -std::vector TxGraphImpl::GetAncestors(const Ref& arg) noexcept +void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept { - // Return the empty vector if the Ref is empty. - if (GetRefGraph(arg) == nullptr) return {}; - Assume(GetRefGraph(arg) == this); - // Apply all removals and dependencies, as the result might be incorrect otherwise. - ApplyDependencies(); - // Ancestry cannot be known if unapplied dependencies remain. 
- Assume(m_clusterset.m_deps_to_add.empty()); - // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; - if (cluster == nullptr) return {}; - // Dispatch to the Cluster. - return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); + // Mark all transactions of a Cluster missing, needed when aborting staging, so that the + // corresponding Locators don't retain references into aborted Clusters. + for (auto ci : m_linearization) { + GraphIndex idx = m_mapping[ci]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[m_level].SetMissing(); + } } -std::vector TxGraphImpl::GetDescendants(const Ref& arg) noexcept +std::vector TxGraphImpl::GetAncestors(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. - ApplyDependencies(); + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); // Ancestry cannot be known if unapplied dependencies remain. - Assume(m_clusterset.m_deps_to_add.empty()); + Assume(m_clustersets[level].m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator.index); + return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); } -std::vector TxGraphImpl::GetCluster(const Ref& arg) noexcept +std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty. 
if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be incorrect otherwise. - ApplyDependencies(); + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = FindCluster(GetRefIndex(arg), level); + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); +} + +std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept +{ + // Return the empty vector if the Ref is empty (which may be indicative of the transaction + // having been removed already). + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); // Cluster linearization cannot be known if unapplied dependencies remain. - Assume(m_clusterset.m_deps_to_add.empty()); + Assume(m_clustersets[level].m_deps_to_add.empty()); // Find the Cluster the argument is in, and return the empty vector if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then dispatch to it. MakeAcceptable(*cluster); return cluster->GetClusterRefs(*this); } -TxGraph::GraphIndex TxGraphImpl::GetTransactionCount() noexcept +TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept { - ApplyRemovals(); - return m_clusterset.m_txcount; + size_t level = main_only ? 
0 : m_clustersets.size() - 1; + ApplyRemovals(level); + return m_clustersets[level].m_txcount; } FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept @@ -1232,41 +1473,125 @@ FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept // Return the empty FeePerWeight if the passed Ref is empty. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); - // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. - ApplyRemovals(); - // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + // Find the cluster the argument is in (the level does not matter as individual feerates will + // be identical if it occurs in both), and return the empty FeePerWeight if it isn't in any. + Cluster* cluster{nullptr}; + for (int level = 0; size_t(level) < m_clustersets.size(); ++level) { + // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. + ApplyRemovals(level); + if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) { + cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster; + break; + } + } if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator.index); + return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); } -FeePerWeight TxGraphImpl::GetChunkFeerate(const Ref& arg) noexcept +FeePerWeight TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept { // Return the empty FeePerWeight if the passed Ref is empty. if (GetRefGraph(arg) == nullptr) return {}; Assume(GetRefGraph(arg) == this); // Apply all removals and dependencies, as the result might be inaccurate otherwise. - ApplyDependencies(); + ApplyDependencies(/*level=*/0); // Chunk feerates cannot be accurately known if unapplied dependencies remain. 
- Assume(m_clusterset.m_deps_to_add.empty()); + Assume(m_clustersets[0].m_deps_to_add.empty()); // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. - auto cluster = m_entries[GetRefIndex(arg)].m_locator.cluster; + auto cluster = FindCluster(GetRefIndex(arg), 0); if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then return the transaction's // chunk feerate. MakeAcceptable(*cluster); const auto& entry = m_entries[GetRefIndex(arg)]; - return entry.m_chunk_feerate; + return entry.m_main_chunk_feerate; } -bool TxGraphImpl::IsOversized() noexcept +bool TxGraphImpl::IsOversized(bool main_only) noexcept { + size_t level = main_only ? 0 : m_clustersets.size() - 1; // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. - GroupClusters(); - Assume(m_clusterset.m_group_data.has_value()); - return m_clusterset.m_group_data->m_group_oversized; + GroupClusters(level); + Assume(m_clustersets[level].m_group_data.has_value()); + return m_clustersets[level].m_group_data->m_group_oversized; +} + +void TxGraphImpl::StartStaging() noexcept +{ + Assume(m_clustersets.size() < MAX_LEVELS); + // Apply all remaining dependencies in main before creating a staging graph. Once staging + // exists, we cannot merge Clusters anymore (because of interference with Clusters being + // pulled into staging), so to make sure all inspectors are available (if not oversized), + // do all merging work now. This also involves applying all removals. + ApplyDependencies(m_clustersets.size() - 1); + // Construct a new graph. + m_clustersets.emplace_back(); + // Copy statistics, precomputed data, and to-be-applied dependencies (only if oversized) to + // the new graph. To-be-applied removals will always be empty at this point. 
+ auto& stage = m_clustersets.back(); + auto& main = *(m_clustersets.rbegin() + 1); + stage.m_txcount = main.m_txcount; + stage.m_deps_to_add = main.m_deps_to_add; + stage.m_group_data = main.m_group_data; +} + +void TxGraphImpl::AbortStaging() noexcept +{ + Assume(m_clustersets.size() > 1); + int stage_level = m_clustersets.size() - 1; + auto& stage = m_clustersets[stage_level]; + // Mark all removed transactions as Missing (so the stage_level locator for these transactions + // can be reused if another staging is created). + for (auto idx : stage.m_removed) { + m_entries[idx].m_locator[stage_level].SetMissing(); + } + // Do the same with the non-removed transactions in staging Clusters. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (auto& cluster : stage.m_clusters[quality]) { + cluster->MakeTransactionsMissing(*this); + } + } + // Destroy the staging graph data. + m_clustersets.pop_back(); + Compact(); +} + +void TxGraphImpl::CommitStaging() noexcept +{ + Assume(m_clustersets.size() > 1); + int stage_level = m_clustersets.size() - 1; + int main_level = stage_level - 1; + auto& stage = m_clustersets[stage_level]; + auto& main = m_clustersets[main_level]; + // Delete all conflicting Clusters in main_level, to make place for moving the staging ones + // there. All of these have been PullIn()'d to stage_level before. + auto conflicts = GetConflicts(); + for (Cluster* conflict : conflicts) { + conflict->Clear(*this); + DeleteCluster(*conflict); + } + // Mark the removed transactions as Missing (so the stage_level locator for these transactions + // can be reused if another staging is created). + for (auto idx : stage.m_removed) { + m_entries[idx].m_locator[stage_level].SetMissing(); + } + // Then move all Clusters in staging to main. 
+ for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& stage_sets = stage.m_clusters[quality]; + while (!stage_sets.empty()) { + stage_sets.back()->LevelDown(*this); + } + } + // Move all statistics, precomputed data, and to-be-applied removals and dependencies. + main.m_deps_to_add = std::move(stage.m_deps_to_add); + main.m_to_remove = std::move(stage.m_to_remove); + main.m_group_data = std::move(stage.m_group_data); + main.m_txcount = std::move(stage.m_txcount); + // Delete the old staging graph, after all its information was moved to main. + m_clustersets.pop_back(); + Compact(); } void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept @@ -1279,9 +1604,9 @@ void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcep // in the same Cluster. m_depgraph.FeeRate(idx).fee = fee; if (!NeedsSplitting()) { - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); } else { - graph.SetClusterQuality(m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); } Updated(graph); } @@ -1293,13 +1618,15 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept Assume(GetRefGraph(ref) == this); // Find the entry, its locator, and inform its Cluster about the new feerate, if any. 
auto& entry = m_entries[GetRefIndex(ref)]; - auto& locator = entry.m_locator; - if (locator.IsPresent()) { - locator.cluster->SetFee(*this, locator.index, fee); + for (int level = 0; level < MAX_LEVELS; ++level) { + auto& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->SetFee(*this, locator.index, fee); + } } } -void Cluster::SanityCheck(const TxGraphImpl& graph) const +void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). assert(m_depgraph.PositionRange() == m_mapping.size()); @@ -1307,6 +1634,8 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const assert(m_depgraph.TxCount() == m_linearization.size()); // The number of transactions in a Cluster cannot exceed m_max_cluster_count. assert(m_linearization.size() <= graph.m_max_cluster_count); + // The level must match the level the Cluster occurs in. + assert(m_level == level); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. // Compute the chunking of m_linearization. @@ -1322,14 +1651,14 @@ void Cluster::SanityCheck(const TxGraphImpl& graph) const m_done.Set(lin_pos); assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); // Check that the Entry has a locator pointing back to this Cluster & position within it. - assert(entry.m_locator.cluster == this); - assert(entry.m_locator.index == lin_pos); - // Check linearization position and chunk feerate. - if (IsAcceptable()) { + assert(entry.m_locator[level].cluster == this); + assert(entry.m_locator[level].index == lin_pos); + // For top-level entries, check linearization position and chunk feerate. 
+ if (level == 0 && IsAcceptable()) { if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); } - assert(entry.m_chunk_feerate == linchunking.GetChunk(0).feerate); + assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); // If this Cluster has an acceptable quality level, its chunks must be connected. assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } @@ -1342,8 +1671,12 @@ void TxGraphImpl::SanityCheck() const { /** Which GraphIndexes ought to occur in m_unlinked, based on m_entries. */ std::set expected_unlinked; - /** Which Clusters ought to occur in m_clusters, based on m_entries. */ - std::set expected_clusters; + /** Which Clusters ought to occur in ClusterSet::m_clusters, based on m_entries. */ + std::set expected_clusters[MAX_LEVELS]; + /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */ + std::set expected_removed[MAX_LEVELS]; + /** Whether compaction is possible in the current state. */ + bool compact_possible{true}; // Go over all Entry objects in m_entries. for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { @@ -1356,63 +1689,91 @@ void TxGraphImpl::SanityCheck() const assert(GetRefGraph(*entry.m_ref) == this); assert(GetRefIndex(*entry.m_ref) == idx); } - const auto& locator = entry.m_locator; - // Every Locator must be in exactly one of these 2 states. - assert(locator.IsMissing() + locator.IsPresent() == 1); - if (locator.IsPresent()) { - // Verify that the Cluster agrees with where the Locator claims the transaction is. - assert(locator.cluster->GetClusterEntry(locator.index) == idx); - // Remember that we expect said Cluster to appear in the m_clusters. - expected_clusters.insert(locator.cluster); - } - - } - - auto& clusterset = m_clusterset; - std::set actual_clusters; - // For all quality levels... 
- for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { - QualityLevel quality{qual}; - const auto& quality_clusters = clusterset.m_clusters[qual]; - // ... for all clusters in them ... - for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { - const auto& cluster = *quality_clusters[setindex]; - // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't - // expected to be referenced by the Entry vector). - if (cluster.GetTxCount() != 0) { - actual_clusters.insert(&cluster); + // Verify the Entry m_locators. + bool was_present{false}, was_removed{false}; + for (int level = 0; level < MAX_LEVELS; ++level) { + const auto& locator = entry.m_locator[level]; + // Every Locator must be in exactly one of these 3 states. + assert(locator.IsMissing() + locator.IsRemoved() + locator.IsPresent() == 1); + if (locator.IsPresent()) { + // Once removed, a transaction cannot be revived. + assert(!was_removed); + // Verify that the Cluster agrees with where the Locator claims the transaction is. + assert(locator.cluster->GetClusterEntry(locator.index) == idx); + // Remember that we expect said Cluster to appear in the ClusterSet::m_clusters. + expected_clusters[level].insert(locator.cluster); + was_present = true; + } else if (locator.IsRemoved()) { + // Level 0 (main) cannot have IsRemoved locators (IsMissing there means non-existing). + assert(level > 0); + // A Locator can only be IsRemoved if it was IsPresent before, and only once. + assert(was_present && !was_removed); + // Remember that we expect this GraphIndex to occur in the ClusterSet::m_removed. + expected_removed[level].insert(idx); + was_removed = true; } - // Sanity check the cluster, according to the Cluster's internal rules. - cluster.SanityCheck(*this); - // Check that the cluster's quality and setindex matches its position in the quality list. 
- assert(cluster.m_quality == quality); - assert(cluster.m_setindex == setindex); } } - // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. - for (GraphIndex idx : m_clusterset.m_to_remove) { - assert(idx < m_entries.size()); - assert(m_entries[idx].m_locator.IsPresent()); - } + // For all levels (0 = main, 1 = staged)... + for (size_t level = 0; level < m_clustersets.size(); ++level) { + assert(level < MAX_LEVELS); + auto& clusterset = m_clustersets[level]; + std::set actual_clusters; - // Verify that all to-be-added dependencies have valid identifiers. - for (auto [par_idx, chl_idx] : m_clusterset.m_deps_to_add) { - assert(par_idx != chl_idx); - assert(par_idx < m_entries.size()); - assert(chl_idx < m_entries.size()); - } + // For all quality levels... + for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { + QualityLevel quality{qual}; + const auto& quality_clusters = clusterset.m_clusters[qual]; + // ... for all clusters in them ... + for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { + const auto& cluster = *quality_clusters[setindex]; + // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't + // expected to be referenced by the Entry vector). + if (cluster.GetTxCount() != 0) { + actual_clusters.insert(&cluster); + } + // Sanity check the cluster, according to the Cluster's internal rules. + cluster.SanityCheck(*this, level); + // Check that the cluster's quality and setindex matches its position in the quality list. + assert(cluster.m_quality == quality); + assert(cluster.m_setindex == setindex); + } + } - // Verify that the actually encountered clusters match the ones occurring in Entry vector. - assert(actual_clusters == expected_clusters); + // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. 
+ for (GraphIndex idx : clusterset.m_to_remove) { + assert(idx < m_entries.size()); + assert(FindCluster(idx, level) != nullptr); + } + + // Verify that all to-be-added dependencies have valid identifiers. + for (auto [par_idx, chl_idx] : clusterset.m_deps_to_add) { + assert(par_idx != chl_idx); + assert(par_idx < m_entries.size()); + assert(chl_idx < m_entries.size()); + } + + // Verify that the actually encountered clusters match the ones occurring in Entry vector. + assert(actual_clusters == expected_clusters[level]); + + // Verify that the contents of m_removed matches what was expected based on the Entry vector. + std::set actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end()); + assert(actual_removed == expected_removed[level]); + + // If any GraphIndex entries remain in this ClusterSet, compact is not possible. + if (!clusterset.m_deps_to_add.empty()) compact_possible = false; + if (!clusterset.m_to_remove.empty()) compact_possible = false; + if (!clusterset.m_removed.empty()) compact_possible = false; + } // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. std::set actual_unlinked(m_unlinked.begin(), m_unlinked.end()); assert(actual_unlinked == expected_unlinked); - // If no to-be-removed transactions, or to-be-added dependencies remain, m_unlinked must be + // If compaction was possible, it should have been performed already, and m_unlinked must be // empty (to prevent memory leaks due to an ever-growing m_entries vector). - if (clusterset.m_to_remove.empty() && clusterset.m_deps_to_add.empty()) { + if (compact_possible) { assert(actual_unlinked.empty()); } } diff --git a/src/txgraph.h b/src/txgraph.h index 04663131611..7a1cf53dea6 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -42,7 +42,7 @@ public: * TxGraph::AddTransaction. */ Ref() noexcept = default; /** Destroy this Ref. This is only allowed when it is empty, or the transaction it refers - * to has been removed from the graph. 
*/ + * to does not exist in the graph (in main nor staging). */ virtual ~Ref(); // Support moving a Ref. Ref& operator=(Ref&& other) noexcept; @@ -69,11 +69,13 @@ protected: public: /** Virtual destructor, so inheriting is safe. */ virtual ~TxGraph() = default; - /** Construct a new transaction with the specified feerate, and return a Ref to it. In all + /** Construct a new transaction with the specified feerate, and return a Ref to it. + * If a staging graph exists, the new transaction is only created there. In all * further calls, only Refs created by AddTransaction() are allowed to be passed to this * TxGraph object (or empty Ref objects). */ [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; - /** Remove the specified transaction. This is a no-op if the transaction was already removed. + /** Remove the specified transaction. If a staging graph exists, the removal only happens + * there. This is a no-op if the transaction was already removed. * * TxGraph may internally reorder transaction removals with dependency additions for * performance reasons. If together with any transaction removal all its descendants, or all @@ -87,39 +89,61 @@ public: * original order case and the reordered case. */ virtual void RemoveTransaction(const Ref& arg) noexcept = 0; - /** Add a dependency between two specified transactions. Parent may not be a descendant of - * child already (but may be an ancestor of it already, in which case this is a no-op). If - * either transaction is already removed, this is a no-op. */ + /** Add a dependency between two specified transactions. If a staging graph exists, the + * dependency is only added there. Parent may not be a descendant of child already (but may + * be an ancestor of it already, in which case this is a no-op). If either transaction is + * already removed, this is a no-op. 
*/ virtual void AddDependency(const Ref& parent, const Ref& child) noexcept = 0; - /** Modify the fee of the specified transaction. If the transaction does not exist (or was - * removed), this has no effect. */ + /** Modify the fee of the specified transaction, in both the main graph and the staging + * graph if it exists. Wherever the transaction does not exist (or was removed), this has no + * effect. */ virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; - /** Determine whether arg exists in this graph (i.e., was not removed). */ - virtual bool Exists(const Ref& arg) noexcept = 0; + /** Create a staging graph (which cannot exist already). This acts as if a full copy of + * the transaction graph is made, upon which further modifications are made. This copy can + * be inspected, and then either discarded, or the main graph can be replaced by it by + * committing it. */ + virtual void StartStaging() noexcept = 0; + /** Discard the existing active staging graph (which must exist). */ + virtual void AbortStaging() noexcept = 0; + /** Replace the main graph with the staging graph (which must exist). */ + virtual void CommitStaging() noexcept = 0; + /** Check whether a staging graph exists. */ + virtual bool HaveStaging() const noexcept = 0; + + /** Determine whether arg exists in the graph (i.e., was not removed). If main_only is false + * and a staging graph exists, it is queried; otherwise the main graph is queried. */ + virtual bool Exists(const Ref& arg, bool main_only = false) noexcept = 0; /** Determine whether the graph is oversized (contains a connected component of more than the - * configured maximum cluster count). Some of the functions below are not available + * configured maximum cluster count). If main_only is false and a staging graph exists, it is + * queried; otherwise the main graph is queried. Some of the functions below are not available * for oversized graphs. The mutators above are always available. 
*/ - virtual bool IsOversized() noexcept = 0; - /** Get the feerate of the chunk which transaction arg is in. Returns the empty FeeFrac if arg - * does not exist. The graph must not be oversized. */ - virtual FeePerWeight GetChunkFeerate(const Ref& arg) noexcept = 0; + virtual bool IsOversized(bool main_only = false) noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in the main graph. Returns the empty + * FeeFrac if arg does not exist in the main graph. The main graph must not be oversized. */ + virtual FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept = 0; /** Get the individual transaction feerate of transaction arg. Returns the empty FeeFrac if - * arg does not exist. This is available even for oversized graphs. */ + * arg does not exist in either main or staging. This is available even for oversized + * graphs. */ virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; /** Get pointers to all transactions in the connected component ("cluster") which arg is in. * The transactions will be returned in a topologically-valid order of acceptable quality. - * Returns {} if arg does not exist in the queried graph. */ - virtual std::vector GetCluster(const Ref& arg) noexcept = 0; - /** Get pointers to all ancestors of the specified transaction. The queried graph must not be - * oversized. Returns {} if arg does not exist. */ - virtual std::vector GetAncestors(const Ref& arg) noexcept = 0; - /** Get pointers to all descendants of the specified transaction. The graph must not be - * oversized. Returns {} if arg does not exist in the graph. */ - virtual std::vector GetDescendants(const Ref& arg) noexcept = 0; - /** Get the total number of transactions in the graph. This is available even for oversized - * graphs. */ - virtual GraphIndex GetTransactionCount() noexcept = 0; + * If main_only is false and a staging graph exists, it is queried; otherwise the main graph + * is queried. The queried graph must not be oversized. 
Returns {} if arg does not exist in + * the queried graph. */ + virtual std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get pointers to all ancestors of the specified transaction. If main_only is false and a + * staging graph exists, it is queried; otherwise the main graph is queried. The queried + * graph must not be oversized. Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get pointers to all descendants of the specified transaction. If main_only is false and a + * staging graph exists, it is queried; otherwise the main graph is queried. The queried + * graph must not be oversized. Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get the total number of transactions in the graph. If main_only is false and a staging + * graph exists, it is queried; otherwise the main graph is queried. This is available even + * for oversized graphs. */ + virtual GraphIndex GetTransactionCount(bool main_only = false) noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; From 75b463feaf32eb3d9abb1ba5e8846402561285f8 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Sat, 25 Jan 2025 00:27:52 -0500 Subject: [PATCH 17/30] txgraph: (optimization) cache oversizedness of graphs --- src/txgraph.cpp | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 7d0fd4eb6fe..9b605a4e85b 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -229,6 +229,9 @@ private: /** Total number of transactions in this graph (sum of all transaction counts in all * Clusters, and for staging also those inherited from the main ClusterSet). */ GraphIndex m_txcount{0}; + /** Whether this graph is oversized (if known). 
This roughly matches + * m_group_data->m_group_oversized, but may be known even if m_group_data is not. */ + std::optional m_oversized{false}; }; /** The ClusterSets in this TxGraphImpl. Has exactly 1 (main) or exactly 2 elements (main and staged). */ @@ -1180,6 +1183,7 @@ void TxGraphImpl::GroupClusters(int level) noexcept } Assume(an_deps_it == an_deps.end()); Assume(an_clusters_it == an_clusters.end()); + clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; Compact(); } @@ -1213,6 +1217,8 @@ void TxGraphImpl::Merge(std::span to_merge) noexcept void TxGraphImpl::ApplyDependencies(int level) noexcept { auto& clusterset = m_clustersets[level]; + // Do not bother computing groups if we already know the result will be oversized. + if (clusterset.m_oversized == true) return; // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). GroupClusters(level); Assume(clusterset.m_group_data.has_value()); @@ -1322,6 +1328,7 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept clusterset.m_to_remove.push_back(GetRefIndex(arg)); // Wipe m_group_data (as it will need to be recomputed). clusterset.m_group_data.reset(); + if (clusterset.m_oversized == true) clusterset.m_oversized = std::nullopt; } void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept @@ -1343,6 +1350,7 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); // Wipe m_group_data (as it will need to be recomputed). clusterset.m_group_data.reset(); + if (clusterset.m_oversized == false) clusterset.m_oversized = std::nullopt; } bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept @@ -1511,11 +1519,17 @@ FeePerWeight TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept bool TxGraphImpl::IsOversized(bool main_only) noexcept { size_t level = main_only ? 
0 : m_clustersets.size() - 1; + auto& clusterset = m_clustersets[level]; + if (clusterset.m_oversized.has_value()) { + // Return cached value if known. + return *clusterset.m_oversized; + } // Find which Clusters will need to be merged together, as that is where the oversize // property is assessed. GroupClusters(level); - Assume(m_clustersets[level].m_group_data.has_value()); - return m_clustersets[level].m_group_data->m_group_oversized; + Assume(clusterset.m_group_data.has_value()); + clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; + return *clusterset.m_oversized; } void TxGraphImpl::StartStaging() noexcept @@ -1523,8 +1537,10 @@ void TxGraphImpl::StartStaging() noexcept Assume(m_clustersets.size() < MAX_LEVELS); // Apply all remaining dependencies in main before creating a staging graph. Once staging // exists, we cannot merge Clusters anymore (because of interference with Clusters being - // pulled into staging), so to make sure all inspectors are available (if not oversized), - // do all merging work now. This also involves applying all removals. + // pulled into staging), so to make sure all inspectors are available (if not oversized), do + // all merging work now. Call SplitAll() first, so that even if ApplyDependencies does not do + // anything due to knowing the result is oversized, splitting is still performed. + SplitAll(m_clustersets.size() - 1); ApplyDependencies(m_clustersets.size() - 1); // Construct a new graph. 
m_clustersets.emplace_back(); @@ -1535,6 +1551,8 @@ void TxGraphImpl::StartStaging() noexcept stage.m_txcount = main.m_txcount; stage.m_deps_to_add = main.m_deps_to_add; stage.m_group_data = main.m_group_data; + stage.m_oversized = main.m_oversized; + Assume(stage.m_oversized.has_value()); } void TxGraphImpl::AbortStaging() noexcept @@ -1588,6 +1606,7 @@ void TxGraphImpl::CommitStaging() noexcept main.m_deps_to_add = std::move(stage.m_deps_to_add); main.m_to_remove = std::move(stage.m_to_remove); main.m_group_data = std::move(stage.m_group_data); + main.m_oversized = std::move(stage.m_oversized); main.m_txcount = std::move(stage.m_txcount); // Delete the old staging graph, after all its information was moved to main. m_clustersets.pop_back(); @@ -1765,6 +1784,11 @@ void TxGraphImpl::SanityCheck() const if (!clusterset.m_deps_to_add.empty()) compact_possible = false; if (!clusterset.m_to_remove.empty()) compact_possible = false; if (!clusterset.m_removed.empty()) compact_possible = false; + + // If m_group_data exists, its m_group_oversized must match m_oversized. + if (clusterset.m_group_data.has_value()) { + assert(clusterset.m_oversized == clusterset.m_group_data->m_group_oversized); + } } // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. From ee5c413b43947b47b3598dbf29cee1dd54e49623 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 3 Dec 2024 11:25:49 -0500 Subject: [PATCH 18/30] txgraph: (feature) destroying Ref means removing transaction Before this commit, if a TxGraph::Ref object is destroyed, it becomes impossible to refer to, but the actual corresponding transaction node in the TxGraph remains, and remains indefinitely as there is no way to remove it. Fix this by making the destruction of TxGraph::Ref trigger immediate removal of the corresponding transaction in TxGraph, both in main and staging if it exists. 
--- src/test/fuzz/txgraph.cpp | 65 +++++++++++--- src/txgraph.cpp | 178 +++++++++++++++++++++++++------------- src/txgraph.h | 8 +- 3 files changed, 177 insertions(+), 74 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index cc20f9e3c4f..976839464b7 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -146,6 +146,30 @@ struct SimTxGraph if (oversized.has_value() && *oversized) oversized = std::nullopt; } + /** Destroy the transaction from the graph, including from the removed set. This will + * trigger TxGraph::Ref::~Ref. reset_oversize controls whether the cached oversized + * value is cleared (destroying does not clear oversizedness in TxGraph of the main + * graph while staging exists). */ + void DestroyTransaction(TxGraph::Ref* ref, bool reset_oversize) + { + auto pos = Find(ref); + if (pos == MISSING) { + // Wipe the ref, if it exists, from the removed vector. Use std::partition rather + // than std::erase because we don't care about the order of the entries that + // remain. + auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != ref; }); + removed.erase(remove, removed.end()); + } else { + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + simmap[pos].reset(); + // This may invalidate our cached oversized value. + if (reset_oversize && oversized.has_value() && *oversized) { + oversized = std::nullopt; + } + } + } + /** Construct the set with all positions in this graph corresponding to the specified * TxGraph::Refs. All of them must occur in this graph and not be removed. */ SetType MakeSet(std::span arg) @@ -327,9 +351,9 @@ FUZZ_TARGET(txgraph) } break; } else if (sel_sim.removed.size() > 0 && command-- == 0) { - // ~Ref. Destroying a TxGraph::Ref has an observable effect on the TxGraph it - // refers to, so this simulation permits doing so separately from other actions on - // TxGraph. 
+ // ~Ref (of an already-removed transaction). Destroying a TxGraph::Ref has an + // observable effect on the TxGraph it refers to, so this simulation permits doing + // so separately from other actions on TxGraph. // Pick a Ref of sel_sim.removed to destroy. Note that the same Ref may still occur // in the other graph, and thus not actually trigger ~Ref yet (which is exactly @@ -341,6 +365,28 @@ FUZZ_TARGET(txgraph) } sel_sim.removed.pop_back(); break; + } else if (command-- == 0) { + // ~Ref (of any transaction). + std::vector to_destroy; + to_destroy.push_back(pick_fn()); + while (true) { + // Keep adding either the ancestors or descendants the already picked + // transactions have in both graphs (main and staging) combined. Destroying + // will trigger deletions in both, so to have consistent TxGraph behavior, the + // set must be closed under ancestors, or descendants, in both graphs. + auto old_size = to_destroy.size(); + for (auto& sim : sims) sim.IncludeAncDesc(to_destroy, alt); + if (to_destroy.size() == old_size) break; + } + // The order in which these ancestors/descendants are destroyed should not matter; + // randomly shuffle them. + std::shuffle(to_destroy.begin(), to_destroy.end(), rng); + for (TxGraph::Ref* ptr : to_destroy) { + for (size_t level = 0; level < sims.size(); ++level) { + sims[level].DestroyTransaction(ptr, level == sims.size() - 1); + } + } + break; } else if (command-- == 0) { // SetTransactionFee. int64_t fee; @@ -457,6 +503,10 @@ FUZZ_TARGET(txgraph) // AbortStaging. real->AbortStaging(); sims.pop_back(); + // Reset the cached oversized value (if TxGraph::Ref destructions triggered + // removals of main transactions while staging was active, then aborting will + // cause it to be re-evaluated in TxGraph). + sims.back().oversized = std::nullopt; break; } } @@ -537,13 +587,4 @@ FUZZ_TARGET(txgraph) // Sanity check again (because invoking inspectors may modify internal unobservable state). 
real->SanityCheck(); - - // Remove all remaining transactions, because Refs cannot be destroyed otherwise (this will be - // addressed in a follow-up commit). - for (auto& sim : sims) { - for (auto i : sim.graph.Positions()) { - auto ref = sim.GetRef(i); - real->RemoveTransaction(*ref); - } - } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 9b605a4e85b..62b00b2a5bb 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -359,8 +359,33 @@ public: auto& entry = m_entries[idx]; Assume(entry.m_ref != nullptr); entry.m_ref = nullptr; + // Mark the transaction as to be removed in all levels where it explicitly or implicitly + // exists. + bool exists_anywhere{false}; + bool exists{false}; + for (size_t level = 0; level < m_clustersets.size(); ++level) { + if (entry.m_locator[level].IsPresent()) { + exists_anywhere = true; + exists = true; + } else if (entry.m_locator[level].IsRemoved()) { + exists = false; + } + if (exists) { + m_clustersets[level].m_to_remove.push_back(idx); + // Force recomputation of grouping data. + m_clustersets[level].m_group_data = std::nullopt; + // Do not wipe the oversized state of a lower level graph (main) if a higher level + // one (staging) exists. The reason for this is that the alternative would mean that + // cluster merges may need to be applied to a formerly-oversized main graph while + // staging exists (to satisfy chunk feerate queries into main, for example), and such + // merges could conflict with pulls of some of their constituents into staging. + if (level == m_clustersets.size() - 1 && m_clustersets[level].m_oversized == true) { + m_clustersets[level].m_oversized = std::nullopt; + } + } + } m_unlinked.push_back(idx); - Compact(); + if (!exists_anywhere) Compact(); } // Functions related to various normalization/application steps. @@ -368,17 +393,17 @@ public: * values for remaining Entrys, so this only does something when no to-be-applied operations * or staged removals referring to GraphIndexes remain). 
*/ void Compact() noexcept; - /** If cluster is not in the top level, copy it there, and return a pointer to it. This has no - * effect if only a main graph exists, but if staging exists this modifies the locators of its - * transactions from inherited (P,M) to explicit (P,P). */ - Cluster* PullIn(Cluster* cluster) noexcept; + /** If cluster is not in to_level, copy it there, and return a pointer to it. This has no + * effect if to_level is 0 (main), but for to_level=1 (staging) this modifies the locators of + * its transactions from inherited (P,M) to explicit (P,P). */ + Cluster* PullIn(Cluster* cluster, int to_level) noexcept; /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a - * NEEDS_SPLIT* QualityLevel) in the specified level. */ - void ApplyRemovals(int level) noexcept; - /** Split an individual cluster (which must be in the top-level ClusterSet). */ + * NEEDS_SPLIT* QualityLevel) up to the specified level. */ + void ApplyRemovals(int up_to_level) noexcept; + /** Split an individual cluster. */ void Split(Cluster& cluster) noexcept; - /** Split all clusters that need splitting in the specified level. */ - void SplitAll(int level) noexcept; + /** Split all clusters that need splitting up to the specified level. */ + void SplitAll(int up_to_level) noexcept; /** Populate m_group_data based on m_deps_to_add in the specified level. */ void GroupClusters(int level) noexcept; /** Merge the specified clusters. */ @@ -425,6 +450,17 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept } // Update the transaction count. --m_clustersets[level].m_txcount; + // Adjust the status of Locators of this transaction at higher levels. 
+ for (size_t after_level = level + 1; after_level < m_clustersets.size(); ++after_level) { + if (entry.m_locator[after_level].IsPresent()) { + break; + } else if (entry.m_locator[after_level].IsRemoved()) { + entry.m_locator[after_level].SetMissing(); + break; + } else { + --m_clustersets[after_level].m_txcount; + } + } } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -477,8 +513,9 @@ std::vector TxGraphImpl::GetConflicts() const noexcept // All Clusters at level-1 containing transactions in m_removed (so (P,R) ones) are conflicts. for (auto i : m_clustersets[level].m_removed) { auto& entry = m_entries[i]; - Assume(entry.m_locator[level - 1].IsPresent()); - ret.push_back(entry.m_locator[level - 1].cluster); + if (entry.m_locator[level - 1].IsPresent()) { + ret.push_back(entry.m_locator[level - 1].cluster); + } } // Then go over all Clusters at this level, and find their conflicts (the (P,P) ones). for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { @@ -834,9 +871,8 @@ Cluster* TxGraphImpl::FindCluster(GraphIndex idx, int level) const noexcept return nullptr; } -Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept +Cluster* TxGraphImpl::PullIn(Cluster* cluster, int to_level) noexcept { - int to_level = m_clustersets.size() - 1; if (to_level == 0) return cluster; int level = cluster->m_level; Assume(level <= to_level); @@ -852,39 +888,39 @@ Cluster* TxGraphImpl::PullIn(Cluster* cluster) noexcept return cluster; } -void TxGraphImpl::ApplyRemovals(int level) noexcept +void TxGraphImpl::ApplyRemovals(int up_to_level) noexcept { - auto& clusterset = m_clustersets[level]; - auto& to_remove = clusterset.m_to_remove; - // Skip if there is nothing to remove. - if (to_remove.empty()) return; - // There cannot be removals to be applied in main when staging exists (they should have been - // applied in StartStaging already, and none can be added to main while staging exists). 
- Assume(size_t(level) == m_clustersets.size() - 1); - // Pull in all Clusters that are not in the top ClusterSet. - for (GraphIndex index : to_remove) { - auto cluster = FindCluster(index, level); - PullIn(cluster); - } - // Group the set of to-be-removed entries by Cluster*. - std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { - return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); - }); - // Process per Cluster. - std::span to_remove_span{to_remove}; - while (!to_remove_span.empty()) { - Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; - if (cluster != nullptr) { - // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it - // can pop off whatever applies to it. - cluster->ApplyRemovals(*this, to_remove_span); - } else { - // Otherwise, skip this already-removed entry. This may happen when RemoveTransaction - // was called twice on the same Ref. - to_remove_span = to_remove_span.subspan(1); + Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size()); + for (int level = 0; level <= up_to_level; ++level) { + auto& clusterset = m_clustersets[level]; + auto& to_remove = clusterset.m_to_remove; + // Skip if there is nothing to remove in this level. + if (to_remove.empty()) continue; + // Pull in all Clusters that are not in the ClusterSet at level level. + for (GraphIndex index : to_remove) { + auto cluster = FindCluster(index, level); + if (cluster != nullptr) PullIn(cluster, level); } + // Group the set of to-be-removed entries by Cluster*. + std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); + }); + // Process per Cluster. 
+ std::span to_remove_span{to_remove}; + while (!to_remove_span.empty()) { + Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; + if (cluster != nullptr) { + // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it + // can pop off whatever applies to it. + cluster->ApplyRemovals(*this, to_remove_span); + } else { + // Otherwise, skip this already-removed entry. This may happen when + // RemoveTransaction was called twice on the same Ref, for example. + to_remove_span = to_remove_span.subspan(1); + } + } + to_remove.clear(); } - to_remove.clear(); Compact(); } @@ -898,7 +934,7 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept for (int i = 0; i < 2; ++i) { GraphIndex idx = i ? b : a; Entry& entry = m_entries[idx]; - // Update linked Ref. + // Update linked Ref, if any exists. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; // Update the locators for both levels. The rest of the Entry information will not change, // so no need to invoke Cluster::Updated(). @@ -962,14 +998,17 @@ void TxGraphImpl::Split(Cluster& cluster) noexcept } } -void TxGraphImpl::SplitAll(int level) noexcept +void TxGraphImpl::SplitAll(int up_to_level) noexcept { + Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size()); // Before splitting all Cluster, first make sure all removals are applied. 
- ApplyRemovals(level); - for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { - auto& queue = m_clustersets[level].m_clusters[int(quality)]; - while (!queue.empty()) { - Split(*queue.back().get()); + ApplyRemovals(up_to_level); + for (int level = 0; level <= up_to_level; ++level) { + for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { + auto& queue = m_clustersets[level].m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } } } } @@ -979,10 +1018,6 @@ void TxGraphImpl::GroupClusters(int level) noexcept auto& clusterset = m_clustersets[level]; // If the groupings have been computed already, nothing is left to be done. if (clusterset.m_group_data.has_value()) return; - // We should never need to compute main grouping while staging exists (it should have already - // been computing in StartStaging, and no modifications that invalidate it can be made while - // staging exists). - Assume(size_t(level) == m_clustersets.size() - 1); // Before computing which Clusters need to be merged together, first apply all removals and // split the Clusters into connected components. If we would group first, we might end up @@ -1233,7 +1268,7 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); // Pull in all the Clusters that contain dependencies. for (Cluster*& cluster : cluster_span) { - cluster = PullIn(cluster); + cluster = PullIn(cluster, level); } // Invoke Merge() to merge them into a single Cluster. Merge(cluster_span); @@ -1371,6 +1406,7 @@ std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, De // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. 
for (auto idx : m_depgraph.Ancestors(idx)) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1383,6 +1419,7 @@ std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. for (auto idx : m_depgraph.Descendants(idx)) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1395,6 +1432,7 @@ std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noe // Translate all transactions in the Cluster (in linearization order) to Refs. for (auto idx : m_linearization) { const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); ret.push_back(entry.m_ref); } return ret; @@ -1485,7 +1523,8 @@ FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept // be identical if it occurs in both), and return the empty FeePerWeight if it isn't in any. Cluster* cluster{nullptr}; for (int level = 0; size_t(level) < m_clustersets.size(); ++level) { - // Apply removals, so that we can correctly report FeePerWeight{} for non-existing transaction. + // Apply removals, so that we can correctly report FeePerWeight{} for non-existing + // transactions. ApplyRemovals(level); if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) { cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster; @@ -1574,6 +1613,11 @@ void TxGraphImpl::AbortStaging() noexcept // Destroy the staging graph data. m_clustersets.pop_back(); Compact(); + if (!m_clustersets.back().m_group_data.has_value()) { + // In case m_oversized in main was kept after a Ref destruction while staging exists, we + // need to re-evaluate m_oversized now. 
+ m_clustersets.back().m_oversized = std::nullopt; + } } void TxGraphImpl::CommitStaging() noexcept @@ -1760,10 +1804,13 @@ void TxGraphImpl::SanityCheck() const } } - // Verify that all to-be-removed transactions have valid identifiers, and aren't removed yet. + // Verify that all to-be-removed transactions have valid identifiers. for (GraphIndex idx : clusterset.m_to_remove) { assert(idx < m_entries.size()); - assert(FindCluster(idx, level) != nullptr); + // We cannot assert that all m_to_remove transactions are still present: ~Ref on a + // (P,M) transaction (present in main, inherited in staging) will cause an m_to_remove + // addition in both main and staging, but a subsequent ApplyRemovals in main will + // cause it to disappear from staging too, leaving the m_to_remove in place. } // Verify that all to-be-added dependencies have valid identifiers. @@ -1778,6 +1825,15 @@ void TxGraphImpl::SanityCheck() const // Verify that the contents of m_removed matches what was expected based on the Entry vector. std::set actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end()); + for (auto i : expected_unlinked) { + // If a transaction exists in both main and staging, and is removed from staging (adding + // it to m_removed there), and consequently destroyed (wiping the locator completely), + // it can remain in m_removed despite not having an IsRemoved() locator. Exclude those + // transactions from the comparison here. + actual_removed.erase(i); + expected_removed[level].erase(i); + } + assert(actual_removed == expected_removed[level]); // If any GraphIndex entries remain in this ClusterSet, compact is not possible. @@ -1789,6 +1845,10 @@ void TxGraphImpl::SanityCheck() const if (clusterset.m_group_data.has_value()) { assert(clusterset.m_oversized == clusterset.m_group_data->m_group_oversized); } + + // For non-top levels, m_oversized must be known (as it cannot change until the level + // on top is gone).
+ if (level < m_clustersets.size() - 1) assert(clusterset.m_oversized.has_value()); } // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. diff --git a/src/txgraph.h b/src/txgraph.h index 7a1cf53dea6..5badb058d26 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -41,8 +41,8 @@ public: /** Construct an empty Ref. Non-empty Refs can only be created using * TxGraph::AddTransaction. */ Ref() noexcept = default; - /** Destroy this Ref. This is only allowed when it is empty, or the transaction it refers - * to does not exist in the graph (in main nor staging). */ + /** Destroy this Ref. If it is not empty, the corresponding transaction is removed (in both + * main and staging, if it exists). */ virtual ~Ref(); // Support moving a Ref. Ref& operator=(Ref&& other) noexcept; @@ -117,7 +117,9 @@ public: /** Determine whether the graph is oversized (contains a connected component of more than the * configured maximum cluster count). If main_only is false and a staging graph exists, it is * queried; otherwise the main graph is queried. Some of the functions below are not available - * for oversized graphs. The mutators above are always available. */ + * for oversized graphs. The mutators above are always available. Removing a transaction by + * destroying its Ref while staging exists will not clear main's oversizedness until staging + * is aborted or committed. */ virtual bool IsOversized(bool main_only = false) noexcept = 0; /** Get the feerate of the chunk which transaction arg is in the main graph. Returns the empty * FeeFrac if arg does not exist in the main graph. The main graph must not be oversized. 
*/ From 9ae139c747623f7727e9bcfa9e67cfe40b50f4bc Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 14 Nov 2024 16:16:59 -0500 Subject: [PATCH 19/30] txgraph: (feature) expose ability to compare transactions In order to make it possible for higher layers to compare transaction quality (ordering within the implicit total ordering on the mempool), expose a comparison function and test it. --- src/test/fuzz/txgraph.cpp | 66 +++++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 46 ++++++++++++++++++++++++--- src/txgraph.h | 4 +++ 3 files changed, 112 insertions(+), 4 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 976839464b7..d82dd62921d 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -508,6 +508,24 @@ FUZZ_TARGET(txgraph) // cause it to be re-evaluated in TxGraph). sims.back().oversized = std::nullopt; break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // CompareMainOrder. + auto ref_a = pick_fn(); + auto ref_b = pick_fn(); + auto sim_a = main_sim.Find(ref_a); + auto sim_b = main_sim.Find(ref_b); + // Both transactions must exist in the main graph. + if (sim_a == SimTxGraph::MISSING || sim_b == SimTxGraph::MISSING) break; + auto cmp = real->CompareMainOrder(*ref_a, *ref_b); + // Distinct transactions have distinct places. + if (sim_a != sim_b) assert(cmp != 0); + // Ancestors go before descendants. + if (main_sim.graph.Ancestors(sim_a)[sim_b]) assert(cmp >= 0); + if (main_sim.graph.Descendants(sim_a)[sim_b]) assert(cmp <= 0); + // Do not verify consistency with chunk feerates, as we cannot easily determine + // these here without making more calls to real, which could affect its internal + // state. A full comparison is done at the end. + break; } } } @@ -515,6 +533,54 @@ FUZZ_TARGET(txgraph) // After running all modifications, perform an internal sanity check (before invoking // inspectors that may modify the internal state). 
real->SanityCheck(); + + if (!sims[0].IsOversized()) { + // If the main graph is not oversized, verify the total ordering implied by + // CompareMainOrder. + // First construct two distinct randomized permutations of the positions in sims[0]. + std::vector vec1; + for (auto i : sims[0].graph.Positions()) vec1.push_back(i); + std::shuffle(vec1.begin(), vec1.end(), rng); + auto vec2 = vec1; + std::shuffle(vec2.begin(), vec2.end(), rng); + if (vec1 == vec2) std::next_permutation(vec2.begin(), vec2.end()); + // Sort both according to CompareMainOrder. By having randomized starting points, the order + // of CompareMainOrder invocations is somewhat randomized as well. + auto cmp = [&](SimTxGraph::Pos a, SimTxGraph::Pos b) noexcept { + return real->CompareMainOrder(*sims[0].GetRef(a), *sims[0].GetRef(b)) < 0; + }; + std::sort(vec1.begin(), vec1.end(), cmp); + std::sort(vec2.begin(), vec2.end(), cmp); + + // Verify the resulting orderings are identical. This could only fail if the ordering was + // not total. + assert(vec1 == vec2); + + // Verify that the ordering is topological. + auto todo = sims[0].graph.Positions(); + for (auto i : vec1) { + todo.Reset(i); + assert(!sims[0].graph.Ancestors(i).Overlaps(todo)); + } + assert(todo.None()); + + // For every transaction in the total ordering, find a random one before it and after it, + // and compare their chunk feerates, which must be consistent with the ordering. 
+ for (size_t pos = 0; pos < vec1.size(); ++pos) { + auto pos_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[pos])); + if (pos > 0) { + size_t before = rng.randrange(pos); + auto before_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[before])); + assert(FeeRateCompare(before_feerate, pos_feerate) >= 0); + } + if (pos + 1 < vec1.size()) { + size_t after = pos + 1 + rng.randrange(vec1.size() - 1 - pos); + auto after_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[after])); + assert(FeeRateCompare(after_feerate, pos_feerate) <= 0); + } + } + } + assert(real->HaveStaging() == (sims.size() > 1)); // Try to run a full comparison, for both main_only=false and main_only=true in TxGraph diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 62b00b2a5bb..bdfd46beb10 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -298,6 +298,8 @@ private: Locator m_locator[MAX_LEVELS]; /** The chunk feerate of this transaction in main (if present in m_locator[0]). */ FeePerWeight m_main_chunk_feerate; + /** The position this transaction has in the main linearization (if present). */ + LinearizationIndex m_main_lin_index; }; /** The set of all transactions (in all levels combined). GraphIndex values index into this. */ @@ -433,6 +435,7 @@ public: std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; + std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; void SanityCheck() const final; }; @@ -471,9 +474,10 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or - // OPTIMAL, compute its chunking and store its information in the Entry's m_main_chunk_feerate. 
- // These fields are only accessed after making the entire graph ACCEPTABLE, so it is pointless - // to compute these if we haven't reached that quality level yet. + // OPTIMAL, compute its chunking and store its information in the Entry's m_main_lin_index + // and m_main_chunk_feerate. These fields are only accessed after making the entire graph + // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level + // yet. if (m_level == 0 && IsAcceptable()) { LinearizationChunking chunking(m_depgraph, m_linearization); LinearizationIndex lin_idx{0}; @@ -483,9 +487,10 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept Assume(chunk.transactions.Any()); // Iterate over the transactions in the linearization, which must match those in chunk. do { - DepGraphIndex idx = m_linearization[lin_idx++]; + DepGraphIndex idx = m_linearization[lin_idx]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; + entry.m_main_lin_index = lin_idx++; entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); Assume(chunk.transactions[idx]); chunk.transactions.Reset(idx); @@ -564,6 +569,10 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove // are just never accessed, but set it to -1 here to increase the ability to detect a bug // that causes it to be accessed regardless. m_mapping[locator.index] = GraphIndex(-1); + // - Remove its linearization index from the Entry (if in main). + if (m_level == 0) { + entry.m_main_lin_index = LinearizationIndex(-1); + } // - Mark it as missing/removed in the Entry's locator. graph.ClearLocator(m_level, idx); to_remove = to_remove.subspan(1); @@ -1689,6 +1698,33 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept } } +std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) noexcept +{ + // The references must not be empty. 
+ Assume(GetRefGraph(a) == this); + Assume(GetRefGraph(b) == this); + // Apply dependencies in main. + ApplyDependencies(0); + Assume(m_clustersets[0].m_deps_to_add.empty()); + // Make both involved Clusters acceptable, so chunk feerates are relevant. + const auto& entry_a = m_entries[GetRefIndex(a)]; + const auto& entry_b = m_entries[GetRefIndex(b)]; + const auto& locator_a = entry_a.m_locator[0]; + const auto& locator_b = entry_b.m_locator[0]; + Assume(locator_a.IsPresent()); + Assume(locator_b.IsPresent()); + MakeAcceptable(*locator_a.cluster); + MakeAcceptable(*locator_b.cluster); + // Compare chunk feerates, and return result if it differs. + auto feerate_cmp = FeeRateCompare(entry_b.m_main_chunk_feerate, entry_a.m_main_chunk_feerate); + if (feerate_cmp < 0) return std::strong_ordering::less; + if (feerate_cmp > 0) return std::strong_ordering::greater; + // Compare Cluster* as tie-break for equal chunk feerates. + if (locator_a.cluster != locator_b.cluster) return locator_a.cluster <=> locator_b.cluster; + // As final tie-break, compare position within cluster linearization. + return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). @@ -1706,6 +1742,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const // Verify m_linearization. SetType m_done; + LinearizationIndex linindex{0}; assert(m_depgraph.IsAcyclic()); for (auto lin_pos : m_linearization) { assert(lin_pos < m_mapping.size()); @@ -1718,6 +1755,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(entry.m_locator[level].index == lin_pos); // For top-level entries, check linearization position and chunk feerate. 
if (level == 0 && IsAcceptable()) { + assert(entry.m_main_lin_index == linindex++); if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); } diff --git a/src/txgraph.h b/src/txgraph.h index 5badb058d26..554b3ec1cf3 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -146,6 +146,10 @@ public: * graph exists, it is queried; otherwise the main graph is queried. This is available even * for oversized graphs. */ virtual GraphIndex GetTransactionCount(bool main_only = false) noexcept = 0; + /** Compare two transactions according to the total order in the main graph (topological, and + * from high to low chunk feerate). Both transactions must be in the main graph. The main + * graph must not be oversized. */ + virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; From 793d4ac66e8dea5eed68ba519e92f8b7c869411e Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 9 Jan 2025 14:22:24 -0500 Subject: [PATCH 20/30] txgraph: (feature) Add DoWork function This can be called when the caller has time to spend now, and wants future operations to be fast. --- src/test/fuzz/txgraph.cpp | 4 ++++ src/txgraph.cpp | 21 +++++++++++++++++++++ src/txgraph.h | 5 +++++ 3 files changed, 30 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index d82dd62921d..2b5e3c0e8c2 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -526,6 +526,10 @@ FUZZ_TARGET(txgraph) // these here without making more calls to real, which could affect its internal // state. A full comparison is done at the end. break; + } else if (command-- == 0) { + // DoWork. 
+ real->DoWork(); + break; } } } diff --git a/src/txgraph.cpp b/src/txgraph.cpp index bdfd46beb10..9f05021bffb 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -414,6 +414,8 @@ public: void ApplyDependencies(int level) noexcept; /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. */ void MakeAcceptable(Cluster& cluster) noexcept; + /** Make all Clusters at the specified level have quality ACCEPTABLE or OPTIMAL. */ + void MakeAllAcceptable(int level) noexcept; // Implementations for the public TxGraph interface. @@ -422,6 +424,8 @@ public: void AddDependency(const Ref& parent, const Ref& child) noexcept final; void SetTransactionFee(const Ref&, int64_t fee) noexcept final; + void DoWork() noexcept final; + void StartStaging() noexcept final; void CommitStaging() noexcept final; void AbortStaging() noexcept final; @@ -1328,6 +1332,16 @@ void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept } } +void TxGraphImpl::MakeAllAcceptable(int level) noexcept +{ + ApplyDependencies(level); + if (m_clustersets[level].m_oversized == true) return; + auto& queue = m_clustersets[level].m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)]; + while (!queue.empty()) { + MakeAcceptable(*queue.back().get()); + } +} + Cluster::Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept { // Create a new transaction in the DepGraph, and remember its position in m_mapping. @@ -1900,6 +1914,13 @@ void TxGraphImpl::SanityCheck() const } } +void TxGraphImpl::DoWork() noexcept +{ + for (int level = 0; level < int(m_clustersets.size()); ++level) { + MakeAllAcceptable(level); + } +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index 554b3ec1cf3..27f819afb82 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -99,6 +99,11 @@ public: * effect. */ virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; + /** TxGraph is internally lazy, and will not compute many things until they are needed. 
+ * Calling DoWork will compute everything now, so that future operations are fast. This can be + * invoked while oversized. */ + virtual void DoWork() noexcept = 0; + /** Create a staging graph (which cannot exist already). This acts as if a full copy of * the transaction graph is made, upon which further modifications are made. This copy can * be inspected, and then either discarded, or the main graph can be replaced by it by From 0007de40981fd40be512560c770516b1914a0265 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 16 Jan 2025 16:00:10 -0500 Subject: [PATCH 21/30] txgraph: (feature) Add CountDistinctClusters function --- src/test/fuzz/txgraph.cpp | 44 +++++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 27 ++++++++++++++++++++++++ src/txgraph.h | 5 +++++ 3 files changed, 76 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 2b5e3c0e8c2..2cb620d3b78 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -526,6 +526,50 @@ FUZZ_TARGET(txgraph) // these here without making more calls to real, which could affect its internal // state. A full comparison is done at the end. break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // CountDistinctClusters. + std::vector refs; + // Gather a list of up to 15 (or up to 255) Ref pointers. + auto count = provider.ConsumeIntegralInRange(0, alt ? 255 : 15); + refs.resize(count); + for (size_t i = 0; i < count; ++i) { + refs[i] = pick_fn(); + } + // Their order should not matter, shuffle them. + std::shuffle(refs.begin(), refs.end(), rng); + // Invoke the real function. + auto result = real->CountDistinctClusters(refs, use_main); + // Build a vector with representatives of the clusters the Refs occur in in the + // simulated graph. For each, remember the lowest-index transaction SimPos in the + // cluster. + std::vector sim_reps; + for (auto ref : refs) { + // Skip Refs that do not occur in the simulated graph. 
+ auto simpos = sel_sim.Find(ref); + if (simpos == SimTxGraph::MISSING) continue; + // Start with component equal to just the Ref's SimPos. + auto component = SimTxGraph::SetType::Singleton(simpos); + // Keep adding ancestors/descendants of all elements in component until it no + // longer changes. + while (true) { + auto old_component = component; + for (auto i : component) { + component |= sel_sim.graph.Ancestors(i); + component |= sel_sim.graph.Descendants(i); + } + if (component == old_component) break; + } + // Remember the lowest-index SimPos in component, as a representative for it. + assert(component.Any()); + sim_reps.push_back(component.First()); + } + // Remove duplicates from sim_reps. + std::sort(sim_reps.begin(), sim_reps.end()); + sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end()); + // Compare the number of deduplicated representatives with the value returned by + // the real function. + assert(result == sim_reps.size()); + break; } else if (command-- == 0) { // DoWork. real->DoWork(); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 9f05021bffb..4b5dc77f633 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -440,6 +440,7 @@ public: GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; + GraphIndex CountDistinctClusters(std::span refs, bool main_only = false) noexcept final; void SanityCheck() const final; }; @@ -1739,6 +1740,32 @@ std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) n return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; } +TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span refs, bool main_only) noexcept +{ + size_t level = main_only ? 
0 : m_clustersets.size() - 1; + ApplyDependencies(level); + Assume(m_clustersets[level].m_deps_to_add.empty()); + // Build a vector of Clusters that the specified Refs occur in. + std::vector clusters; + clusters.reserve(refs.size()); + for (const Ref* ref : refs) { + if (ref == nullptr) continue; + if (GetRefGraph(*ref) == nullptr) continue; + Assume(GetRefGraph(*ref) == this); + auto cluster = FindCluster(GetRefIndex(*ref), level); + if (cluster != nullptr) clusters.push_back(cluster); + } + // Count the number of distinct elements in clusters. + std::sort(clusters.begin(), clusters.end()); + Cluster* last{nullptr}; + GraphIndex ret{0}; + for (Cluster* cluster : clusters) { + ret += (cluster != last); + last = cluster; + } + return ret; +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). diff --git a/src/txgraph.h b/src/txgraph.h index 27f819afb82..559172d209b 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -155,6 +155,11 @@ public: * from high to low chunk feerate). Both transactions must be in the main graph. The main * graph must not be oversized. */ virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0; + /** Count the number of distinct clusters that the specified transactions belong to. If + * main_only is false and a staging graph exists, staging clusters are counted. Otherwise, + * main clusters are counted. Refs that do not exist in the graph are not counted. The + * queried graph must not be oversized. */ + virtual GraphIndex CountDistinctClusters(std::span, bool main_only = false) noexcept = 0; /** Perform an internal consistency check on this object. 
*/ virtual void SanityCheck() const = 0; From 3ee5a6e912c122f69216488b4f76c37e6add4b8f Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 5 Feb 2025 22:53:45 -0500 Subject: [PATCH 22/30] txgraph: (preparation) multiple inputs to Get{Ancestors,Descendant}Refs This is a preparation for the next commit, which adds a feature to request the Refs to multiple ancestors/descendants at once. --- src/txgraph.cpp | 58 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 4b5dc77f633..4659c029871 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -139,10 +139,12 @@ public: // Functions that implement the Cluster-specific side of public TxGraph functions. - /** Get a vector of Refs for the ancestors of a given Cluster element. */ - std::vector GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; - /** Get a vector of Refs for the descendants of a given Cluster element. */ - std::vector GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept; + /** Process elements from the front of args that apply to this cluster, and append Refs for the + * union of their ancestors to output. */ + void GetAncestorRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept; + /** Process elements from the front of args that apply to this cluster, and append Refs for the + * union of their descendants to output. */ + void GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept; /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; /** Get the individual transaction feerate of a Cluster element. 
*/ @@ -1423,30 +1425,42 @@ bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept return cluster != nullptr; } -std::vector Cluster::GetAncestorRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +void Cluster::GetAncestorRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept { - std::vector ret; - ret.reserve(m_depgraph.Ancestors(idx).Count()); + /** The union of all ancestors to be returned. */ + SetType ancestors_union; + // Process elements from the front of args, as long as they apply. + while (!args.empty()) { + if (args.front().first != this) break; + ancestors_union |= m_depgraph.Ancestors(args.front().second); + args = args.subspan(1); + } + Assume(ancestors_union.Any()); // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. - for (auto idx : m_depgraph.Ancestors(idx)) { + for (auto idx : ancestors_union) { const auto& entry = graph.m_entries[m_mapping[idx]]; Assume(entry.m_ref != nullptr); - ret.push_back(entry.m_ref); + output.push_back(entry.m_ref); } - return ret; } -std::vector Cluster::GetDescendantRefs(const TxGraphImpl& graph, DepGraphIndex idx) noexcept +void Cluster::GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept { - std::vector ret; - ret.reserve(m_depgraph.Descendants(idx).Count()); + /** The union of all descendants to be returned. */ + SetType descendants_union; + // Process elements from the front of args, as long as they apply. + while (!args.empty()) { + if (args.front().first != this) break; + descendants_union |= m_depgraph.Descendants(args.front().second); + args = args.subspan(1); + } + Assume(descendants_union.Any()); // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. 
- for (auto idx : m_depgraph.Descendants(idx)) { + for (auto idx : descendants_union) { const auto& entry = graph.m_entries[m_mapping[idx]]; Assume(entry.m_ref != nullptr); - ret.push_back(entry.m_ref); + output.push_back(entry.m_ref); } - return ret; } std::vector Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept @@ -1492,7 +1506,11 @@ std::vector TxGraphImpl::GetAncestors(const Ref& arg, bool main_o auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Dispatch to the Cluster. - return cluster->GetAncestorRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); + std::pair match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index}; + auto matches = std::span(&match, 1); + std::vector ret; + cluster->GetAncestorRefs(*this, matches, ret); + return ret; } std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main_only) noexcept @@ -1509,7 +1527,11 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main auto cluster = FindCluster(GetRefIndex(arg), level); if (cluster == nullptr) return {}; // Dispatch to the Cluster. 
- return cluster->GetDescendantRefs(*this, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); + std::pair match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index}; + auto matches = std::span(&match, 1); + std::vector ret; + cluster->GetDescendantRefs(*this, matches, ret); + return ret; } std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept From 72a97c0a07ea6e5a95ab37c8d95e1ea02cff8e92 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 5 Feb 2025 22:52:58 -0500 Subject: [PATCH 23/30] txgraph: (feature) Get{Ancestors,Descendants}Union --- src/test/fuzz/txgraph.cpp | 22 +++++++++++++ src/txgraph.cpp | 66 +++++++++++++++++++++++++++++++++++++++ src/txgraph.h | 6 ++++ 3 files changed, 94 insertions(+) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 2cb620d3b78..a9fb54836c8 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -454,6 +454,28 @@ FUZZ_TARGET(txgraph) auto expect_set = sel_sim.GetAncDesc(ref, alt); assert(result_set == expect_set); break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // GetAncestorsUnion/GetDescendantsUnion. + std::vector refs; + // Gather a list of up to 15 Ref pointers. + auto count = provider.ConsumeIntegralInRange(0, 15); + refs.resize(count); + for (size_t i = 0; i < count; ++i) { + refs[i] = pick_fn(); + } + // Their order should not matter, shuffle them. + std::shuffle(refs.begin(), refs.end(), rng); + // Invoke the real function, and convert to SimPos set. + auto result = alt ? real->GetDescendantsUnion(refs, use_main) + : real->GetAncestorsUnion(refs, use_main); + auto result_set = sel_sim.MakeSet(result); + assert(result.size() == result_set.Count()); + // Compute the expected result. + SimTxGraph::SetType expect_set; + for (TxGraph::Ref* ref : refs) expect_set |= sel_sim.GetAncDesc(ref, alt); + // Compare. 
+ assert(result_set == expect_set); + break; } else if (!sel_sim.IsOversized() && command-- == 0) { // GetCluster. auto ref = pick_fn(); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 4659c029871..38da1ad1650 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -439,6 +439,8 @@ public: std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept final; std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept final; std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetAncestorsUnion(std::span args, bool main_only = false) noexcept final; + std::vector GetDescendantsUnion(std::span args, bool main_only = false) noexcept final; GraphIndex GetTransactionCount(bool main_only = false) noexcept final; bool IsOversized(bool main_only = false) noexcept final; std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; @@ -1534,6 +1536,70 @@ std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main return ret; } +std::vector TxGraphImpl::GetAncestorsUnion(std::span args, bool main_only) noexcept +{ + // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + + // Translate args to matches. + std::vector> matches; + matches.reserve(args.size()); + for (auto arg : args) { + // Skip empty Refs. + if (GetRefGraph(*arg) == nullptr) continue; + Assume(GetRefGraph(*arg) == this); + // Find the Cluster the argument is in, and skip if none is found. + auto cluster = FindCluster(GetRefIndex(*arg), level); + if (cluster == nullptr) continue; + // Append to matches. + matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster->m_level].index); + } + // Group by Cluster. 
+ std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + // Dispatch to the Clusters. + std::span match_span(matches); + std::vector ret; + while (!match_span.empty()) { + match_span.front().first->GetAncestorRefs(*this, match_span, ret); + } + return ret; +} + +std::vector TxGraphImpl::GetDescendantsUnion(std::span args, bool main_only) noexcept +{ + // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + + // Translate args to matches. + std::vector> matches; + matches.reserve(args.size()); + for (auto arg : args) { + // Skip empty Refs. + if (GetRefGraph(*arg) == nullptr) continue; + Assume(GetRefGraph(*arg) == this); + // Find the Cluster the argument is in, and skip if none is found. + auto cluster = FindCluster(GetRefIndex(*arg), level); + if (cluster == nullptr) continue; + // Append to matches. + matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster->m_level].index); + } + // Group by Cluster. + std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + // Dispatch to the Clusters. + std::span match_span(matches); + std::vector ret; + while (!match_span.empty()) { + match_span.front().first->GetDescendantRefs(*this, match_span, ret); + } + return ret; +} + std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept { // Return the empty vector if the Ref is empty (which may be indicative of the transaction diff --git a/src/txgraph.h b/src/txgraph.h index 559172d209b..d59d8efd1d8 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -143,10 +143,16 @@ public: * staging graph exists, it is queried; otherwise the main graph is queried. The queried * graph must not be oversized. 
Returns {} if arg does not exist in the queried graph. */ virtual std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept = 0; + /** Like GetAncestors, but return the Refs for all transactions in the union of the provided + * arguments' ancestors (each transaction is only reported once). */ + virtual std::vector GetAncestorsUnion(std::span args, bool main_only = false) noexcept = 0; /** Get pointers to all descendants of the specified transaction. If main_only is false and a * staging graph exists, it is queried; otherwise the main graph is queried. The queried * graph must not be oversized. Returns {} if arg does not exist in the queried graph. */ virtual std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept = 0; + /** Like GetDescendants, but return the Refs for all transactions in the union of the provided + * arguments' descendants (each transaction is only reported once). */ + virtual std::vector GetDescendantsUnion(std::span args, bool main_only = false) noexcept = 0; /** Get the total number of transactions in the graph. If main_only is false and a staging * graph exists, it is queried; otherwise the main graph is queried. This is available even * for oversized graphs. */ From 22821d0bd5acb523b83ff4b961838d4ded840e6a Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 28 Nov 2024 10:40:42 -0500 Subject: [PATCH 24/30] txgraph: (feature) Add GetMainStagingDiagrams function This allows determining whether the changes in a staging diagram unambiguously improve the graph, through CompareChunks(). 
--- src/test/fuzz/txgraph.cpp | 109 ++++++++++++++++++++++++++++++++++++++ src/txgraph.cpp | 38 +++++++++++++ src/txgraph.h | 11 ++-- src/util/feefrac.cpp | 2 +- 4 files changed, 156 insertions(+), 4 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index a9fb54836c8..d2d4c0adcf1 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -83,6 +84,16 @@ struct SimTxGraph /** Determine the number of (non-removed) transactions in the graph. */ DepGraphIndex GetTransactionCount() const { return graph.TxCount(); } + /** Get the sum of all fees/sizes in the graph. */ + FeePerWeight SumAll() const + { + FeePerWeight ret; + for (auto i : graph.Positions()) { + ret += graph.FeeRate(i); + } + return ret; + } + /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */ Pos Find(const TxGraph::Ref* ref) const { @@ -279,6 +290,40 @@ FUZZ_TARGET(txgraph) return &empty_ref; }; + /** Function to construct the full diagram for a simulated graph. This works by fetching the + * clusters and chunking them manually, so it works for both main and staging + * (GetMainChunkFeerate only works for main). */ + auto get_diagram_fn = [&](bool main_only) -> std::vector { + int level = main_only ? 0 : sims.size() - 1; + auto& sim = sims[level]; + // For every transaction in the graph, request its cluster, and throw them into a set. + std::set> clusters; + for (auto i : sim.graph.Positions()) { + auto ref = sim.GetRef(i); + clusters.insert(real->GetCluster(*ref, main_only)); + } + // Compute the chunkings of each (deduplicated) cluster. 
+ size_t num_tx{0}; + std::vector ret; + for (const auto& cluster : clusters) { + num_tx += cluster.size(); + std::vector linearization; + linearization.reserve(cluster.size()); + for (auto refptr : cluster) linearization.push_back(sim.Find(refptr)); + for (const FeeFrac& chunk_feerate : ChunkLinearization(sim.graph, linearization)) { + ret.push_back(chunk_feerate); + } + } + // Verify the number of transactions after deduplicating clusters. This implicitly verifies + // that GetCluster on each element of a cluster reports the cluster transactions in the same + // order. + assert(num_tx == sim.GetTransactionCount()); + // Sort by feerate (we don't care about respecting ordering within clusters, as these are + // just feerates). + std::sort(ret.begin(), ret.end(), std::greater{}); + return ret; + }; + LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { // Read a one-byte command. int command = provider.ConsumeIntegral(); @@ -441,6 +486,7 @@ FUZZ_TARGET(txgraph) // Just do some quick checks that the reported value is in range. A full // recomputation of expected chunk feerates is done at the end. assert(feerate.size >= main_sim.graph.FeeRate(simpos).size); + assert(feerate.size <= main_sim.SumAll().size); } break; } else if (!sel_sim.IsOversized() && command-- == 0) { @@ -596,6 +642,25 @@ FUZZ_TARGET(txgraph) // DoWork. 
real->DoWork(); break; + } else if (sims.size() == 2 && !sims[0].IsOversized() && !sims[1].IsOversized() && command-- == 0) { + // GetMainStagingDiagrams() + auto [main_diagram, staged_diagram] = real->GetMainStagingDiagrams(); + auto sum_main = std::accumulate(main_diagram.begin(), main_diagram.end(), FeeFrac{}); + auto sum_staged = std::accumulate(staged_diagram.begin(), staged_diagram.end(), FeeFrac{}); + auto diagram_gain = sum_staged - sum_main; + auto real_gain = sims[1].SumAll() - sims[0].SumAll(); + // Just check that the total fee gained/lost and size gained/lost according to the + // diagram matches the difference in these values in the simulated graph. A more + // complete check of the GetMainStagingDiagrams result is performed at the end. + assert(diagram_gain == real_gain); + // Check that the feerates in each diagram are monotonically decreasing. + for (size_t i = 1; i < main_diagram.size(); ++i) { + assert(FeeRateCompare(main_diagram[i], main_diagram[i - 1]) <= 0); + } + for (size_t i = 1; i < staged_diagram.size(); ++i) { + assert(FeeRateCompare(staged_diagram[i], staged_diagram[i - 1]) <= 0); + } + break; } } } @@ -649,6 +714,50 @@ FUZZ_TARGET(txgraph) assert(FeeRateCompare(after_feerate, pos_feerate) <= 0); } } + + // Check that the implied ordering gives rise to a combined diagram that matches the + // diagram constructed from the individual cluster linearization chunkings. + auto main_diagram = get_diagram_fn(true); + auto expected_main_diagram = ChunkLinearization(sims[0].graph, vec1); + assert(CompareChunks(main_diagram, expected_main_diagram) == 0); + + if (sims.size() >= 2 && !sims[1].IsOversized()) { + // When the staging graph is not oversized as well, call GetMainStagingDiagrams, and + // fully verify the result. + auto [main_cmp_diagram, stage_cmp_diagram] = real->GetMainStagingDiagrams(); + // Check that the feerates in each diagram are monotonically decreasing. 
+ for (size_t i = 1; i < main_cmp_diagram.size(); ++i) { + assert(FeeRateCompare(main_cmp_diagram[i], main_cmp_diagram[i - 1]) <= 0); + } + for (size_t i = 1; i < stage_cmp_diagram.size(); ++i) { + assert(FeeRateCompare(stage_cmp_diagram[i], stage_cmp_diagram[i - 1]) <= 0); + } + // Apply total ordering on the feerate diagrams to make them comparable (the exact + // tie breaker among equal-feerate FeeFracs does not matter, but it has to be + // consistent with the one used in main_diagram and stage_diagram). + std::sort(main_cmp_diagram.begin(), main_cmp_diagram.end(), std::greater{}); + std::sort(stage_cmp_diagram.begin(), stage_cmp_diagram.end(), std::greater{}); + // Find the chunks that appear in main_diagram but are missing from main_cmp_diagram. + // This is allowed, because GetMainStagingDiagrams omits clusters in main unaffected + // by staging. + std::vector missing_main_cmp; + std::set_difference(main_diagram.begin(), main_diagram.end(), + main_cmp_diagram.begin(), main_cmp_diagram.end(), + std::inserter(missing_main_cmp, missing_main_cmp.end()), + std::greater{}); + assert(main_cmp_diagram.size() + missing_main_cmp.size() == main_diagram.size()); + // Do the same for chunks in stage_diagram missing from stage_cmp_diagram. + auto stage_diagram = get_diagram_fn(false); + std::vector missing_stage_cmp; + std::set_difference(stage_diagram.begin(), stage_diagram.end(), + stage_cmp_diagram.begin(), stage_cmp_diagram.end(), + std::inserter(missing_stage_cmp, missing_stage_cmp.end()), + std::greater{}); + assert(stage_cmp_diagram.size() + missing_stage_cmp.size() == stage_diagram.size()); + // The missing chunks must be equal across main & staging (otherwise they couldn't have + // been omitted).
+ assert(missing_main_cmp == missing_stage_cmp); + } } assert(real->HaveStaging() == (sims.size() > 1)); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 38da1ad1650..faa17c38750 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -136,6 +136,8 @@ public: void ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept; /** Improve the linearization of this Cluster. */ void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept; + /** For every chunk in the cluster, append its FeeFrac to ret. */ + void AppendChunkFeerates(std::vector& ret) const noexcept; // Functions that implement the Cluster-specific side of public TxGraph functions. @@ -445,6 +447,7 @@ public: bool IsOversized(bool main_only = false) noexcept final; std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; GraphIndex CountDistinctClusters(std::span refs, bool main_only = false) noexcept final; + std::pair, std::vector> GetMainStagingDiagrams() noexcept final; void SanityCheck() const final; }; @@ -644,6 +647,14 @@ void Cluster::LevelDown(TxGraphImpl& graph) noexcept Updated(graph); } +void Cluster::AppendChunkFeerates(std::vector& ret) const noexcept +{ + auto chunk_feerates = ChunkLinearization(m_depgraph, m_linearization); + for (const auto& feerate : chunk_feerates) { + ret.push_back(feerate); + } +} + bool Cluster::Split(TxGraphImpl& graph) noexcept { // This function can only be called when the Cluster needs splitting. 
@@ -1854,6 +1865,33 @@ TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span, std::vector> TxGraphImpl::GetMainStagingDiagrams() noexcept +{ + Assume(m_clustersets.size() >= 2); + MakeAllAcceptable(m_clustersets.size() - 2); + Assume(m_clustersets[m_clustersets.size() - 2].m_deps_to_add.empty()); + MakeAllAcceptable(m_clustersets.size() - 1); + Assume(m_clustersets[m_clustersets.size() - 1].m_deps_to_add.empty()); + // For all Clusters in main which conflict with Clusters in staging (i.e., all that are removed + // by, or replaced in, staging), gather their chunk feerates. + auto main_clusters = GetConflicts(); + std::vector main_feerates, staging_feerates; + for (Cluster* cluster : main_clusters) { + cluster->AppendChunkFeerates(main_feerates); + } + // Do the same for the Clusters in staging themselves. + const auto& staging = m_clustersets.back(); + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (const auto& cluster : staging.m_clusters[quality]) { + cluster->AppendChunkFeerates(staging_feerates); + } + } + // Sort both by decreasing feerate to obtain diagrams, and return them. + std::sort(main_feerates.begin(), main_feerates.end(), [](auto& a, auto& b) { return a >> b; }); + std::sort(staging_feerates.begin(), staging_feerates.end(), [](auto& a, auto& b) { return a >> b; }); + return std::make_pair(std::move(main_feerates), std::move(staging_feerates)); +} + void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const { // There must be an m_mapping for each m_depgraph position (including holes). diff --git a/src/txgraph.h b/src/txgraph.h index d59d8efd1d8..02bf6d5855b 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -127,10 +127,11 @@ public: * is aborted or committed. */ virtual bool IsOversized(bool main_only = false) noexcept = 0; /** Get the feerate of the chunk which transaction arg is in the main graph. Returns the empty - * FeeFrac if arg does not exist in the main graph. 
The main graph must not be oversized. */ + * FeePerWeight if arg does not exist in the main graph. The main graph must not be + * oversized. */ virtual FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept = 0; - /** Get the individual transaction feerate of transaction arg. Returns the empty FeeFrac if - * arg does not exist in either main or staging. This is available even for oversized + /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight + * if arg does not exist in either main or staging. This is available even for oversized * graphs. */ virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; /** Get pointers to all transactions in the connected component ("cluster") which arg is in. @@ -166,6 +167,10 @@ public: * main clusters are counted. Refs that do not exist in the graph are not counted. The * queried graph must not be oversized. */ virtual GraphIndex CountDistinctClusters(std::span, bool main_only = false) noexcept = 0; + /** Get feerate diagrams for both main and staging (which must both exist and not be + * oversized), ignoring unmodified components in both. Use FeeFrac rather than FeePerWeight + * so CompareChunks is usable without type-conversion. */ + virtual std::pair, std::vector> GetMainStagingDiagrams() noexcept = 0; /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; diff --git a/src/util/feefrac.cpp b/src/util/feefrac.cpp index 5b6173835cb..96cb1aef2d9 100644 --- a/src/util/feefrac.cpp +++ b/src/util/feefrac.cpp @@ -36,7 +36,7 @@ std::partial_ordering CompareChunks(Span chunks0, Span Date: Thu, 14 Nov 2024 15:54:03 -0500 Subject: [PATCH 25/30] txgraph: (preparation) maintain chunk index This is preparation for exposing mining and eviction functionality in TxGraph. 
--- src/txgraph.cpp | 117 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 5 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index faa17c38750..29781defe18 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -241,6 +241,49 @@ private: /** The ClusterSets in this TxGraphImpl. Has exactly 1 (main) or exactly 2 elements (main and staged). */ std::vector m_clustersets; + /** Information about a chunk in the main graph. */ + struct ChunkData + { + /** The Entry which is the last transaction of the chunk. */ + mutable GraphIndex m_graph_index; + /** How many transactions the chunk contains. */ + LinearizationIndex m_chunk_count; + + ChunkData(GraphIndex graph_index, LinearizationIndex chunk_count) noexcept : + m_graph_index{graph_index}, m_chunk_count{chunk_count} {} + }; + + /** Comparator for ChunkData objects in mining order. */ + class ChunkOrder + { + const TxGraphImpl* const m_graph; + public: + explicit ChunkOrder(const TxGraphImpl* graph) : m_graph(graph) {} + + bool operator()(const ChunkData& a, const ChunkData& b) const noexcept + { + const auto& a_entry = m_graph->m_entries[a.m_graph_index]; + const auto& b_entry = m_graph->m_entries[b.m_graph_index]; + // First sort from high feerate to low feerate. + auto cmp_feerate = FeeRateCompare(a_entry.m_main_chunk_feerate, b_entry.m_main_chunk_feerate); + if (cmp_feerate != 0) return cmp_feerate > 0; + // Then sort by increasing Cluster pointer. + Assume(a_entry.m_locator[0].IsPresent()); + Assume(b_entry.m_locator[0].IsPresent()); + if (a_entry.m_locator[0].cluster != b_entry.m_locator[0].cluster) { + return std::less{}(a_entry.m_locator[0].cluster, b_entry.m_locator[0].cluster); + } + // Finally sort by position within the Cluster. + return a_entry.m_main_lin_index < b_entry.m_main_lin_index; + } + }; + + /** Definition for the mining index type. */ + using ChunkIndex = std::set; + + /** Index of ChunkData objects. 
*/ + ChunkIndex m_chunkindex; + /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. * @@ -298,6 +341,8 @@ private: { /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */ Ref* m_ref{nullptr}; + /** Iterator to the corresponding ChunkData, if any. */ + ChunkIndex::iterator m_chunkindex_iterator; /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ Locator m_locator[MAX_LEVELS]; /** The chunk feerate of this transaction in main (if present in m_locator[0]). */ @@ -315,7 +360,8 @@ private: public: /** Construct a new TxGraphImpl with the specified maximum cluster count. */ explicit TxGraphImpl(DepGraphIndex max_cluster_count) noexcept : - m_max_cluster_count(max_cluster_count) + m_max_cluster_count(max_cluster_count), + m_chunkindex(ChunkOrder(this)) { Assume(max_cluster_count >= 1); Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); @@ -476,6 +522,10 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept --m_clustersets[after_level].m_txcount; } } + if (level == 0 && entry.m_chunkindex_iterator != m_chunkindex.end()) { + m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = m_chunkindex.end(); + } } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -483,6 +533,12 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entrys. for (DepGraphIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; + if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { + // Destroy any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). 
+ graph.m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = graph.m_chunkindex.end(); + } entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or @@ -496,9 +552,10 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Iterate over the chunks. for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { auto chunk = chunking.GetChunk(chunk_idx); - Assume(chunk.transactions.Any()); + auto chunk_count = chunk.transactions.Count(); + Assume(chunk_count > 0); // Iterate over the transactions in the linearization, which must match those in chunk. - do { + while (true) { DepGraphIndex idx = m_linearization[lin_idx]; GraphIndex graph_idx = m_mapping[idx]; auto& entry = graph.m_entries[graph_idx]; @@ -506,7 +563,14 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); Assume(chunk.transactions[idx]); chunk.transactions.Reset(idx); - } while(chunk.transactions.Any()); + if (chunk.transactions.None()) { + // Last transaction in the chunk. + auto [it, inserted] = graph.m_chunkindex.emplace(graph_idx, chunk_count); + Assume(inserted); + entry.m_chunkindex_iterator = it; + break; + } + } } } } @@ -761,7 +825,14 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // Update the transaction's Locator. There is no need to call Updated() to update chunk // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). - graph.m_entries[idx].m_locator[m_level].SetPresent(this, new_pos); + auto& entry = graph.m_entries[idx]; + if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { + // Destroy any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). 
+ graph.m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = graph.m_chunkindex.end(); + } + entry.m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. other.m_depgraph = DepGraph{}; @@ -965,6 +1036,10 @@ void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept Entry& entry = m_entries[idx]; // Update linked Ref, if any exists. if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; + // Update linked chunk index entries, if any exist. + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + entry.m_chunkindex_iterator->m_graph_index = idx; + } // Update the locators for both levels. The rest of the Entry information will not change, // so no need to invoke Cluster::Updated(). for (int level = 0; level < MAX_LEVELS; ++level) { @@ -1374,6 +1449,7 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept auto idx = m_entries.size(); m_entries.emplace_back(); auto& entry = m_entries.back(); + entry.m_chunkindex_iterator = m_chunkindex.end(); entry.m_ref = &ret; GetRefGraph(ret) = this; GetRefIndex(ret) = idx; @@ -1502,6 +1578,10 @@ void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept GraphIndex idx = m_mapping[ci]; auto& entry = graph.m_entries[idx]; entry.m_locator[m_level].SetMissing(); + if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { + graph.m_chunkindex.erase(entry.m_chunkindex_iterator); + entry.m_chunkindex_iterator = graph.m_chunkindex.end(); + } } } @@ -1910,6 +1990,7 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const // Verify m_linearization. 
SetType m_done; LinearizationIndex linindex{0}; + DepGraphIndex chunk_pos{0}; //!< position within the current chunk assert(m_depgraph.IsAcyclic()); for (auto lin_pos : m_linearization) { assert(lin_pos < m_mapping.size()); @@ -1925,8 +2006,13 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(entry.m_main_lin_index == linindex++); if (!linchunking.GetChunk(0).transactions[lin_pos]) { linchunking.MarkDone(linchunking.GetChunk(0).transactions); + chunk_pos = 0; } assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); + // Verify that an entry in the chunk index exists for every chunk-ending transaction. + ++chunk_pos; + bool is_chunk_end = (chunk_pos == linchunking.GetChunk(0).transactions.Count()); + assert((entry.m_chunkindex_iterator != graph.m_chunkindex.end()) == is_chunk_end); // If this Cluster has an acceptable quality level, its chunks must be connected. assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); } @@ -1943,6 +2029,8 @@ void TxGraphImpl::SanityCheck() const std::set expected_clusters[MAX_LEVELS]; /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */ std::set expected_removed[MAX_LEVELS]; + /** Which GraphIndexes ought to occur in m_chunkindex, based on m_entries. */ + std::set expected_chunkindex; /** Whether compaction is possible in the current state. */ bool compact_possible{true}; @@ -1957,6 +2045,11 @@ void TxGraphImpl::SanityCheck() const assert(GetRefGraph(*entry.m_ref) == this); assert(GetRefIndex(*entry.m_ref) == idx); } + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + // Remember which entries we see a chunkindex entry for. + assert(entry.m_locator[0].IsPresent()); + expected_chunkindex.insert(idx); + } // Verify the Entry m_locators. 
bool was_present{false}, was_removed{false}; for (int level = 0; level < MAX_LEVELS; ++level) { @@ -2065,6 +2158,20 @@ void TxGraphImpl::SanityCheck() const if (compact_possible) { assert(actual_unlinked.empty()); } + + // Finally, check the chunk index. + std::set actual_chunkindex; + FeeFrac last_chunk_feerate; + for (const auto& chunk : m_chunkindex) { + GraphIndex idx = chunk.m_graph_index; + actual_chunkindex.insert(idx); + auto chunk_feerate = m_entries[idx].m_main_chunk_feerate; + if (!last_chunk_feerate.IsEmpty()) { + assert(FeeRateCompare(last_chunk_feerate, chunk_feerate) >= 0); + } + last_chunk_feerate = chunk_feerate; + } + assert(actual_chunkindex == expected_chunkindex); } void TxGraphImpl::DoWork() noexcept From dd7995b69d576bc2b500d0387293ac3baa151f8d Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Mon, 25 Nov 2024 11:31:02 -0500 Subject: [PATCH 26/30] txgraph: (feature) introduce BlockBuilder interface This interface lets one iterate efficiently over the chunks of the main graph in a TxGraph, in the same order as CompareMainOrder. Each chunk can be marked as "included" or "skipped" (and in the latter case, dependent chunks will be skipped). --- src/test/fuzz/txgraph.cpp | 95 +++++++++++++++++++++++++-- src/txgraph.cpp | 134 ++++++++++++++++++++++++++++++++++---- src/txgraph.h | 31 ++++++++- 3 files changed, 241 insertions(+), 19 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index d2d4c0adcf1..4e87f9dd71e 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -253,6 +253,22 @@ FUZZ_TARGET(txgraph) sims.reserve(2); sims.emplace_back(max_count); + /** Struct encapsulating information about a BlockBuilder that's currently live. */ + struct BlockBuilderData + { + /** BlockBuilder object from real. */ + std::unique_ptr builder; + /** The set of transactions marked as included in *builder. */ + SimTxGraph::SetType done; + /** The last chunk feerate returned by *builder. IsEmpty() if none yet. 
*/ + FeePerWeight last_feerate; + + BlockBuilderData(std::unique_ptr builder_in) : builder(std::move(builder_in)) {} + }; + + /** Currently active block builders. */ + std::vector block_builders; + /** Function to pick any Ref (for either sim in sims: from sim.simmap or sim.removed, or the * empty Ref). */ auto pick_fn = [&]() noexcept -> TxGraph::Ref* { @@ -327,6 +343,11 @@ FUZZ_TARGET(txgraph) LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { // Read a one-byte command. int command = provider.ConsumeIntegral(); + + /** Use the bottom 2 bits of command to select an entry in the block_builders vector (if + * any). */ + int builder_idx = block_builders.empty() ? -1 : int((command & 3) % block_builders.size()); + // Treat the lowest bit of a command as a flag (which selects a variant of some of the // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave // the rest of the bits in command. @@ -344,7 +365,7 @@ FUZZ_TARGET(txgraph) // Keep decrementing command for each applicable operation, until one is hit. Multiple // iterations may be necessary. while (true) { - if (top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { // AddTransaction. int64_t fee; int32_t size; @@ -366,7 +387,7 @@ FUZZ_TARGET(txgraph) // Move it in place. *ref_loc = std::move(ref); break; - } else if (top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { + } else if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { // AddDependency. 
auto par = pick_fn(); auto chl = pick_fn(); @@ -380,7 +401,7 @@ FUZZ_TARGET(txgraph) top_sim.AddDependency(par, chl); real->AddDependency(*par, *chl); break; - } else if (top_sim.removed.size() < 100 && command-- == 0) { + } else if ((block_builders.empty() || sims.size() > 1) && top_sim.removed.size() < 100 && command-- == 0) { // RemoveTransaction. Either all its ancestors or all its descendants are also // removed (if any), to make sure TxGraph's reordering of removals and dependencies // has no effect. @@ -410,7 +431,7 @@ FUZZ_TARGET(txgraph) } sel_sim.removed.pop_back(); break; - } else if (command-- == 0) { + } else if (block_builders.empty() && command-- == 0) { // ~Ref (of any transaction). std::vector to_destroy; to_destroy.push_back(pick_fn()); @@ -432,7 +453,7 @@ FUZZ_TARGET(txgraph) } } break; - } else if (command-- == 0) { + } else if (block_builders.empty() && command-- == 0) { // SetTransactionFee. int64_t fee; if (alt) { @@ -562,7 +583,7 @@ FUZZ_TARGET(txgraph) sims.emplace_back(sims.back()); real->StartStaging(); break; - } else if (sims.size() > 1 && command-- == 0) { + } else if (block_builders.empty() && sims.size() > 1 && command-- == 0) { // CommitStaging. real->CommitStaging(); sims.erase(sims.begin()); @@ -661,6 +682,47 @@ FUZZ_TARGET(txgraph) assert(FeeRateCompare(staged_diagram[i], staged_diagram[i - 1]) <= 0); } break; + } else if (block_builders.size() < 4 && !main_sim.IsOversized() && command-- == 0) { + // GetBlockBuilder. + block_builders.emplace_back(real->GetBlockBuilder()); + break; + } else if (!block_builders.empty() && command-- == 0) { + // ~BlockBuilder. + block_builders.erase(block_builders.begin() + builder_idx); + break; + } else if (!block_builders.empty() && *block_builders[builder_idx].builder && command-- == 0) { + // BlockBuilder::Include and BlockBuilder::Skip. 
+ auto& builder_data = block_builders[builder_idx]; + auto cur_feerate = builder_data.builder->GetCurrentChunkFeerate(); + // Chunk feerates must be monotonously decreasing. + if (!builder_data.last_feerate.IsEmpty()) { + assert(!(cur_feerate >> builder_data.last_feerate)); + } + builder_data.last_feerate = cur_feerate; + // Verify the contents of GetCurrentChunk. + auto new_done = builder_data.done; + FeePerWeight sum_feerate; + for (TxGraph::Ref* ref : builder_data.builder->GetCurrentChunk()) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(ref); + assert(simpos != SimTxGraph::MISSING); + // Verify the claimed chunk feerate. + sum_feerate += main_sim.graph.FeeRate(simpos); + // Make sure no transaction is reported twice. + assert(!new_done[simpos]); + new_done.Set(simpos); + // The concatenation of all included transactions must be topologically valid. + assert(main_sim.graph.Ancestors(simpos).IsSubsetOf(new_done)); + } + assert(sum_feerate == cur_feerate); + // Skip or Include. + if (alt) { + builder_data.builder->Skip(); + } else { + builder_data.builder->Include(); + builder_data.done = new_done; + } + break; } } } @@ -715,6 +777,27 @@ FUZZ_TARGET(txgraph) } } + // The same order should be obtained through a BlockBuilder, if nothing is skipped. + auto builder = real->GetBlockBuilder(); + std::vector vec_builder; + while (*builder) { + FeePerWeight sum; + for (TxGraph::Ref* ref : builder->GetCurrentChunk()) { + // The reported chunk feerate must match the chunk feerate obtained by asking + // it for each of the chunk's transactions individually. + assert(real->GetMainChunkFeerate(*ref) == builder->GetCurrentChunkFeerate()); + // Verify the chunk feerate matches the sum of the reported individual feerates. + sum += real->GetIndividualFeerate(*ref); + // Chunks must contain transactions that exist in the graph. 
+ auto simpos = sims[0].Find(ref); + assert(simpos != SimTxGraph::MISSING); + vec_builder.push_back(simpos); + } + assert(sum == builder->GetCurrentChunkFeerate()); + builder->Include(); + } + assert(vec_builder == vec1); + // Check that the implied ordering gives rise to a combined diagram that matches the // diagram constructed from the individual cluster linearization chunkings. auto main_diagram = get_diagram_fn(true); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 29781defe18..b03b557823a 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -148,7 +148,7 @@ public: * union of their descendants to output. */ void GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept; /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ - std::vector GetClusterRefs(const TxGraphImpl& graph) noexcept; + void GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept; /** Get the individual transaction feerate of a Cluster element. */ FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept; /** Modify the fee of a Cluster element. */ @@ -185,6 +185,7 @@ public: class TxGraphImpl final : public TxGraph { friend class Cluster; + friend class BlockBuilderImpl; private: /** Internal RNG. */ FastRandomContext m_rng; @@ -283,6 +284,8 @@ private: /** Index of ChunkData objects. */ ChunkIndex m_chunkindex; + /** Number of index-observing objects in existence (BlockBuilderImpl). */ + size_t m_chunkindex_observers{0}; /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. @@ -410,6 +413,7 @@ public: { auto& entry = m_entries[idx]; Assume(entry.m_ref != nullptr); + Assume(m_chunkindex_observers == 0 || !entry.m_locator[0].IsPresent()); entry.m_ref = nullptr; // Mark the transaction as to be removed in all levels where it explicitly or implicitly // exists. 
@@ -495,9 +499,42 @@ public: GraphIndex CountDistinctClusters(std::span refs, bool main_only = false) noexcept final; std::pair, std::vector> GetMainStagingDiagrams() noexcept final; + std::unique_ptr GetBlockBuilder() noexcept final; + void SanityCheck() const final; }; +/** Implementation of the TxGraph::BlockBuilder interface. */ +class BlockBuilderImpl final : public TxGraph::BlockBuilder +{ + /** Which TxGraphImpl this object is doing block building for. It will have its + * m_chunkindex_observers incremented as long as this BlockBuilderImpl exists. */ + TxGraphImpl* const m_graph; + /** Vector for actual storage pointed to by TxGraph::BlockBuilder::m_current_chunk. */ + std::vector m_chunkdata; + /** Which cluster the current chunk belongs to, so we can exclude further transactions from it + * when that chunk is skipped. */ + Cluster* m_remaining_cluster{nullptr}; + /** Clusters which we're not including further transactions from. */ + std::set m_excluded_clusters; + /** Iterator to the next chunk (after the current one) in the chunk index. end() if nothing + * further remains. */ + TxGraphImpl::ChunkIndex::const_iterator m_next_iter; + + /** Fill in information about the current chunk in m_current_chunk, m_chunkdata, + * m_remaining_cluster, and update m_next_iter. */ + void Next() noexcept; + +public: + /** Construct a new BlockBuilderImpl to build blocks for the provided graph. */ + BlockBuilderImpl(TxGraphImpl& graph) noexcept; + + // Implement the public interface. 
+ ~BlockBuilderImpl() final; + void Include() noexcept final; + void Skip() noexcept final; +}; + void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { auto& entry = m_entries[idx]; @@ -523,6 +560,7 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept } } if (level == 0 && entry.m_chunkindex_iterator != m_chunkindex.end()) { + Assume(m_chunkindex_observers == 0); m_chunkindex.erase(entry.m_chunkindex_iterator); entry.m_chunkindex_iterator = m_chunkindex.end(); } @@ -536,6 +574,7 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { // Destroy any potential ChunkData prior to modifying the Cluster (as that could // invalidate its ordering). + Assume(graph.m_chunkindex_observers == 0); graph.m_chunkindex.erase(entry.m_chunkindex_iterator); entry.m_chunkindex_iterator = graph.m_chunkindex.end(); } @@ -829,6 +868,7 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { // Destroy any potential ChunkData prior to modifying the Cluster (as that could // invalidate its ordering). + Assume(graph.m_chunkindex_observers == 0); graph.m_chunkindex.erase(entry.m_chunkindex_iterator); entry.m_chunkindex_iterator = graph.m_chunkindex.end(); } @@ -1443,6 +1483,7 @@ Cluster::Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex gra TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept { + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); // Construct a new Ref. Ref ret; // Construct a new Entry, and link it with the Ref. @@ -1470,6 +1511,7 @@ void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept // having been removed). 
if (GetRefGraph(arg) == nullptr) return; Assume(GetRefGraph(arg) == this); + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); // Find the Cluster the transaction is in, and stop if it isn't in any. auto cluster = FindCluster(GetRefIndex(arg), m_clustersets.size() - 1); if (cluster == nullptr) return; @@ -1487,6 +1529,7 @@ void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept // removed). if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return; Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); // Don't do anything if this is a dependency on self. if (GetRefIndex(parent) == GetRefIndex(child)) return; // Find the Cluster the parent and child transaction are in, and stop if either appears to be @@ -1552,17 +1595,15 @@ void Cluster::GetDescendantRefs(const TxGraphImpl& graph, std::span Cluster::GetClusterRefs(const TxGraphImpl& graph) noexcept +void Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept { - std::vector ret; - ret.reserve(m_linearization.size()); - // Translate all transactions in the Cluster (in linearization order) to Refs. - for (auto idx : m_linearization) { - const auto& entry = graph.m_entries[m_mapping[idx]]; + // Translate the transactions in the Cluster (in linearization order, starting at start_pos in + // the linearization) to Refs, and fill them in range. 
+ for (auto& ref : range) { + const auto& entry = graph.m_entries[m_mapping[m_linearization[start_pos++]]]; Assume(entry.m_ref != nullptr); - ret.push_back(entry.m_ref); + ref = entry.m_ref; } - return ret; } FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept @@ -1579,6 +1620,7 @@ void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept auto& entry = graph.m_entries[idx]; entry.m_locator[m_level].SetMissing(); if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { + Assume(graph.m_chunkindex_observers == 0); graph.m_chunkindex.erase(entry.m_chunkindex_iterator); entry.m_chunkindex_iterator = graph.m_chunkindex.end(); } @@ -1707,7 +1749,9 @@ std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_onl if (cluster == nullptr) return {}; // Make sure the Cluster has an acceptable quality level, and then dispatch to it. MakeAcceptable(*cluster); - return cluster->GetClusterRefs(*this); + std::vector ret(cluster->GetTxCount()); + cluster->GetClusterRefs(*this, ret, 0); + return ret; } TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept @@ -1830,6 +1874,7 @@ void TxGraphImpl::CommitStaging() noexcept int main_level = stage_level - 1; auto& stage = m_clustersets[stage_level]; auto& main = m_clustersets[main_level]; + Assume(m_chunkindex_observers == 0 || main_level > 0); // Delete all conflicting Clusters in main_level, to make place for moving the staging ones // there. All of these have been PullIn()'d to stage_level before. auto conflicts = GetConflicts(); @@ -1882,6 +1927,7 @@ void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept // Don't do anything if the passed Ref is empty. if (GetRefGraph(ref) == nullptr) return; Assume(GetRefGraph(ref) == this); + Assume(m_chunkindex_observers == 0); // Find the entry, its locator, and inform its Cluster about the new feerate, if any. 
auto& entry = m_entries[GetRefIndex(ref)]; for (int level = 0; level < MAX_LEVELS; ++level) { @@ -2161,7 +2207,7 @@ void TxGraphImpl::SanityCheck() const // Finally, check the chunk index. std::set actual_chunkindex; - FeeFrac last_chunk_feerate; + FeePerWeight last_chunk_feerate; for (const auto& chunk : m_chunkindex) { GraphIndex idx = chunk.m_graph_index; actual_chunkindex.insert(idx); @@ -2177,10 +2223,74 @@ void TxGraphImpl::SanityCheck() const void TxGraphImpl::DoWork() noexcept { for (int level = 0; level < int(m_clustersets.size()); ++level) { - MakeAllAcceptable(level); + if (level > 0 || m_chunkindex_observers == 0) { + MakeAllAcceptable(level); + } } } +void BlockBuilderImpl::Next() noexcept +{ + while (m_next_iter != m_graph->m_chunkindex.end()) { + // Find the cluster pointed to by m_next_iter (and advance it). + const auto& chunk_data = *(m_next_iter++); + const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; + Cluster* cluster = chunk_end_entry.m_locator[0].cluster; + // If we previously skipped a chunk from this cluster we cannot include more from it. + if (m_excluded_clusters.contains(cluster)) continue; + // Populate m_current_chunk. + m_chunkdata.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + m_remaining_cluster = cluster; + m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); + return; + } + // We reached the end of m_chunkindex. + m_current_chunk = std::nullopt; +} + +BlockBuilderImpl::BlockBuilderImpl(TxGraphImpl& graph) noexcept : m_graph(&graph) +{ + // Make sure all clusters in main are up to date, and acceptable. + m_graph->MakeAllAcceptable(0); + // There cannot remain any inapplicable dependencies. 
+ Assume(m_graph->m_clustersets[0].m_deps_to_add.empty()); + // Remember that this object is observing the graph's index, so that we can detect concurrent + // modifications. + ++m_graph->m_chunkindex_observers; + // Find the first chunk. + m_next_iter = m_graph->m_chunkindex.begin(); + Next(); +} + +BlockBuilderImpl::~BlockBuilderImpl() +{ + Assume(m_graph->m_chunkindex_observers > 0); + // Permit modifications to the main graph again after destroying the BlockBuilderImpl. + --m_graph->m_chunkindex_observers; +} + +void BlockBuilderImpl::Include() noexcept +{ + // The actual inclusion of the chunk is done by the calling code. All we have to do is switch + // to the next chunk. + Next(); +} + +void BlockBuilderImpl::Skip() noexcept +{ + // When skipping a chunk we need to not include anything more of the cluster, as that could make + // the result topologically invalid. + m_excluded_clusters.insert(m_remaining_cluster); + Next(); +} + +std::unique_ptr TxGraphImpl::GetBlockBuilder() noexcept +{ + return std::make_unique(*this); +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index 02bf6d5855b..c0a3378d320 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -3,9 +3,11 @@ // file COPYING or http://www.opensource.org/licenses/mit-license.php. #include -#include #include +#include +#include #include +#include #include @@ -53,6 +55,29 @@ public: Ref(const Ref&) = delete; }; + /** Interface returned by GetBlockBuilder. */ + class BlockBuilder + { + protected: + /** The next chunk, in topological order plus feerate, or std::nullopt if done. */ + std::optional, FeePerWeight>> m_current_chunk; + /** Make constructor non-public (use TxGraph::GetBlockBuilder()). */ + BlockBuilder() noexcept = default; + public: + /** Support safe inheritance. */ + virtual ~BlockBuilder() = default; + /** Determine whether there are more transactions to be included. 
*/ + explicit operator bool() noexcept { return m_current_chunk.has_value(); } + /** Get the chunk that is currently suggested to be included. */ + const std::span& GetCurrentChunk() noexcept { return m_current_chunk->first; } + /** Get the feerate of the currently suggested chunk. */ + const FeePerWeight& GetCurrentChunkFeerate() noexcept { return m_current_chunk->second; } + /** Mark the current chunk as included, and progress to the next one. */ + virtual void Include() noexcept = 0; + /** Mark the current chunk as skipped, and progress to the next one. */ + virtual void Skip() noexcept = 0; + }; + protected: // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. friend class TxGraph::Ref; @@ -172,6 +197,10 @@ public: * so CompareChunks is usable without type-conversion. */ virtual std::pair, std::vector> GetMainStagingDiagrams() noexcept = 0; + /** Construct a block builder, drawing from the main graph, which cannot be oversized. While + * the returned object exists, no mutators on the main graph are allowed. */ + virtual std::unique_ptr GetBlockBuilder() noexcept = 0; + /** Perform an internal consistency check on this object. */ virtual void SanityCheck() const = 0; }; From 4bd61e33197161b681da1760f646ae03077575ef Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 8 Jan 2025 17:18:12 -0500 Subject: [PATCH 27/30] txgraph: (feature) introduce TxGraph::GetWorstMainChunk It returns the last chunk that would be suggested for mining by BlockBuilder objects. This is intended for eviction. 
--- src/test/fuzz/txgraph.cpp | 40 +++++++++++++++++++++++++++++++++++++-- src/txgraph.cpp | 21 ++++++++++++++++++++ src/txgraph.h | 4 ++++ 3 files changed, 63 insertions(+), 2 deletions(-) diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index 4e87f9dd71e..b658a56e55d 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -723,6 +723,31 @@ FUZZ_TARGET(txgraph) builder_data.done = new_done; } break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // GetWorstMainChunk. + auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk(); + // Just do some sanity checks here. Consistency with GetBlockBuilder is checked + // below. + if (main_sim.GetTransactionCount() == 0) { + assert(worst_chunk.empty()); + } else { + assert(!worst_chunk.empty()); + SimTxGraph::SetType done; + FeePerWeight sum; + for (TxGraph::Ref* ref : worst_chunk) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(ref); + assert(simpos != SimTxGraph::MISSING); + sum += main_sim.graph.FeeRate(simpos); + // Make sure the chunk contains no duplicate transactions. + assert(!done[simpos]); + done.Set(simpos); + // All elements are preceded by all their descendants. + assert(main_sim.graph.Descendants(simpos).IsSubsetOf(done)); + } + assert(sum == worst_chunk_feerate); + } + break; } } } @@ -780,9 +805,13 @@ FUZZ_TARGET(txgraph) // The same order should be obtained through a BlockBuilder, if nothing is skipped. auto builder = real->GetBlockBuilder(); std::vector vec_builder; + std::vector chunk; + FeePerWeight chunk_feerate; while (*builder) { FeePerWeight sum; - for (TxGraph::Ref* ref : builder->GetCurrentChunk()) { + auto chunk_span = builder->GetCurrentChunk(); + chunk.assign(chunk_span.begin(), chunk_span.end()); + for (TxGraph::Ref* ref : chunk_span) { // The reported chunk feerate must match the chunk feerate obtained by asking // it for each of the chunk's transactions individually. 
assert(real->GetMainChunkFeerate(*ref) == builder->GetCurrentChunkFeerate()); @@ -793,11 +822,18 @@ FUZZ_TARGET(txgraph) assert(simpos != SimTxGraph::MISSING); vec_builder.push_back(simpos); } - assert(sum == builder->GetCurrentChunkFeerate()); + chunk_feerate = builder->GetCurrentChunkFeerate(); + assert(sum == chunk_feerate); builder->Include(); } assert(vec_builder == vec1); + // The last chunk returned by the BlockBuilder must match GetWorstMainChunk, in reverse. + std::reverse(chunk.begin(), chunk.end()); + auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk(); + assert(chunk == worst_chunk); + assert(chunk_feerate == worst_chunk_feerate); + // Check that the implied ordering gives rise to a combined diagram that matches the // diagram constructed from the individual cluster linearization chunkings. auto main_diagram = get_diagram_fn(true); diff --git a/src/txgraph.cpp b/src/txgraph.cpp index b03b557823a..ef0aba352b0 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -500,6 +500,7 @@ public: std::pair, std::vector> GetMainStagingDiagrams() noexcept final; std::unique_ptr GetBlockBuilder() noexcept final; + std::pair, FeePerWeight> GetWorstMainChunk() noexcept final; void SanityCheck() const final; }; @@ -2291,6 +2292,26 @@ std::unique_ptr TxGraphImpl::GetBlockBuilder() noexcept return std::make_unique(*this); } +std::pair, FeePerWeight> TxGraphImpl::GetWorstMainChunk() noexcept +{ + std::pair, FeePerWeight> ret; + // Make sure all clusters in main are up to date, and acceptable. + MakeAllAcceptable(0); + Assume(m_clustersets[0].m_deps_to_add.empty()); + // If the graph is not empty, populate ret. 
+ if (!m_chunkindex.empty()) { + const auto& chunk_data = *m_chunkindex.rbegin(); + const auto& chunk_end_entry = m_entries[chunk_data.m_graph_index]; + Cluster* cluster = chunk_end_entry.m_locator[0].cluster; + ret.first.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*this, ret.first, start_pos); + std::reverse(ret.first.begin(), ret.first.end()); + ret.second = chunk_end_entry.m_main_chunk_feerate; + } + return ret; +} + } // namespace TxGraph::Ref::~Ref() diff --git a/src/txgraph.h b/src/txgraph.h index c0a3378d320..43485cd15b1 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -200,6 +200,10 @@ public: /** Construct a block builder, drawing from the main graph, which cannot be oversized. While * the returned object exists, no mutators on the main graph are allowed. */ virtual std::unique_ptr GetBlockBuilder() noexcept = 0; + /** Get the worst chunk overall in the main graph, i.e., the last chunk that would be returned + * by a BlockBuilder created now. The chunk is returned in reversed order, so every element is + * preceded by all its descendants. If the graph is empty, {} is returned. */ + virtual std::pair, FeePerWeight> GetWorstMainChunk() noexcept = 0; /** Perform an internal consistency check on this object. 
*/ virtual void SanityCheck() const = 0; From 271cec168b636e47dc3494f05319847e9e3e2e48 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Mon, 2 Dec 2024 13:33:41 -0500 Subject: [PATCH 28/30] txgraph: (optimization) reuse discarded chunkindex entries --- src/txgraph.cpp | 77 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index ef0aba352b0..f85e081326c 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -286,6 +287,8 @@ private: ChunkIndex m_chunkindex; /** Number of index-observing objects in existence (BlockBuilderImpl). */ size_t m_chunkindex_observers{0}; + /** Cache of discarded ChunkIndex node handles. */ + std::vector m_chunkindex_discarded; /** A Locator that describes whether, where, and in which Cluster an Entry appears. * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. @@ -397,6 +400,10 @@ public: void ClearLocator(int level, GraphIndex index) noexcept; /** Find which Clusters conflict with the top level. */ std::vector GetConflicts() const noexcept; + /** Clear an Entry's ChunkData. */ + void ClearChunkData(Entry& entry) noexcept; + /** Give an Entry a ChunkData object. */ + void CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept; // Functions for handling Refs. @@ -536,6 +543,37 @@ public: void Skip() noexcept final; }; +void TxGraphImpl::ClearChunkData(Entry& entry) noexcept +{ + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + Assume(m_chunkindex_observers == 0); + // If the Entry has a non-empty m_chunkindex_iterator, extract it, and move the handle + // to the cache of discarded chunkindex entries. 
+ m_chunkindex_discarded.emplace_back(m_chunkindex.extract(entry.m_chunkindex_iterator)); + entry.m_chunkindex_iterator = m_chunkindex.end(); + } +} + +void TxGraphImpl::CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept +{ + auto& entry = m_entries[idx]; + if (!m_chunkindex_discarded.empty()) { + // Reuse a discarded node handle. + auto& node = m_chunkindex_discarded.back().value(); + node.m_graph_index = idx; + node.m_chunk_count = chunk_count; + auto insert_result = m_chunkindex.insert(std::move(m_chunkindex_discarded.back())); + Assume(insert_result.inserted); + entry.m_chunkindex_iterator = insert_result.position; + m_chunkindex_discarded.pop_back(); + } else { + // Construct a new entry. + auto emplace_result = m_chunkindex.emplace(idx, chunk_count); + Assume(emplace_result.second); + entry.m_chunkindex_iterator = emplace_result.first; + } +} + void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept { auto& entry = m_entries[idx]; @@ -560,11 +598,7 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept --m_clustersets[after_level].m_txcount; } } - if (level == 0 && entry.m_chunkindex_iterator != m_chunkindex.end()) { - Assume(m_chunkindex_observers == 0); - m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = m_chunkindex.end(); - } + if (level == 0) ClearChunkData(entry); } void Cluster::Updated(TxGraphImpl& graph) noexcept @@ -572,13 +606,9 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept // Update all the Locators for this Cluster's Entrys. for (DepGraphIndex idx : m_linearization) { auto& entry = graph.m_entries[m_mapping[idx]]; - if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { - // Destroy any potential ChunkData prior to modifying the Cluster (as that could - // invalidate its ordering). 
- Assume(graph.m_chunkindex_observers == 0); - graph.m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = graph.m_chunkindex.end(); - } + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + if (m_level == 0) graph.ClearChunkData(entry); entry.m_locator[m_level].SetPresent(this, idx); } // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or @@ -605,9 +635,7 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept chunk.transactions.Reset(idx); if (chunk.transactions.None()) { // Last transaction in the chunk. - auto [it, inserted] = graph.m_chunkindex.emplace(graph_idx, chunk_count); - Assume(inserted); - entry.m_chunkindex_iterator = it; + graph.CreateChunkData(graph_idx, chunk_count); break; } } @@ -866,13 +894,9 @@ void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting // merged Cluster later anyway). auto& entry = graph.m_entries[idx]; - if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { - // Destroy any potential ChunkData prior to modifying the Cluster (as that could - // invalidate its ordering). - Assume(graph.m_chunkindex_observers == 0); - graph.m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = graph.m_chunkindex.end(); - } + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + if (m_level == 0) graph.ClearChunkData(entry); entry.m_locator[m_level].SetPresent(this, new_pos); } // Purge the other Cluster, now that everything has been moved. @@ -1102,6 +1126,9 @@ void TxGraphImpl::Compact() noexcept if (!clusterset.m_removed.empty()) return; } + // Release memory used by discarded ChunkData index entries. + ClearShrink(m_chunkindex_discarded); + // Sort the GraphIndexes that need to be cleaned up. 
They are sorted in reverse, so the last // ones get processed first. This means earlier-processed GraphIndexes will not cause moving of // later-processed ones during the "swap with end of m_entries" step below (which might @@ -1620,11 +1647,7 @@ void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept GraphIndex idx = m_mapping[ci]; auto& entry = graph.m_entries[idx]; entry.m_locator[m_level].SetMissing(); - if (m_level == 0 && entry.m_chunkindex_iterator != graph.m_chunkindex.end()) { - Assume(graph.m_chunkindex_observers == 0); - graph.m_chunkindex.erase(entry.m_chunkindex_iterator); - entry.m_chunkindex_iterator = graph.m_chunkindex.end(); - } + if (m_level == 0) graph.ClearChunkData(entry); } } From d82f125196328509438924b2ca154e52edb888de Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 27 Nov 2024 16:12:10 -0500 Subject: [PATCH 29/30] txgraph: (optimization) skipping end of cluster has no impact --- src/txgraph.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index f85e081326c..740c8c2b792 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -148,8 +148,9 @@ public: /** Process elements from the front of args that apply to this cluster, and append Refs for the * union of their descendants to output. */ void GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept; - /** Get a vector of Refs for all elements of this Cluster, in linearization order. */ - void GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept; + /** Get a vector of Refs for all elements of this Cluster, in linearization order. Returns + * whether the range ends at the end of the cluster. */ + bool GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept; /** Get the individual transaction feerate of a Cluster element.
*/ FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept; /** Modify the fee of a Cluster element. */ @@ -521,8 +522,8 @@ class BlockBuilderImpl final : public TxGraph::BlockBuilder /** Vector for actual storage pointed to by TxGraph::BlockBuilder::m_current_chunk. */ std::vector m_chunkdata; /** Which cluster the current chunk belongs to, so we can exclude further transaction from it - * when that chunk is skipped. */ - Cluster* m_remaining_cluster{nullptr}; + * when that chunk is skipped, or std::nullopt if we're at the end of the current cluster. */ + std::optional m_remaining_cluster{nullptr}; /** Clusters which we're not including further transactions from. */ std::set m_excluded_clusters; /** Iterator to the next chunk (after the current one) in the chunk index. end() if nothing @@ -1623,7 +1624,7 @@ void Cluster::GetDescendantRefs(const TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept +bool Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept { // Translate the transactions in the Cluster (in linearization order, starting at start_pos in // the linearization) to Refs, and fill them in range. @@ -1632,6 +1633,8 @@ void Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, Assume(entry.m_ref != nullptr); ref = entry.m_ref; } + // Return whether this was the end of the Cluster. + return start_pos == m_linearization.size(); } FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept @@ -2265,8 +2268,12 @@ void BlockBuilderImpl::Next() noexcept // Populate m_current_chunk. 
m_chunkdata.resize(chunk_data.m_chunk_count); auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; - cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); - m_remaining_cluster = cluster; + bool is_end = cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + if (is_end) { + m_remaining_cluster = std::nullopt; + } else { + m_remaining_cluster = cluster; + } m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); return; } @@ -2306,7 +2313,9 @@ void BlockBuilderImpl::Skip() noexcept { // When skipping a chunk we need to not include anything more of the cluster, as that could make // the result topologically invalid. - m_excluded_clusters.insert(m_remaining_cluster); + if (m_remaining_cluster.has_value()) { + m_excluded_clusters.insert(*m_remaining_cluster); + } Next(); } From d3816606de5187e027252bad3c81a66bda6546ae Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 27 Nov 2024 15:29:40 -0500 Subject: [PATCH 30/30] txgraph: (optimization) special-case singletons in chunk index --- src/txgraph.cpp | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 740c8c2b792..0c23b939088 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -249,7 +249,7 @@ private: { /** The Entry which is the last transaction of the chunk. */ mutable GraphIndex m_graph_index; - /** How many transactions the chunk contains. */ + /** How many transactions the chunk contains (-1 = singleton tail of cluster). */ LinearizationIndex m_chunk_count; ChunkData(GraphIndex graph_index, LinearizationIndex chunk_count) noexcept : @@ -636,6 +636,12 @@ void Cluster::Updated(TxGraphImpl& graph) noexcept chunk.transactions.Reset(idx); if (chunk.transactions.None()) { // Last transaction in the chunk. 
+ if (chunk_count == 1 && chunk_idx + 1 == chunking.NumChunksLeft()) { + // If this is the final chunk of the cluster, and it contains just a single + // transaction (which will always be true for the very common singleton + // clusters), store the special value -1 as chunk count. + chunk_count = LinearizationIndex(-1); + } graph.CreateChunkData(graph_idx, chunk_count); break; } @@ -2266,13 +2272,22 @@ void BlockBuilderImpl::Next() noexcept // If we previously skipped a chunk from this cluster we cannot include more from it. if (m_excluded_clusters.contains(cluster)) continue; // Populate m_current_chunk. - m_chunkdata.resize(chunk_data.m_chunk_count); - auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; - bool is_end = cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); - if (is_end) { + if (chunk_data.m_chunk_count == LinearizationIndex(-1)) { + // Special case in case just a single transaction remains, avoiding the need to + // dispatch to and dereference Cluster. 
+ m_chunkdata.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + m_chunkdata[0] = chunk_end_entry.m_ref; m_remaining_cluster = std::nullopt; } else { - m_remaining_cluster = cluster; + m_chunkdata.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + bool is_end = cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + if (is_end) { + m_remaining_cluster = std::nullopt; + } else { + m_remaining_cluster = cluster; + } } m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); return; @@ -2335,10 +2350,17 @@ std::pair, FeePerWeight> TxGraphImpl::GetWorstMainChu const auto& chunk_data = *m_chunkindex.rbegin(); const auto& chunk_end_entry = m_entries[chunk_data.m_graph_index]; Cluster* cluster = chunk_end_entry.m_locator[0].cluster; - ret.first.resize(chunk_data.m_chunk_count); - auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; - cluster->GetClusterRefs(*this, ret.first, start_pos); - std::reverse(ret.first.begin(), ret.first.end()); + if (chunk_data.m_chunk_count == LinearizationIndex(-1) || chunk_data.m_chunk_count == 1) { + // Special case for singletons. + ret.first.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + ret.first[0] = chunk_end_entry.m_ref; + } else { + ret.first.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*this, ret.first, start_pos); + std::reverse(ret.first.begin(), ret.first.end()); + } ret.second = chunk_end_entry.m_main_chunk_feerate; } return ret;