diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 41577b2ad6d..e9a67faa51a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -301,6 +301,7 @@ add_library(bitcoin_node STATIC EXCLUDE_FROM_ALL signet.cpp torcontrol.cpp txdb.cpp + txgraph.cpp txmempool.cpp txorphanage.cpp txrequest.cpp diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp index 7d011975ddb..cb06f3fc28a 100644 --- a/src/bench/cluster_linearize.cpp +++ b/src/bench/cluster_linearize.cpp @@ -23,10 +23,10 @@ namespace { * remaining transaction, whose removal requires updating all remaining transactions' ancestor * set feerates. */ template -DepGraph MakeLinearGraph(ClusterIndex ntx) +DepGraph MakeLinearGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({-int32_t(i), 1}); if (i > 0) depgraph.AddDependencies(SetType::Singleton(i - 1), i); } @@ -38,10 +38,10 @@ DepGraph MakeLinearGraph(ClusterIndex ntx) * rechunking is needed after every candidate (the last transaction gets picked every time). */ template -DepGraph MakeWideGraph(ClusterIndex ntx) +DepGraph MakeWideGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({int32_t(i) + 1, 1}); if (i > 0) depgraph.AddDependencies(SetType::Singleton(0), i); } @@ -51,10 +51,10 @@ DepGraph MakeWideGraph(ClusterIndex ntx) // Construct a difficult graph. These need at least sqrt(2^(n-1)) iterations in the implemented // algorithm (purely empirically determined). template -DepGraph MakeHardGraph(ClusterIndex ntx) +DepGraph MakeHardGraph(DepGraphIndex ntx) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { if (ntx & 1) { // Odd cluster size. // @@ -121,7 +121,7 @@ DepGraph MakeHardGraph(ClusterIndex ntx) * iterations difference. 
*/ template -void BenchLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench, uint64_t iter_limit) +void BenchLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench, uint64_t iter_limit) { const auto depgraph = MakeHardGraph(ntx); uint64_t rng_seed = 0; @@ -147,12 +147,12 @@ void BenchLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench, uint64_t * cheap. */ template -void BenchLinearizeNoItersWorstCaseAnc(ClusterIndex ntx, benchmark::Bench& bench) +void BenchLinearizeNoItersWorstCaseAnc(DepGraphIndex ntx, benchmark::Bench& bench) { const auto depgraph = MakeLinearGraph(ntx); uint64_t rng_seed = 0; - std::vector old_lin(ntx); - for (ClusterIndex i = 0; i < ntx; ++i) old_lin[i] = i; + std::vector old_lin(ntx); + for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i; bench.run([&] { Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin); }); @@ -167,41 +167,41 @@ void BenchLinearizeNoItersWorstCaseAnc(ClusterIndex ntx, benchmark::Bench& bench * AncestorCandidateFinder is cheap. 
*/ template -void BenchLinearizeNoItersWorstCaseLIMO(ClusterIndex ntx, benchmark::Bench& bench) +void BenchLinearizeNoItersWorstCaseLIMO(DepGraphIndex ntx, benchmark::Bench& bench) { const auto depgraph = MakeWideGraph(ntx); uint64_t rng_seed = 0; - std::vector old_lin(ntx); - for (ClusterIndex i = 0; i < ntx; ++i) old_lin[i] = i; + std::vector old_lin(ntx); + for (DepGraphIndex i = 0; i < ntx; ++i) old_lin[i] = i; bench.run([&] { Linearize(depgraph, /*max_iterations=*/0, rng_seed++, old_lin); }); } template -void BenchPostLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +void BenchPostLinearizeWorstCase(DepGraphIndex ntx, benchmark::Bench& bench) { DepGraph depgraph = MakeWideGraph(ntx); - std::vector lin(ntx); + std::vector lin(ntx); bench.run([&] { - for (ClusterIndex i = 0; i < ntx; ++i) lin[i] = i; + for (DepGraphIndex i = 0; i < ntx; ++i) lin[i] = i; PostLinearize(depgraph, lin); }); } template -void BenchMergeLinearizationsWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +void BenchMergeLinearizationsWorstCase(DepGraphIndex ntx, benchmark::Bench& bench) { DepGraph depgraph; - for (ClusterIndex i = 0; i < ntx; ++i) { + for (DepGraphIndex i = 0; i < ntx; ++i) { depgraph.AddTransaction({i, 1}); if (i) depgraph.AddDependencies(SetType::Singleton(0), i); } - std::vector lin1; - std::vector lin2; + std::vector lin1; + std::vector lin2; lin1.push_back(0); lin2.push_back(0); - for (ClusterIndex i = 1; i < ntx; ++i) { + for (DepGraphIndex i = 1; i < ntx; ++i) { lin1.push_back(i); lin2.push_back(ntx - i); } @@ -214,7 +214,7 @@ template void BenchLinearizeOptimally(benchmark::Bench& bench, const std::array& serialized) { // Determine how many transactions the serialized cluster has. 
- ClusterIndex num_tx{0}; + DepGraphIndex num_tx{0}; { SpanReader reader{serialized}; DepGraph> depgraph; diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 50b121d9e4c..d5a6c24dc99 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -19,8 +19,8 @@ namespace cluster_linearize { -/** Data type to represent transaction indices in clusters. */ -using ClusterIndex = uint32_t; +/** Data type to represent transaction indices in DepGraphs and the clusters they represent. */ +using DepGraphIndex = uint32_t; /** Data structure that holds a transaction graph's preprocessed data (fee, size, ancestors, * descendants). */ @@ -86,11 +86,11 @@ public: * * Complexity: O(N^2) where N=depgraph.TxCount(). */ - DepGraph(const DepGraph& depgraph, Span mapping, ClusterIndex pos_range) noexcept : entries(pos_range) + DepGraph(const DepGraph& depgraph, Span mapping, DepGraphIndex pos_range) noexcept : entries(pos_range) { Assume(mapping.size() == depgraph.PositionRange()); Assume((pos_range == 0) == (depgraph.TxCount() == 0)); - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { auto new_idx = mapping[i]; Assume(new_idx < pos_range); // Add transaction. @@ -100,7 +100,7 @@ public: // Fill in fee and size. entries[new_idx].feerate = depgraph.entries[i].feerate; } - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { // Fill in dependencies by mapping direct parents. SetType parents; for (auto j : depgraph.GetReducedParents(i)) parents.Set(mapping[j]); @@ -113,29 +113,29 @@ public: /** Get the set of transactions positions in use. Complexity: O(1). */ const SetType& Positions() const noexcept { return m_used; } /** Get the range of positions in this DepGraph. All entries in Positions() are in [0, PositionRange() - 1]. 
*/ - ClusterIndex PositionRange() const noexcept { return entries.size(); } + DepGraphIndex PositionRange() const noexcept { return entries.size(); } /** Get the number of transactions in the graph. Complexity: O(1). */ auto TxCount() const noexcept { return m_used.Count(); } /** Get the feerate of a given transaction i. Complexity: O(1). */ - const FeeFrac& FeeRate(ClusterIndex i) const noexcept { return entries[i].feerate; } + const FeeFrac& FeeRate(DepGraphIndex i) const noexcept { return entries[i].feerate; } /** Get the mutable feerate of a given transaction i. Complexity: O(1). */ - FeeFrac& FeeRate(ClusterIndex i) noexcept { return entries[i].feerate; } + FeeFrac& FeeRate(DepGraphIndex i) noexcept { return entries[i].feerate; } /** Get the ancestors of a given transaction i. Complexity: O(1). */ - const SetType& Ancestors(ClusterIndex i) const noexcept { return entries[i].ancestors; } + const SetType& Ancestors(DepGraphIndex i) const noexcept { return entries[i].ancestors; } /** Get the descendants of a given transaction i. Complexity: O(1). */ - const SetType& Descendants(ClusterIndex i) const noexcept { return entries[i].descendants; } + const SetType& Descendants(DepGraphIndex i) const noexcept { return entries[i].descendants; } /** Add a new unconnected transaction to this transaction graph (in the first available - * position), and return its ClusterIndex. + * position), and return its DepGraphIndex. * * Complexity: O(1) (amortized, due to resizing of backing vector). 
*/ - ClusterIndex AddTransaction(const FeeFrac& feefrac) noexcept + DepGraphIndex AddTransaction(const FeeFrac& feefrac) noexcept { static constexpr auto ALL_POSITIONS = SetType::Fill(SetType::Size()); auto available = ALL_POSITIONS - m_used; Assume(available.Any()); - ClusterIndex new_idx = available.First(); + DepGraphIndex new_idx = available.First(); if (new_idx == entries.size()) { entries.emplace_back(feefrac, SetType::Singleton(new_idx), SetType::Singleton(new_idx)); } else { @@ -174,7 +174,7 @@ public: * * Complexity: O(N) where N=TxCount(). */ - void AddDependencies(const SetType& parents, ClusterIndex child) noexcept + void AddDependencies(const SetType& parents, DepGraphIndex child) noexcept { Assume(m_used[child]); Assume(parents.IsSubsetOf(m_used)); @@ -205,7 +205,7 @@ public: * * Complexity: O(N) where N=Ancestors(i).Count() (which is bounded by TxCount()). */ - SetType GetReducedParents(ClusterIndex i) const noexcept + SetType GetReducedParents(DepGraphIndex i) const noexcept { SetType parents = Ancestors(i); parents.Reset(i); @@ -226,7 +226,7 @@ public: * * Complexity: O(N) where N=Descendants(i).Count() (which is bounded by TxCount()). */ - SetType GetReducedChildren(ClusterIndex i) const noexcept + SetType GetReducedChildren(DepGraphIndex i) const noexcept { SetType children = Descendants(i); children.Reset(i); @@ -298,17 +298,28 @@ public: * * Complexity: O(select.Count() * log(select.Count())). 
*/ - void AppendTopo(std::vector& list, const SetType& select) const noexcept + void AppendTopo(std::vector& list, const SetType& select) const noexcept { - ClusterIndex old_len = list.size(); + DepGraphIndex old_len = list.size(); for (auto i : select) list.push_back(i); - std::sort(list.begin() + old_len, list.end(), [&](ClusterIndex a, ClusterIndex b) noexcept { + std::sort(list.begin() + old_len, list.end(), [&](DepGraphIndex a, DepGraphIndex b) noexcept { const auto a_anc_count = entries[a].ancestors.Count(); const auto b_anc_count = entries[b].ancestors.Count(); if (a_anc_count != b_anc_count) return a_anc_count < b_anc_count; return a < b; }); } + + /** Check if this graph is acyclic. */ + bool IsAcyclic() const noexcept + { + for (auto i : Positions()) { + if ((Ancestors(i) & Descendants(i)) != SetType::Singleton(i)) { + return false; + } + } + return true; + } }; /** A set of transactions together with their aggregate feerate. */ @@ -327,7 +338,7 @@ struct SetInfo SetInfo(const SetType& txn, const FeeFrac& fr) noexcept : transactions(txn), feerate(fr) {} /** Construct a SetInfo for a given transaction in a depgraph. */ - explicit SetInfo(const DepGraph& depgraph, ClusterIndex pos) noexcept : + explicit SetInfo(const DepGraph& depgraph, DepGraphIndex pos) noexcept : transactions(SetType::Singleton(pos)), feerate(depgraph.FeeRate(pos)) {} /** Construct a SetInfo for a set of transactions in a depgraph. */ @@ -335,7 +346,7 @@ struct SetInfo transactions(txn), feerate(depgraph.FeeRate(txn)) {} /** Add a transaction to this SetInfo (which must not yet be in it). */ - void Set(const DepGraph& depgraph, ClusterIndex pos) noexcept + void Set(const DepGraph& depgraph, DepGraphIndex pos) noexcept { Assume(!transactions[pos]); transactions.Set(pos); @@ -371,10 +382,10 @@ struct SetInfo /** Compute the feerates of the chunks of linearization. 
*/ template -std::vector ChunkLinearization(const DepGraph& depgraph, Span linearization) noexcept +std::vector ChunkLinearization(const DepGraph& depgraph, Span linearization) noexcept { std::vector ret; - for (ClusterIndex i : linearization) { + for (DepGraphIndex i : linearization) { /** The new chunk to be added, initially a singleton. */ auto new_chunk = depgraph.FeeRate(i); // As long as the new chunk has a higher feerate than the last chunk so far, absorb it. @@ -396,13 +407,13 @@ class LinearizationChunking const DepGraph& m_depgraph; /** The linearization we started from, possibly with removed prefix stripped. */ - Span m_linearization; + Span m_linearization; /** Chunk sets and their feerates, of what remains of the linearization. */ std::vector> m_chunks; /** How large a prefix of m_chunks corresponds to removed transactions. */ - ClusterIndex m_chunks_skip{0}; + DepGraphIndex m_chunks_skip{0}; /** Which transactions remain in the linearization. */ SetType m_todo; @@ -437,7 +448,7 @@ class LinearizationChunking public: /** Initialize a LinearizationSubset object for a given length of linearization. */ - explicit LinearizationChunking(const DepGraph& depgraph LIFETIMEBOUND, Span lin LIFETIMEBOUND) noexcept : + explicit LinearizationChunking(const DepGraph& depgraph LIFETIMEBOUND, Span lin LIFETIMEBOUND) noexcept : m_depgraph(depgraph), m_linearization(lin) { // Mark everything in lin as todo still. @@ -448,10 +459,10 @@ public: } /** Determine how many chunks remain in the linearization. */ - ClusterIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } + DepGraphIndex NumChunksLeft() const noexcept { return m_chunks.size() - m_chunks_skip; } /** Access a chunk. Chunk 0 is the highest-feerate prefix of what remains. 
*/ - const SetInfo& GetChunk(ClusterIndex n) const noexcept + const SetInfo& GetChunk(DepGraphIndex n) const noexcept { Assume(n + m_chunks_skip < m_chunks.size()); return m_chunks[n + m_chunks_skip]; @@ -494,7 +505,7 @@ public: Assume(subset.transactions.IsSubsetOf(m_todo)); SetInfo accumulator; // Iterate over all chunks of the remaining linearization. - for (ClusterIndex i = 0; i < NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < NumChunksLeft(); ++i) { // Find what (if any) intersection the chunk has with subset. const SetType to_add = GetChunk(i).transactions & subset.transactions; if (to_add.Any()) { @@ -546,13 +557,13 @@ public: m_ancestor_set_feerates(depgraph.PositionRange()) { // Precompute ancestor-set feerates. - for (ClusterIndex i : m_depgraph.Positions()) { + for (DepGraphIndex i : m_depgraph.Positions()) { /** The remaining ancestors for transaction i. */ SetType anc_to_add = m_depgraph.Ancestors(i); FeeFrac anc_feerate; // Reuse accumulated feerate from first ancestor, if usable. Assume(anc_to_add.Any()); - ClusterIndex first = anc_to_add.First(); + DepGraphIndex first = anc_to_add.First(); if (first < i) { anc_feerate = m_ancestor_set_feerates[first]; Assume(!anc_feerate.IsEmpty()); @@ -592,7 +603,7 @@ public: } /** Count the number of remaining unlinearized transactions. */ - ClusterIndex NumRemaining() const noexcept + DepGraphIndex NumRemaining() const noexcept { return m_todo.Count(); } @@ -605,7 +616,7 @@ public: SetInfo FindCandidateSet() const noexcept { Assume(!AllDone()); - std::optional best; + std::optional best; for (auto i : m_todo) { if (best.has_value()) { Assume(!m_ancestor_set_feerates[i].IsEmpty()); @@ -633,9 +644,9 @@ class SearchCandidateFinder /** Internal RNG. */ InsecureRandomContext m_rng; /** m_sorted_to_original[i] is the original position that sorted transaction position i had. 
*/ - std::vector m_sorted_to_original; + std::vector m_sorted_to_original; /** m_original_to_sorted[i] is the sorted position original transaction position i has. */ - std::vector m_original_to_sorted; + std::vector m_original_to_sorted; /** Internal dependency graph for the cluster (with transactions in decreasing individual * feerate order). */ DepGraph m_sorted_depgraph; @@ -673,7 +684,7 @@ public: { // Determine reordering mapping, by sorting by decreasing feerate. Unused positions are // not included, as they will never be looked up anyway. - ClusterIndex sorted_pos{0}; + DepGraphIndex sorted_pos{0}; for (auto i : depgraph.Positions()) { m_sorted_to_original[sorted_pos++] = i; } @@ -683,7 +694,7 @@ public: return feerate_cmp > 0; }); // Compute reverse mapping. - for (ClusterIndex i = 0; i < m_sorted_to_original.size(); ++i) { + for (DepGraphIndex i = 0; i < m_sorted_to_original.size(); ++i) { m_original_to_sorted[m_sorted_to_original[i]] = i; } // Compute reordered dependency graph. @@ -782,7 +793,7 @@ public: /** The set of transactions in m_todo which have feerate > best's. */ SetType imp = m_todo; while (imp.Any()) { - ClusterIndex check = imp.Last(); + DepGraphIndex check = imp.Last(); if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; imp.Reset(check); } @@ -839,7 +850,7 @@ public: best = inc; // See if we can remove any entries from imp now. while (imp.Any()) { - ClusterIndex check = imp.Last(); + DepGraphIndex check = imp.Last(); if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; imp.Reset(check); } @@ -880,7 +891,7 @@ public: // If pot is empty, then so is inc. Assume(elem.inc.feerate.IsEmpty() == elem.pot_feerate.IsEmpty()); - const ClusterIndex first = elem.und.First(); + const DepGraphIndex first = elem.und.First(); if (!elem.inc.feerate.IsEmpty()) { // If no undecided transactions remain with feerate higher than best, this entry // cannot be improved beyond best. @@ -906,17 +917,17 @@ public: // most. 
Let I(t) be the size of the undecided set after including t, and E(t) the size // of the undecided set after excluding t. Then choose the split transaction t such // that 2^I(t) + 2^E(t) is minimal, tie-breaking by highest individual feerate for t. - ClusterIndex split = 0; + DepGraphIndex split = 0; const auto select = elem.und & m_sorted_depgraph.Ancestors(first); Assume(select.Any()); - std::optional> split_counts; + std::optional> split_counts; for (auto t : select) { // Call max = max(I(t), E(t)) and min = min(I(t), E(t)). Let counts = {max,min}. // Sorting by the tuple counts is equivalent to sorting by 2^I(t) + 2^E(t). This // expression is equal to 2^max + 2^min = 2^max * (1 + 1/2^(max - min)). The second // factor (1 + 1/2^(max - min)) there is in (1,2]. Thus increasing max will always // increase it, even when min decreases. Because of this, we can first sort by max. - std::pair counts{ + std::pair counts{ (elem.und - m_sorted_depgraph.Ancestors(t)).Count(), (elem.und - m_sorted_depgraph.Descendants(t)).Count()}; if (counts.first < counts.second) std::swap(counts.first, counts.second); @@ -1016,13 +1027,13 @@ public: * Complexity: possibly O(N * min(max_iterations + N, sqrt(2^N))) where N=depgraph.TxCount(). */ template -std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, Span old_linearization = {}) noexcept +std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, Span old_linearization = {}) noexcept { Assume(old_linearization.empty() || old_linearization.size() == depgraph.TxCount()); if (depgraph.TxCount() == 0) return {{}, true}; uint64_t iterations_left = max_iterations; - std::vector linearization; + std::vector linearization; AncestorCandidateFinder anc_finder(depgraph); std::optional> src_finder; @@ -1110,7 +1121,7 @@ std::pair, bool> Linearize(const DepGraph& de * postlinearize" process. 
*/ template -void PostLinearize(const DepGraph& depgraph, Span linearization) +void PostLinearize(const DepGraph& depgraph, Span linearization) { // This algorithm performs a number of passes (currently 2); the even ones operate from back to // front, the odd ones from front to back. Each results in an equal-or-better linearization @@ -1148,9 +1159,9 @@ void PostLinearize(const DepGraph& depgraph, Span lineari // entries[0]. /** Index of the sentinel in the entries array below. */ - static constexpr ClusterIndex SENTINEL{0}; + static constexpr DepGraphIndex SENTINEL{0}; /** Indicator that a group has no previous transaction. */ - static constexpr ClusterIndex NO_PREV_TX{0}; + static constexpr DepGraphIndex NO_PREV_TX{0}; /** Data structure per transaction entry. */ @@ -1158,16 +1169,16 @@ void PostLinearize(const DepGraph& depgraph, Span lineari { /** The index of the previous transaction in this group; NO_PREV_TX if this is the first * entry of a group. */ - ClusterIndex prev_tx; + DepGraphIndex prev_tx; // The fields below are only used for transactions that are the last one in a group // (referred to as tail transactions below). /** Index of the first transaction in this group, possibly itself. */ - ClusterIndex first_tx; + DepGraphIndex first_tx; /** Index of the last transaction in the previous group. The first group (the sentinel) * points back to the last group here, making it a singly-linked circular list. */ - ClusterIndex prev_group; + DepGraphIndex prev_group; /** All transactions in the group. Empty for the sentinel. */ SetType group; /** All dependencies of the group (descendants in even passes; ancestors in odd ones). */ @@ -1210,12 +1221,12 @@ void PostLinearize(const DepGraph& depgraph, Span lineari Assume(entries[SENTINEL].feerate.IsEmpty()); // Iterate over all elements in the existing linearization. 
- for (ClusterIndex i = 0; i < linearization.size(); ++i) { + for (DepGraphIndex i = 0; i < linearization.size(); ++i) { // Even passes are from back to front; odd passes from front to back. - ClusterIndex idx = linearization[rev ? linearization.size() - 1 - i : i]; + DepGraphIndex idx = linearization[rev ? linearization.size() - 1 - i : i]; // Construct a new group containing just idx. In even passes, the meaning of // parent/child and high/low feerate are swapped. - ClusterIndex cur_group = idx + 1; + DepGraphIndex cur_group = idx + 1; entries[cur_group].group = SetType::Singleton(idx); entries[cur_group].deps = rev ? depgraph.Descendants(idx): depgraph.Ancestors(idx); entries[cur_group].feerate = depgraph.FeeRate(idx); @@ -1227,8 +1238,8 @@ void PostLinearize(const DepGraph& depgraph, Span lineari entries[SENTINEL].prev_group = cur_group; // Start merge/swap cycle. - ClusterIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel. - ClusterIndex prev_group = entries[cur_group].prev_group; + DepGraphIndex next_group = SENTINEL; // We inserted at the end, so next group is sentinel. + DepGraphIndex prev_group = entries[cur_group].prev_group; // Continue as long as the current group has higher feerate than the previous one. while (entries[cur_group].feerate >> entries[prev_group].feerate) { // prev_group/cur_group/next_group refer to (the last transactions of) 3 @@ -1256,7 +1267,7 @@ void PostLinearize(const DepGraph& depgraph, Span lineari entries[cur_group].prev_group = prev_group; } else { // There is no dependency between cur_group and prev_group; swap them. - ClusterIndex preprev_group = entries[prev_group].prev_group; + DepGraphIndex preprev_group = entries[prev_group].prev_group; // If PP, P, C, N were the old preprev, prev, cur, next groups, then the new // layout becomes [PP, C, P, N]. Update prev_groups to reflect that order. 
entries[next_group].prev_group = prev_group; @@ -1271,10 +1282,10 @@ void PostLinearize(const DepGraph& depgraph, Span lineari } // Convert the entries back to linearization (overwriting the existing one). - ClusterIndex cur_group = entries[0].prev_group; - ClusterIndex done = 0; + DepGraphIndex cur_group = entries[0].prev_group; + DepGraphIndex done = 0; while (cur_group != SENTINEL) { - ClusterIndex cur_tx = cur_group; + DepGraphIndex cur_tx = cur_group; // Traverse the transactions of cur_group (from back to front), and write them in the // same order during odd passes, and reversed (front to back) in even passes. if (rev) { @@ -1299,7 +1310,7 @@ void PostLinearize(const DepGraph& depgraph, Span lineari * Complexity: O(N^2) where N=depgraph.TxCount(); O(N) if both inputs are identical. */ template -std::vector MergeLinearizations(const DepGraph& depgraph, Span lin1, Span lin2) +std::vector MergeLinearizations(const DepGraph& depgraph, Span lin1, Span lin2) { Assume(lin1.size() == depgraph.TxCount()); Assume(lin2.size() == depgraph.TxCount()); @@ -1307,7 +1318,7 @@ std::vector MergeLinearizations(const DepGraph& depgraph, /** Chunkings of what remains of both input linearizations. */ LinearizationChunking chunking1(depgraph, lin1), chunking2(depgraph, lin2); /** Output linearization. */ - std::vector ret; + std::vector ret; if (depgraph.TxCount() == 0) return ret; ret.reserve(depgraph.TxCount()); @@ -1336,6 +1347,38 @@ std::vector MergeLinearizations(const DepGraph& depgraph, return ret; } +/** Make linearization topological, retaining its ordering where possible. */ +template +void FixLinearization(const DepGraph& depgraph, Span linearization) noexcept +{ + // This algorithm can be summarized as moving every element in the linearization backwards + // until it is placed after all its ancestors. + SetType done; + const auto len = linearization.size(); + // Iterate over the elements of linearization from back to front (i is distance from back). 
+ for (DepGraphIndex i = 0; i < len; ++i) { + /** The element at that position. */ + DepGraphIndex elem = linearization[len - 1 - i]; + /** j represents how far from the back of the linearization elem should be placed. */ + DepGraphIndex j = i; + // Figure out which elements need to be moved before elem. + SetType place_before = done & depgraph.Ancestors(elem); + // Find which position to place elem in (updating j), continuously moving the elements + // in between forward. + while (place_before.Any()) { + // j cannot be 0 here; if it was, then there was necessarily nothing earlier which + // elem needs to be place before anymore, and place_before would be empty. + Assume(j > 0); + auto to_swap = linearization[len - 1 - (j - 1)]; + place_before.Reset(to_swap); + linearization[len - 1 - (j--)] = to_swap; + } + // Put elem in its final position and mark it as done. + linearization[len - 1 - j] = elem; + done.Set(elem); + } +} + } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/cluster_linearize_tests.cpp b/src/test/cluster_linearize_tests.cpp index 265ccdc805e..3413af4a219 100644 --- a/src/test/cluster_linearize_tests.cpp +++ b/src/test/cluster_linearize_tests.cpp @@ -28,11 +28,11 @@ void TestDepGraphSerialization(const std::vector>& c // Construct DepGraph from cluster argument. 
DepGraph depgraph; SetType holes; - for (ClusterIndex i = 0; i < cluster.size(); ++i) { + for (DepGraphIndex i = 0; i < cluster.size(); ++i) { depgraph.AddTransaction(cluster[i].first); if (cluster[i] == HOLE) holes.Set(i); } - for (ClusterIndex i = 0; i < cluster.size(); ++i) { + for (DepGraphIndex i = 0; i < cluster.size(); ++i) { depgraph.AddDependencies(cluster[i].second, i); } depgraph.RemoveTransactions(holes); diff --git a/src/test/fuzz/CMakeLists.txt b/src/test/fuzz/CMakeLists.txt index e99c6d91f47..846afeeb474 100644 --- a/src/test/fuzz/CMakeLists.txt +++ b/src/test/fuzz/CMakeLists.txt @@ -124,6 +124,7 @@ add_executable(fuzz tx_in.cpp tx_out.cpp tx_pool.cpp + txgraph.cpp txorphan.cpp txrequest.cpp # Visual Studio 2022 version 17.12 introduced a bug diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 5b3770636ab..c7e40a833da 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -149,9 +149,9 @@ public: * than AncestorCandidateFinder and SearchCandidateFinder. */ template -std::pair, bool> SimpleLinearize(const DepGraph& depgraph, uint64_t max_iterations) +std::pair, bool> SimpleLinearize(const DepGraph& depgraph, uint64_t max_iterations) { - std::vector linearization; + std::vector linearization; SimpleCandidateFinder finder(depgraph); SetType todo = depgraph.Positions(); bool optimal = true; @@ -203,9 +203,9 @@ SetType ReadTopologicalSet(const DepGraph& depgraph, const SetType& tod /** Given a dependency graph, construct any valid linearization for it, reading from a SpanReader. */ template -std::vector ReadLinearization(const DepGraph& depgraph, SpanReader& reader) +std::vector ReadLinearization(const DepGraph& depgraph, SpanReader& reader) { - std::vector linearization; + std::vector linearization; TestBitSet todo = depgraph.Positions(); // In every iteration one topologically-valid transaction is appended to linearization. 
while (todo.Any()) { @@ -253,18 +253,18 @@ FUZZ_TARGET(clusterlin_depgraph_sim) * sim[i]->first is its individual feerate, and sim[i]->second is its set of ancestors. */ std::array>, TestBitSet::Size()> sim; /** The number of non-nullopt position in sim. */ - ClusterIndex num_tx_sim{0}; + DepGraphIndex num_tx_sim{0}; /** Read a valid index of a transaction from the provider. */ auto idx_fn = [&]() { - auto offset = provider.ConsumeIntegralInRange(0, num_tx_sim - 1); - for (ClusterIndex i = 0; i < sim.size(); ++i) { + auto offset = provider.ConsumeIntegralInRange(0, num_tx_sim - 1); + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (!sim[i].has_value()) continue; if (offset == 0) return i; --offset; } assert(false); - return ClusterIndex(-1); + return DepGraphIndex(-1); }; /** Read a valid subset of the transactions from the provider. */ @@ -273,7 +273,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) const auto mask = provider.ConsumeIntegralInRange(0, range); auto mask_shifted = mask; TestBitSet subset; - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (!sim[i].has_value()) continue; if (mask_shifted & 1) { subset.Set(i); @@ -289,7 +289,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto range = (uint64_t{1} << sim.size()) - 1; const auto mask = provider.ConsumeIntegralInRange(0, range); TestBitSet set; - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if ((mask >> i) & 1) { set.Set(i); } @@ -301,7 +301,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto anc_update_fn = [&]() { while (true) { bool updates{false}; - for (ClusterIndex chl = 0; chl < sim.size(); ++chl) { + for (DepGraphIndex chl = 0; chl < sim.size(); ++chl) { if (!sim[chl].has_value()) continue; for (auto par : sim[chl]->second) { if (!sim[chl]->second.IsSupersetOf(sim[par]->second)) { @@ -315,7 +315,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) }; /** Compare the state of transaction i in the simulation with the 
real one. */ - auto check_fn = [&](ClusterIndex i) { + auto check_fn = [&](DepGraphIndex i) { // Compare used positions. assert(real.Positions()[i] == sim[i].has_value()); if (sim[i].has_value()) { @@ -338,7 +338,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) auto idx = real.AddTransaction(feerate); // Verify that the returned index is correct. assert(!sim[idx].has_value()); - for (ClusterIndex i = 0; i < TestBitSet::Size(); ++i) { + for (DepGraphIndex i = 0; i < TestBitSet::Size(); ++i) { if (!sim[i].has_value()) { assert(idx == i); break; @@ -351,7 +351,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) } if ((command % 3) <= 1 && num_tx_sim > 0) { // AddDependencies. - ClusterIndex child = idx_fn(); + DepGraphIndex child = idx_fn(); auto parents = subset_fn(); // Apply to DepGraph. real.AddDependencies(parents, child); @@ -370,7 +370,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) // Apply to DepGraph. real.RemoveTransactions(del); // Apply to sim. - for (ClusterIndex i = 0; i < sim.size(); ++i) { + for (DepGraphIndex i = 0; i < sim.size(); ++i) { if (sim[i].has_value()) { if (del[i]) { --num_tx_sim; @@ -388,7 +388,7 @@ FUZZ_TARGET(clusterlin_depgraph_sim) // Compare the real obtained depgraph against the simulation. anc_update_fn(); - for (ClusterIndex i = 0; i < sim.size(); ++i) check_fn(i); + for (DepGraphIndex i = 0; i < sim.size(); ++i) check_fn(i); assert(real.TxCount() == num_tx_sim); // Sanity check the result (which includes round-tripping serialization, if applicable). SanityCheck(real); @@ -401,13 +401,42 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) // Construct a graph by deserializing. SpanReader reader(buffer); DepGraph depgraph; + DepGraphIndex par_code{0}, chl_code{0}; try { - reader >> Using(depgraph); + reader >> Using(depgraph) >> VARINT(par_code) >> VARINT(chl_code); } catch (const std::ios_base::failure&) {} SanityCheck(depgraph); // Verify the graph is a DAG. 
- assert(IsAcyclic(depgraph)); + assert(depgraph.IsAcyclic()); + + // Introduce a cycle, and then test that IsAcyclic returns false. + if (depgraph.TxCount() < 2) return; + DepGraphIndex par(0), chl(0); + // Pick any transaction of depgraph as parent. + par_code %= depgraph.TxCount(); + for (auto i : depgraph.Positions()) { + if (par_code == 0) { + par = i; + break; + } + --par_code; + } + // Pick any ancestor of par (excluding itself) as child, if any. + auto ancestors = depgraph.Ancestors(par) - TestBitSet::Singleton(par); + if (ancestors.None()) return; + chl_code %= ancestors.Count(); + for (auto i : ancestors) { + if (chl_code == 0) { + chl = i; + break; + } + --chl_code; + } + // Add the cycle-introducing dependency. + depgraph.AddDependencies(TestBitSet::Singleton(par), chl); + // Check that we now detect a cycle. + assert(!depgraph.IsAcyclic()); } FUZZ_TARGET(clusterlin_components) @@ -469,7 +498,7 @@ FUZZ_TARGET(clusterlin_components) reader >> VARINT(subset_bits); } catch (const std::ios_base::failure&) {} TestBitSet subset; - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { if (todo[i]) { if (subset_bits & 1) subset.Set(i); subset_bits >>= 1; @@ -526,7 +555,7 @@ FUZZ_TARGET(clusterlin_chunking) for (const auto& chunk_feerate : chunking) { assert(todo.Any()); SetInfo accumulator, best; - for (ClusterIndex idx : linearization) { + for (DepGraphIndex idx : linearization) { if (todo[idx]) { accumulator.Set(depgraph, idx); if (best.feerate.IsEmpty() || accumulator.feerate >> best.feerate) { @@ -737,7 +766,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking) assert(chunking.NumChunksLeft() > 0); // Construct linearization with just todo. - std::vector linearization_left; + std::vector linearization_left; for (auto i : linearization) { if (todo[i]) linearization_left.push_back(i); } @@ -747,13 +776,13 @@ FUZZ_TARGET(clusterlin_linearization_chunking) // Verify that it matches the feerates of the chunks of chunking. 
assert(chunking.NumChunksLeft() == chunking_left.size()); - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { assert(chunking.GetChunk(i).feerate == chunking_left[i]); } // Check consistency of chunking. TestBitSet combined; - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { const auto& chunk_info = chunking.GetChunk(i); // Chunks must be non-empty. assert(chunk_info.transactions.Any()); @@ -804,7 +833,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking) // - No non-empty intersection between the intersection and a prefix of the chunks of the // remainder of the linearization may be better than the intersection. TestBitSet prefix; - for (ClusterIndex i = 0; i < chunking.NumChunksLeft(); ++i) { + for (DepGraphIndex i = 0; i < chunking.NumChunksLeft(); ++i) { prefix |= chunking.GetChunk(i).transactions; auto reintersect = SetInfo(depgraph, prefix & intersect.transactions); if (!reintersect.feerate.IsEmpty()) { @@ -846,7 +875,7 @@ FUZZ_TARGET(clusterlin_linearize) if (make_connected) MakeConnected(depgraph); // Optionally construct an old linearization for it. - std::vector old_linearization; + std::vector old_linearization; { uint8_t have_old_linearization{0}; try { @@ -905,8 +934,8 @@ FUZZ_TARGET(clusterlin_linearize) // Only for very small clusters, test every topologically-valid permutation. if (depgraph.TxCount() <= 7) { - std::vector perm_linearization; - for (ClusterIndex i : depgraph.Positions()) perm_linearization.push_back(i); + std::vector perm_linearization; + for (DepGraphIndex i : depgraph.Positions()) perm_linearization.push_back(i); // Iterate over all valid permutations. do { // Determine whether perm_linearization is topological. @@ -942,7 +971,7 @@ FUZZ_TARGET(clusterlin_postlinearize) } catch (const std::ios_base::failure&) {} // Retrieve a linearization from the fuzz input. 
- std::vector linearization; + std::vector linearization; linearization = ReadLinearization(depgraph, reader); SanityCheck(depgraph, linearization); @@ -990,7 +1019,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) // Now construct a new graph, copying the nodes, but leaving only the first parent (even // direction) or the first child (odd direction). DepGraph depgraph_tree; - for (ClusterIndex i = 0; i < depgraph_gen.PositionRange(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.PositionRange(); ++i) { if (depgraph_gen.Positions()[i]) { depgraph_tree.AddTransaction(depgraph_gen.FeeRate(i)); } else { @@ -1002,14 +1031,14 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) depgraph_tree.RemoveTransactions(TestBitSet::Fill(depgraph_gen.PositionRange()) - depgraph_gen.Positions()); if (direction & 1) { - for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.TxCount(); ++i) { auto children = depgraph_gen.GetReducedChildren(i); if (children.Any()) { depgraph_tree.AddDependencies(TestBitSet::Singleton(i), children.First()); } } } else { - for (ClusterIndex i = 0; i < depgraph_gen.TxCount(); ++i) { + for (DepGraphIndex i = 0; i < depgraph_gen.TxCount(); ++i) { auto parents = depgraph_gen.GetReducedParents(i); if (parents.Any()) { depgraph_tree.AddDependencies(TestBitSet::Singleton(parents.First()), i); @@ -1018,7 +1047,7 @@ FUZZ_TARGET(clusterlin_postlinearize_tree) } // Retrieve a linearization from the fuzz input. - std::vector linearization; + std::vector linearization; linearization = ReadLinearization(depgraph_tree, reader); SanityCheck(depgraph_tree, linearization); @@ -1075,7 +1104,7 @@ FUZZ_TARGET(clusterlin_postlinearize_moved_leaf) // Construct a linearization identical to lin, but with the tail end of lin_leaf moved to the // back. 
- std::vector lin_moved; + std::vector lin_moved; for (auto i : lin) { if (i != lin_leaf.back()) lin_moved.push_back(i); } @@ -1118,3 +1147,65 @@ FUZZ_TARGET(clusterlin_merge) auto cmp2 = CompareChunks(chunking_merged, chunking2); assert(cmp2 >= 0); } + +FUZZ_TARGET(clusterlin_fix_linearization) +{ + // Verify expected properties of FixLinearization() on arbitrary linearizations. + + // Retrieve a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + // Construct an arbitrary linearization (not necessarily topological for depgraph). + std::vector linearization; + /** Which transactions of depgraph are yet to be included in linearization. */ + TestBitSet todo = depgraph.Positions(); + while (todo.Any()) { + // Read a number from the fuzz input in range [0, todo.Count()). + uint64_t val{0}; + try { + reader >> VARINT(val); + } catch (const std::ios_base::failure&) {} + val %= todo.Count(); + // Find the val'th element in todo, remove it from todo, and append it to linearization. + for (auto idx : todo) { + if (val == 0) { + linearization.push_back(idx); + todo.Reset(idx); + break; + } + --val; + } + } + assert(linearization.size() == depgraph.TxCount()); + + // Determine what prefix of linearization is topological, i.e., the position of the first entry + // in linearization which corresponds to a transaction that is not preceded by all its + // ancestors. + size_t topo_prefix = 0; + todo = depgraph.Positions(); + while (topo_prefix < linearization.size()) { + DepGraphIndex idx = linearization[topo_prefix]; + todo.Reset(idx); + if (todo.Overlaps(depgraph.Ancestors(idx))) break; + ++topo_prefix; + } + + // Then make a fixed copy of linearization. + auto linearization_fixed = linearization; + FixLinearization(depgraph, linearization_fixed); + // Sanity check it (which includes testing whether it is topological). 
+ SanityCheck(depgraph, linearization_fixed); + + // FixLinearization does not modify the topological prefix of linearization. + assert(std::equal(linearization.begin(), linearization.begin() + topo_prefix, + linearization_fixed.begin())); + // This also means that if linearization was entirely topological, FixLinearization cannot have + // modified it. This is implied by the assertion above already, but repeat it explicitly. + if (topo_prefix == linearization.size()) { + assert(linearization == linearization_fixed); + } +} diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp new file mode 100644 index 00000000000..233166f63ae --- /dev/null +++ b/src/test/fuzz/txgraph.cpp @@ -0,0 +1,993 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace cluster_linearize; + +namespace { + +/** Data type representing a naive simulated TxGraph, keeping all transactions (even from + * disconnected components) in a single DepGraph. Unlike the real TxGraph, this only models + * a single graph, and multiple instances are used to simulate main/staging. */ +struct SimTxGraph +{ + /** Maximum number of transactions to support simultaneously. Set this higher than txgraph's + * cluster count, so we can exercise situations with more transactions than fit in one + * cluster. */ + static constexpr unsigned MAX_TRANSACTIONS = MAX_CLUSTER_COUNT_LIMIT * 2; + /** Set type to use in the simulation. */ + using SetType = BitSet; + /** Data type for representing positions within SimTxGraph::graph. */ + using Pos = DepGraphIndex; + /** Constant to mean "missing in this graph". 
*/ + static constexpr auto MISSING = Pos(-1); + + /** The dependency graph (for all transactions in the simulation, regardless of + * connectivity/clustering). */ + DepGraph graph; + /** For each position in graph, which TxGraph::Ref it corresponds with (if any). Use shared_ptr + * so that a SimTxGraph can be copied to create a staging one, while sharing Refs with + * the main graph. */ + std::array, MAX_TRANSACTIONS> simmap; + /** For each TxGraph::Ref in graph, the position it corresponds with. */ + std::map simrevmap; + /** The set of TxGraph::Ref entries that have been removed, but not yet destroyed. */ + std::vector> removed; + /** Whether the graph is oversized (true = yes, false = no, std::nullopt = unknown). */ + std::optional oversized; + /** The configured maximum number of transactions per cluster. */ + DepGraphIndex max_cluster_count; + /** The configured maximum total size of transactions per cluster. */ + uint64_t max_cluster_size; + + /** Construct a new SimTxGraph with the specified maximum cluster count. */ + explicit SimTxGraph(DepGraphIndex max_cluster, uint64_t max_size) : + max_cluster_count(max_cluster), max_cluster_size(max_size) {} + + // Permit copying and moving. + SimTxGraph(const SimTxGraph&) noexcept = default; + SimTxGraph& operator=(const SimTxGraph&) noexcept = default; + SimTxGraph(SimTxGraph&&) noexcept = default; + SimTxGraph& operator=(SimTxGraph&&) noexcept = default; + + /** Check whether this graph is oversized (contains a connected component whose number of + * transactions exceeds max_cluster_count. */ + bool IsOversized() + { + if (!oversized.has_value()) { + // Only recompute when oversized isn't already known. + oversized = false; + auto todo = graph.Positions(); + // Iterate over all connected components of the graph. 
+ while (todo.Any()) { + auto component = graph.FindConnectedComponent(todo); + if (component.Count() > max_cluster_count) oversized = true; + uint64_t component_size{0}; + for (auto i : component) component_size += graph.FeeRate(i).size; + if (component_size > max_cluster_size) oversized = true; + todo -= component; + } + } + return *oversized; + } + + /** Determine the number of (non-removed) transactions in the graph. */ + DepGraphIndex GetTransactionCount() const { return graph.TxCount(); } + + /** Get the sum of all fees/sizes in the graph. */ + FeePerWeight SumAll() const + { + FeePerWeight ret; + for (auto i : graph.Positions()) { + ret += graph.FeeRate(i); + } + return ret; + } + + /** Get the position where ref occurs in this simulated graph, or -1 if it does not. */ + Pos Find(const TxGraph::Ref* ref) const + { + auto it = simrevmap.find(ref); + if (it != simrevmap.end()) return it->second; + return MISSING; + } + + /** Given a position in this simulated graph, get the corresponding TxGraph::Ref. */ + TxGraph::Ref* GetRef(Pos pos) + { + assert(graph.Positions()[pos]); + assert(simmap[pos]); + return simmap[pos].get(); + } + + /** Add a new transaction to the simulation. */ + TxGraph::Ref* AddTransaction(const FeePerWeight& feerate) + { + assert(graph.TxCount() < MAX_TRANSACTIONS); + auto simpos = graph.AddTransaction(feerate); + assert(graph.Positions()[simpos]); + simmap[simpos] = std::make_shared(); + auto ptr = simmap[simpos].get(); + simrevmap[ptr] = simpos; + // This may invalidate our cached oversized value. + if (oversized.has_value() && !*oversized) oversized = std::nullopt; + return ptr; + } + + /** Add a dependency between two positions in this graph. 
*/ + void AddDependency(TxGraph::Ref* parent, TxGraph::Ref* child) + { + auto par_pos = Find(parent); + if (par_pos == MISSING) return; + auto chl_pos = Find(child); + if (chl_pos == MISSING) return; + graph.AddDependencies(SetType::Singleton(par_pos), chl_pos); + // This may invalidate our cached oversized value. + if (oversized.has_value() && !*oversized) oversized = std::nullopt; + } + + /** Modify the transaction fee of a ref, if it exists. */ + void SetTransactionFee(TxGraph::Ref* ref, int64_t fee) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.FeeRate(pos).fee = fee; + } + + /** Remove the transaction in the specified position from the graph. */ + void RemoveTransaction(TxGraph::Ref* ref) + { + auto pos = Find(ref); + if (pos == MISSING) return; + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + // Retain the TxGraph::Ref corresponding to this position, so the Ref destruction isn't + // invoked until the simulation explicitly decided to do so. + removed.push_back(std::move(simmap[pos])); + simmap[pos].reset(); + // This may invalidate our cached oversized value. + if (oversized.has_value() && *oversized) oversized = std::nullopt; + } + + /** Destroy the transaction from the graph, including from the removed set. This will + * trigger TxGraph::Ref::~Ref. reset_oversize controls whether the cached oversized + * value is cleared (destroying does not clear oversizedness in TxGraph of the main + * graph while staging exists). */ + void DestroyTransaction(TxGraph::Ref* ref, bool reset_oversize) + { + auto pos = Find(ref); + if (pos == MISSING) { + // Wipe the ref, if it exists, from the removed vector. Use std::partition rather + // than std::erase because we don't care about the order of the entries that + // remain. 
+ auto remove = std::partition(removed.begin(), removed.end(), [&](auto& arg) { return arg.get() != ref; }); + removed.erase(remove, removed.end()); + } else { + graph.RemoveTransactions(SetType::Singleton(pos)); + simrevmap.erase(simmap[pos].get()); + simmap[pos].reset(); + // This may invalidate our cached oversized value. + if (reset_oversize && oversized.has_value() && *oversized) { + oversized = std::nullopt; + } + } + } + + /** Construct the set with all positions in this graph corresponding to the specified + * TxGraph::Refs. All of them must occur in this graph and not be removed. */ + SetType MakeSet(std::span arg) + { + SetType ret; + for (TxGraph::Ref* ptr : arg) { + auto pos = Find(ptr); + assert(pos != Pos(-1)); + ret.Set(pos); + } + return ret; + } + + /** Get the set of ancestors (desc=false) or descendants (desc=true) in this graph. */ + SetType GetAncDesc(TxGraph::Ref* arg, bool desc) + { + auto pos = Find(arg); + if (pos == MISSING) return {}; + return desc ? graph.Descendants(pos) : graph.Ancestors(pos); + } + + /** Given a set of Refs (given as a vector of pointers), expand the set to include all its + * ancestors (desc=false) or all its descendants (desc=true) in this graph. */ + void IncludeAncDesc(std::vector& arg, bool desc) + { + std::vector ret; + for (auto ptr : arg) { + auto simpos = Find(ptr); + if (simpos != MISSING) { + for (auto i : desc ? graph.Descendants(simpos) : graph.Ancestors(simpos)) { + ret.push_back(simmap[i].get()); + } + } else { + ret.push_back(ptr); + } + } + // Deduplicate. + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + // Replace input. 
+ arg = std::move(ret); + } +}; + +} // namespace + +FUZZ_TARGET(txgraph) +{ + // This is a big simulation test for TxGraph, which performs a fuzz-derived sequence of valid + // operations on a TxGraph instance, as well as on a simpler (mostly) reimplementation (see + // SimTxGraph above), comparing the outcome of functions that return a result, and finally + // performing a full comparison between the two. + + SeedRandomStateForTest(SeedRand::ZEROS); + FuzzedDataProvider provider(buffer.data(), buffer.size()); + + /** Internal test RNG, used only for decisions which would require significant amount of data + * to be read from the provider, without realistically impacting test sensitivity. */ + InsecureRandomContext rng(0xdecade2009added + buffer.size()); + + /** Variable used whenever an empty TxGraph::Ref is needed. */ + TxGraph::Ref empty_ref; + + // Decide the maximum number of transactions per cluster we will use in this simulation. + auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); + // And the maximum combined size of transactions per cluster. + auto max_size = provider.ConsumeIntegralInRange(1, 0x3fffff * MAX_CLUSTER_COUNT_LIMIT); + + // Construct a real graph, and a vector of simulated graphs (main, and possibly staging). + auto real = MakeTxGraph(max_count, max_size); + std::vector sims; + sims.reserve(2); + sims.emplace_back(max_count, max_size); + + /** Struct encapsulating information about a BlockBuilder that's currently live. */ + struct BlockBuilderData + { + /** BlockBuilder object from real. */ + std::unique_ptr builder; + /** The set of transactions marked as included in *builder. */ + SimTxGraph::SetType done; + /** The last chunk feerate returned by *builder. IsEmpty() if none yet. */ + FeePerWeight last_feerate; + + BlockBuilderData(std::unique_ptr builder_in) : builder(std::move(builder_in)) {} + }; + + /** Currently active block builders. 
*/ + std::vector block_builders; + + /** Function to pick any Ref (for either sim in sims: from sim.simmap or sim.removed, or the + * empty Ref). */ + auto pick_fn = [&]() noexcept -> TxGraph::Ref* { + size_t tx_count[2] = {sims[0].GetTransactionCount(), 0}; + /** The number of possible choices. */ + size_t choices = tx_count[0] + sims[0].removed.size() + 1; + if (sims.size() == 2) { + tx_count[1] = sims[1].GetTransactionCount(); + choices += tx_count[1] + sims[1].removed.size(); + } + /** Pick one of them. */ + auto choice = provider.ConsumeIntegralInRange(0, choices - 1); + // Consider both main and (if it exists) staging. + for (size_t level = 0; level < sims.size(); ++level) { + auto& sim = sims[level]; + if (choice < tx_count[level]) { + // Return from graph. + for (auto i : sim.graph.Positions()) { + if (choice == 0) return sim.GetRef(i); + --choice; + } + assert(false); + } else { + choice -= tx_count[level]; + } + if (choice < sim.removed.size()) { + // Return from removed. + return sim.removed[choice].get(); + } else { + choice -= sim.removed.size(); + } + } + // Return empty. + assert(choice == 0); + return &empty_ref; + }; + + /** Function to construct the full diagram for a simulated graph. This works by fetching the + * clusters and chunking them manually, so it works for both main and staging + * (GetMainChunkFeerate only works for main). */ + auto get_diagram_fn = [&](bool main_only) -> std::vector { + int level = main_only ? 0 : sims.size() - 1; + auto& sim = sims[level]; + // For every transaction in the graph, request its cluster, and throw them into a set. + std::set> clusters; + for (auto i : sim.graph.Positions()) { + auto ref = sim.GetRef(i); + clusters.insert(real->GetCluster(*ref, main_only)); + } + // Compute the chunkings of each (deduplicated) cluster. 
+ size_t num_tx{0}; + std::vector ret; + for (const auto& cluster : clusters) { + num_tx += cluster.size(); + std::vector linearization; + linearization.reserve(cluster.size()); + for (auto refptr : cluster) linearization.push_back(sim.Find(refptr)); + for (const FeeFrac& chunk_feerate : ChunkLinearization(sim.graph, linearization)) { + ret.push_back(chunk_feerate); + } + } + // Verify the number of transactions after deduplicating clusters. This implicitly verifies + // that GetCluster on each element of a cluster reports the cluster transactions in the same + // order. + assert(num_tx == sim.GetTransactionCount()); + // Sort by feerate (we don't care about respecting ordering within clusters, as these are + // just feerates). + std::sort(ret.begin(), ret.end(), std::greater{}); + return ret; + }; + + LIMITED_WHILE(provider.remaining_bytes() > 0, 200) { + // Read a one-byte command. + int command = provider.ConsumeIntegral(); + + /** Use the bottom 2 bits of command to select an entry in the block_builders vector (if + * any). */ + int builder_idx = block_builders.empty() ? -1 : int((command & 3) % block_builders.size()); + + // Treat the lowest bit of a command as a flag (which selects a variant of some of the + // operations), and the second-lowest bit as a way of selecting main vs. staging, and leave + // the rest of the bits in command. + bool alt = command & 1; + bool use_main = command & 2; + command >>= 2; + + // Provide convenient aliases for the top simulated graph (main, or staging if it exists), + // one for the simulated graph selected based on use_main (for operations that can operate + // on both graphs), and one that always refers to the main graph. + auto& top_sim = sims.back(); + auto& sel_sim = use_main ? sims[0] : top_sim; + auto& main_sim = sims[0]; + + // Keep decrementing command for each applicable operation, until one is hit. Multiple + // iterations may be necessary. 
+ while (true) { + if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() < SimTxGraph::MAX_TRANSACTIONS && command-- == 0) { + // AddTransaction. + int64_t fee; + int32_t size; + if (alt) { + // If alt is true, pick fee and size from the entire range. + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + size = provider.ConsumeIntegralInRange(1, 0x3fffff); + } else { + // Otherwise, use smaller range which consume fewer fuzz input bytes, as just + // these are likely sufficient to trigger all interesting code paths already. + fee = provider.ConsumeIntegral(); + size = provider.ConsumeIntegralInRange(1, 0xff); + } + FeePerWeight feerate{fee, size}; + // Create a real TxGraph::Ref. + auto ref = real->AddTransaction(feerate); + // Create a shared_ptr place in the simulation to put the Ref in. + auto ref_loc = top_sim.AddTransaction(feerate); + // Move it in place. + *ref_loc = std::move(ref); + break; + } else if ((block_builders.empty() || sims.size() > 1) && top_sim.GetTransactionCount() + top_sim.removed.size() > 1 && command-- == 0) { + // AddDependency. + auto par = pick_fn(); + auto chl = pick_fn(); + auto pos_par = top_sim.Find(par); + auto pos_chl = top_sim.Find(chl); + if (pos_par != SimTxGraph::MISSING && pos_chl != SimTxGraph::MISSING) { + // Determine if adding this would introduce a cycle (not allowed by TxGraph), + // and if so, skip. + if (top_sim.graph.Ancestors(pos_par)[pos_chl]) break; + } + top_sim.AddDependency(par, chl); + real->AddDependency(*par, *chl); + break; + } else if ((block_builders.empty() || sims.size() > 1) && top_sim.removed.size() < 100 && command-- == 0) { + // RemoveTransaction. Either all its ancestors or all its descendants are also + // removed (if any), to make sure TxGraph's reordering of removals and dependencies + // has no effect. 
+ std::vector to_remove; + to_remove.push_back(pick_fn()); + top_sim.IncludeAncDesc(to_remove, alt); + // The order in which these ancestors/descendants are removed should not matter; + // randomly shuffle them. + std::shuffle(to_remove.begin(), to_remove.end(), rng); + for (TxGraph::Ref* ptr : to_remove) { + real->RemoveTransaction(*ptr); + top_sim.RemoveTransaction(ptr); + } + break; + } else if (sel_sim.removed.size() > 0 && command-- == 0) { + // ~Ref (of an already-removed transaction). Destroying a TxGraph::Ref has an + // observable effect on the TxGraph it refers to, so this simulation permits doing + // so separately from other actions on TxGraph. + + // Pick a Ref of sel_sim.removed to destroy. Note that the same Ref may still occur + // in the other graph, and thus not actually trigger ~Ref yet (which is exactly + // what we want, as destroying Refs is only allowed when it does not refer to an + // existing transaction in either graph). + auto removed_pos = provider.ConsumeIntegralInRange(0, sel_sim.removed.size() - 1); + if (removed_pos != sel_sim.removed.size() - 1) { + std::swap(sel_sim.removed[removed_pos], sel_sim.removed.back()); + } + sel_sim.removed.pop_back(); + break; + } else if (block_builders.empty() && command-- == 0) { + // ~Ref (of any transaction). + std::vector to_destroy; + to_destroy.push_back(pick_fn()); + while (true) { + // Keep adding either the ancestors or descendants the already picked + // transactions have in both graphs (main and staging) combined. Destroying + // will trigger deletions in both, so to have consistent TxGraph behavior, the + // set must be closed under ancestors, or descendants, in both graphs. + auto old_size = to_destroy.size(); + for (auto& sim : sims) sim.IncludeAncDesc(to_destroy, alt); + if (to_destroy.size() == old_size) break; + } + // The order in which these ancestors/descendants are destroyed should not matter; + // randomly shuffle them. 
+ std::shuffle(to_destroy.begin(), to_destroy.end(), rng); + for (TxGraph::Ref* ptr : to_destroy) { + for (size_t level = 0; level < sims.size(); ++level) { + sims[level].DestroyTransaction(ptr, level == sims.size() - 1); + } + } + break; + } else if (block_builders.empty() && command-- == 0) { + // SetTransactionFee. + int64_t fee; + if (alt) { + fee = provider.ConsumeIntegralInRange(-0x8000000000000, 0x7ffffffffffff); + } else { + fee = provider.ConsumeIntegral(); + } + auto ref = pick_fn(); + real->SetTransactionFee(*ref, fee); + for (auto& sim : sims) { + sim.SetTransactionFee(ref, fee); + } + break; + } else if (command-- == 0) { + // GetTransactionCount. + assert(real->GetTransactionCount(use_main) == sel_sim.GetTransactionCount()); + break; + } else if (command-- == 0) { + // Exists. + auto ref = pick_fn(); + bool exists = real->Exists(*ref, use_main); + bool should_exist = sel_sim.Find(ref) != SimTxGraph::MISSING; + assert(exists == should_exist); + break; + } else if (command-- == 0) { + // IsOversized. + assert(sel_sim.IsOversized() == real->IsOversized(use_main)); + break; + } else if (command-- == 0) { + // GetIndividualFeerate. + auto ref = pick_fn(); + auto feerate = real->GetIndividualFeerate(*ref); + bool found{false}; + for (auto& sim : sims) { + auto simpos = sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + found = true; + assert(feerate == sim.graph.FeeRate(simpos)); + } + } + if (!found) assert(feerate.IsEmpty()); + break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // GetMainChunkFeerate. + auto ref = pick_fn(); + auto feerate = real->GetMainChunkFeerate(*ref); + auto simpos = main_sim.Find(ref); + if (simpos == SimTxGraph::MISSING) { + assert(feerate.IsEmpty()); + } else { + // Just do some quick checks that the reported value is in range. A full + // recomputation of expected chunk feerates is done at the end. 
+ assert(feerate.size >= main_sim.graph.FeeRate(simpos).size); + assert(feerate.size <= main_sim.SumAll().size); + } + break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // GetAncestors/GetDescendants. + auto ref = pick_fn(); + auto result = alt ? real->GetDescendants(*ref, use_main) + : real->GetAncestors(*ref, use_main); + assert(result.size() <= max_count); + auto result_set = sel_sim.MakeSet(result); + assert(result.size() == result_set.Count()); + auto expect_set = sel_sim.GetAncDesc(ref, alt); + assert(result_set == expect_set); + break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // GetAncestorsUnion/GetDescendantsUnion. + std::vector refs; + // Gather a list of up to 15 Ref pointers. + auto count = provider.ConsumeIntegralInRange(0, 15); + refs.resize(count); + for (size_t i = 0; i < count; ++i) { + refs[i] = pick_fn(); + } + // Their order should not matter, shuffle them. + std::shuffle(refs.begin(), refs.end(), rng); + // Invoke the real function, and convert to SimPos set. + auto result = alt ? real->GetDescendantsUnion(refs, use_main) + : real->GetAncestorsUnion(refs, use_main); + auto result_set = sel_sim.MakeSet(result); + assert(result.size() == result_set.Count()); + // Compute the expected result. + SimTxGraph::SetType expect_set; + for (TxGraph::Ref* ref : refs) expect_set |= sel_sim.GetAncDesc(ref, alt); + // Compare. + assert(result_set == expect_set); + break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // GetCluster. + auto ref = pick_fn(); + auto result = real->GetCluster(*ref, use_main); + // Check cluster count limit. + assert(result.size() <= max_count); + // Require the result to be topologically valid and not contain duplicates. 
+ auto left = sel_sim.graph.Positions(); + uint64_t total_size{0}; + for (auto refptr : result) { + auto simpos = sel_sim.Find(refptr); + total_size += sel_sim.graph.FeeRate(simpos).size; + assert(simpos != SimTxGraph::MISSING); + assert(left[simpos]); + left.Reset(simpos); + assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left)); + } + // Check cluster size limit. + assert(total_size <= max_size); + // Require the set to be connected. + auto result_set = sel_sim.MakeSet(result); + assert(sel_sim.graph.IsConnected(result_set)); + // If ref exists, the result must contain it. If not, it must be empty. + auto simpos = sel_sim.Find(ref); + if (simpos != SimTxGraph::MISSING) { + assert(result_set[simpos]); + } else { + assert(result_set.None()); + } + // Require the set not to have ancestors or descendants outside of it. + for (auto i : result_set) { + assert(sel_sim.graph.Ancestors(i).IsSubsetOf(result_set)); + assert(sel_sim.graph.Descendants(i).IsSubsetOf(result_set)); + } + break; + } else if (command-- == 0) { + // HaveStaging. + assert((sims.size() == 2) == real->HaveStaging()); + break; + } else if (sims.size() < 2 && command-- == 0) { + // StartStaging. + sims.emplace_back(sims.back()); + real->StartStaging(); + break; + } else if (block_builders.empty() && sims.size() > 1 && command-- == 0) { + // CommitStaging. + real->CommitStaging(); + sims.erase(sims.begin()); + break; + } else if (sims.size() > 1 && command-- == 0) { + // AbortStaging. + real->AbortStaging(); + sims.pop_back(); + // Reset the cached oversized value (if TxGraph::Ref destructions triggered + // removals of main transactions while staging was active, then aborting will + // cause it to be re-evaluated in TxGraph). + sims.back().oversized = std::nullopt; + break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // CompareMainOrder. 
+ auto ref_a = pick_fn(); + auto ref_b = pick_fn(); + auto sim_a = main_sim.Find(ref_a); + auto sim_b = main_sim.Find(ref_b); + // Both transactions must exist in the main graph. + if (sim_a == SimTxGraph::MISSING || sim_b == SimTxGraph::MISSING) break; + auto cmp = real->CompareMainOrder(*ref_a, *ref_b); + // Distinct transactions have distinct places. + if (sim_a != sim_b) assert(cmp != 0); + // Ancestors go before descendants. + if (main_sim.graph.Ancestors(sim_a)[sim_b]) assert(cmp >= 0); + if (main_sim.graph.Descendants(sim_a)[sim_b]) assert(cmp <= 0); + // Do not verify consistency with chunk feerates, as we cannot easily determine + // these here without making more calls to real, which could affect its internal + // state. A full comparison is done at the end. + break; + } else if (!sel_sim.IsOversized() && command-- == 0) { + // CountDistinctClusters. + std::vector refs; + // Gather a list of up to 15 (or up to 255) Ref pointers. + auto count = provider.ConsumeIntegralInRange(0, alt ? 255 : 15); + refs.resize(count); + for (size_t i = 0; i < count; ++i) { + refs[i] = pick_fn(); + } + // Their order should not matter, shuffle them. + std::shuffle(refs.begin(), refs.end(), rng); + // Invoke the real function. + auto result = real->CountDistinctClusters(refs, use_main); + // Build a vector with representatives of the clusters the Refs occur in in the + // simulated graph. For each, remember the lowest-index transaction SimPos in the + // cluster. + std::vector sim_reps; + for (auto ref : refs) { + // Skip Refs that do not occur in the simulated graph. + auto simpos = sel_sim.Find(ref); + if (simpos == SimTxGraph::MISSING) continue; + // Start with component equal to just the Ref's SimPos. + auto component = SimTxGraph::SetType::Singleton(simpos); + // Keep adding ancestors/descendants of all elements in component until it no + // longer changes. 
+ while (true) { + auto old_component = component; + for (auto i : component) { + component |= sel_sim.graph.Ancestors(i); + component |= sel_sim.graph.Descendants(i); + } + if (component == old_component) break; + } + // Remember the lowest-index SimPos in component, as a representative for it. + assert(component.Any()); + sim_reps.push_back(component.First()); + } + // Remove duplicates from sim_reps. + std::sort(sim_reps.begin(), sim_reps.end()); + sim_reps.erase(std::unique(sim_reps.begin(), sim_reps.end()), sim_reps.end()); + // Compare the number of deduplicated representatives with the value returned by + // the real function. + assert(result == sim_reps.size()); + break; + } else if (command-- == 0) { + // DoWork. + real->DoWork(); + break; + } else if (sims.size() == 2 && !sims[0].IsOversized() && !sims[1].IsOversized() && command-- == 0) { + // GetMainStagingDiagrams() + auto [main_diagram, staged_diagram] = real->GetMainStagingDiagrams(); + auto sum_main = std::accumulate(main_diagram.begin(), main_diagram.end(), FeeFrac{}); + auto sum_staged = std::accumulate(staged_diagram.begin(), staged_diagram.end(), FeeFrac{}); + auto diagram_gain = sum_staged - sum_main; + auto real_gain = sims[1].SumAll() - sims[0].SumAll(); + // Just check that the total fee gained/lost and size gained/lost according to the + // diagram matches the difference in these values in the simulated graph. A more + // complete check of the GetMainStagingDiagrams result is performed at the end. + assert(diagram_gain == real_gain); + // Check that the feerates in each diagram are monotonically decreasing. + for (size_t i = 1; i < main_diagram.size(); ++i) { + assert(FeeRateCompare(main_diagram[i], main_diagram[i - 1]) <= 0); + } + for (size_t i = 1; i < staged_diagram.size(); ++i) { + assert(FeeRateCompare(staged_diagram[i], staged_diagram[i - 1]) <= 0); + } + break; + } else if (block_builders.size() < 4 && !main_sim.IsOversized() && command-- == 0) { + // GetBlockBuilder. 
+ block_builders.emplace_back(real->GetBlockBuilder()); + break; + } else if (!block_builders.empty() && command-- == 0) { + // ~BlockBuilder. + block_builders.erase(block_builders.begin() + builder_idx); + break; + } else if (!block_builders.empty() && *block_builders[builder_idx].builder && command-- == 0) { + // BlockBuilder::Include and BlockBuilder::Skip. + auto& builder_data = block_builders[builder_idx]; + auto cur_feerate = builder_data.builder->GetCurrentChunkFeerate(); + // Chunk feerates must be monotonously decreasing. + if (!builder_data.last_feerate.IsEmpty()) { + assert(!(cur_feerate >> builder_data.last_feerate)); + } + builder_data.last_feerate = cur_feerate; + // Verify the contents of GetCurrentChunk. + auto new_done = builder_data.done; + FeePerWeight sum_feerate; + for (TxGraph::Ref* ref : builder_data.builder->GetCurrentChunk()) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(ref); + assert(simpos != SimTxGraph::MISSING); + // Verify the claimed chunk feerate. + sum_feerate += main_sim.graph.FeeRate(simpos); + // Make sure no transaction is reported twice. + assert(!new_done[simpos]); + new_done.Set(simpos); + // The concatenation of all included transactions must be topologically valid. + assert(main_sim.graph.Ancestors(simpos).IsSubsetOf(new_done)); + } + assert(sum_feerate == cur_feerate); + // Skip or Include. + if (alt) { + builder_data.builder->Skip(); + } else { + builder_data.builder->Include(); + builder_data.done = new_done; + } + break; + } else if (!main_sim.IsOversized() && command-- == 0) { + // GetWorstMainChunk. + auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk(); + // Just do some sanity checks here. Consistency with GetBlockBuilder is checked + // below. 
+ if (main_sim.GetTransactionCount() == 0) { + assert(worst_chunk.empty()); + } else { + assert(!worst_chunk.empty()); + SimTxGraph::SetType done; + FeePerWeight sum; + for (TxGraph::Ref* ref : worst_chunk) { + // Each transaction in the chunk must exist in the main graph. + auto simpos = main_sim.Find(ref); + assert(simpos != SimTxGraph::MISSING); + sum += main_sim.graph.FeeRate(simpos); + // Make sure the chunk contains no duplicate transactions. + assert(!done[simpos]); + done.Set(simpos); + // All elements are preceded by all their descendants. + assert(main_sim.graph.Descendants(simpos).IsSubsetOf(done)); + } + assert(sum == worst_chunk_feerate); + } + break; + } else if ((block_builders.empty() || sims.size() > 1) && command-- == 0) { + // Trim. + bool was_oversized = top_sim.IsOversized(); + auto removed = real->Trim(); + if (!was_oversized) { + assert(removed.empty()); + break; + } + auto removed_set = top_sim.MakeSet(removed); + // The removed set must contain all its own descendants. + for (auto simpos : removed_set) { + assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set)); + } + // Apply all removals to the simulation, and verify the result is no longer + // oversized. Don't query the real graph for oversizedness; it is compared + // against the simulation anyway later. + for (auto simpos : removed_set) { + top_sim.RemoveTransaction(top_sim.GetRef(simpos)); + } + assert(!top_sim.IsOversized()); + break; + } + } + } + + // After running all modifications, perform an internal sanity check (before invoking + // inspectors that may modify the internal state). + real->SanityCheck(); + + if (!sims[0].IsOversized()) { + // If the main graph is not oversized, verify the total ordering implied by + // CompareMainOrder. + // First construct two distinct randomized permutations of the positions in sims[0]. 
+ std::vector vec1; + for (auto i : sims[0].graph.Positions()) vec1.push_back(i); + std::shuffle(vec1.begin(), vec1.end(), rng); + auto vec2 = vec1; + std::shuffle(vec2.begin(), vec2.end(), rng); + if (vec1 == vec2) std::next_permutation(vec2.begin(), vec2.end()); + // Sort both according to CompareMainOrder. By having randomized starting points, the order + // of CompareMainOrder invocations is somewhat randomized as well. + auto cmp = [&](SimTxGraph::Pos a, SimTxGraph::Pos b) noexcept { + return real->CompareMainOrder(*sims[0].GetRef(a), *sims[0].GetRef(b)) < 0; + }; + std::sort(vec1.begin(), vec1.end(), cmp); + std::sort(vec2.begin(), vec2.end(), cmp); + + // Verify the resulting orderings are identical. This could only fail if the ordering was + // not total. + assert(vec1 == vec2); + + // Verify that the ordering is topological. + auto todo = sims[0].graph.Positions(); + for (auto i : vec1) { + todo.Reset(i); + assert(!sims[0].graph.Ancestors(i).Overlaps(todo)); + } + assert(todo.None()); + + // For every transaction in the total ordering, find a random one before it and after it, + // and compare their chunk feerates, which must be consistent with the ordering. + for (size_t pos = 0; pos < vec1.size(); ++pos) { + auto pos_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[pos])); + if (pos > 0) { + size_t before = rng.randrange(pos); + auto before_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[before])); + assert(FeeRateCompare(before_feerate, pos_feerate) >= 0); + } + if (pos + 1 < vec1.size()) { + size_t after = pos + 1 + rng.randrange(vec1.size() - 1 - pos); + auto after_feerate = real->GetMainChunkFeerate(*sims[0].GetRef(vec1[after])); + assert(FeeRateCompare(after_feerate, pos_feerate) <= 0); + } + } + + // The same order should be obtained through a BlockBuilder, if nothing is skipped. 
+ auto builder = real->GetBlockBuilder(); + std::vector vec_builder; + std::vector chunk; + FeePerWeight chunk_feerate; + while (*builder) { + FeePerWeight sum; + auto chunk_span = builder->GetCurrentChunk(); + chunk.assign(chunk_span.begin(), chunk_span.end()); + for (TxGraph::Ref* ref : chunk_span) { + // The reported chunk feerate must match the chunk feerate obtained by asking + // it for each of the chunk's transactions individually. + assert(real->GetMainChunkFeerate(*ref) == builder->GetCurrentChunkFeerate()); + // Verify the chunk feerate matches the sum of the reported individual feerates. + sum += real->GetIndividualFeerate(*ref); + // Chunks must contain transactions that exist in the graph. + auto simpos = sims[0].Find(ref); + assert(simpos != SimTxGraph::MISSING); + vec_builder.push_back(simpos); + } + chunk_feerate = builder->GetCurrentChunkFeerate(); + assert(sum == chunk_feerate); + builder->Include(); + } + assert(vec_builder == vec1); + + // The last chunk returned by the BlockBuilder must match GetWorstMainChunk, in reverse. + std::reverse(chunk.begin(), chunk.end()); + auto [worst_chunk, worst_chunk_feerate] = real->GetWorstMainChunk(); + assert(chunk == worst_chunk); + assert(chunk_feerate == worst_chunk_feerate); + + // Check that the implied ordering gives rise to a combined diagram that matches the + // diagram constructed from the individual cluster linearization chunkings. + auto main_diagram = get_diagram_fn(true); + auto expected_main_diagram = ChunkLinearization(sims[0].graph, vec1); + assert(CompareChunks(main_diagram, expected_main_diagram) == 0); + + if (sims.size() >= 2 && !sims[1].IsOversized()) { + // When the staging graph is not oversized as well, call GetMainStagingDiagrams, and + // fully verify the result. + auto [main_cmp_diagram, stage_cmp_diagram] = real->GetMainStagingDiagrams(); + // Check that the feerates in each diagram are monotonically decreasing. 
+ for (size_t i = 1; i < main_cmp_diagram.size(); ++i) {
+ assert(FeeRateCompare(main_cmp_diagram[i], main_cmp_diagram[i - 1]) <= 0);
+ }
+ for (size_t i = 1; i < stage_cmp_diagram.size(); ++i) {
+ assert(FeeRateCompare(stage_cmp_diagram[i], stage_cmp_diagram[i - 1]) <= 0);
+ }
+ // Apply total ordering on the feerate diagrams to make them comparable (the exact
+ // tie breaker among equal-feerate FeeFracs does not matter, but it has to be
+ // consistent with the one used in main_diagram and stage_diagram).
+ std::sort(main_cmp_diagram.begin(), main_cmp_diagram.end(), std::greater{});
+ std::sort(stage_cmp_diagram.begin(), stage_cmp_diagram.end(), std::greater{});
+ // Find the chunks that appear in main_diagram but are missing from main_cmp_diagram.
+ // This is allowed, because GetMainStagingDiagrams omits clusters in main unaffected
+ // by staging.
+ std::vector missing_main_cmp;
+ std::set_difference(main_diagram.begin(), main_diagram.end(),
+ main_cmp_diagram.begin(), main_cmp_diagram.end(),
+ std::inserter(missing_main_cmp, missing_main_cmp.end()),
+ std::greater{});
+ assert(main_cmp_diagram.size() + missing_main_cmp.size() == main_diagram.size());
+ // Do the same for chunks in stage_diagram missing from stage_cmp_diagram.
+ auto stage_diagram = get_diagram_fn(false);
+ std::vector missing_stage_cmp;
+ std::set_difference(stage_diagram.begin(), stage_diagram.end(),
+ stage_cmp_diagram.begin(), stage_cmp_diagram.end(),
+ std::inserter(missing_stage_cmp, missing_stage_cmp.end()),
+ std::greater{});
+ assert(stage_cmp_diagram.size() + missing_stage_cmp.size() == stage_diagram.size());
+ // The missing chunks must be equal across main & staging (otherwise they couldn't have
+ // been omitted).
+ assert(missing_main_cmp == missing_stage_cmp);
+ }
+ }
+
+ assert(real->HaveStaging() == (sims.size() > 1));
+
+ // Try to run a full comparison, for both main_only=false and main_only=true in TxGraph
+ // inspector functions that support both. 
+ for (int main_only = 0; main_only < 2; ++main_only) { + auto& sim = main_only ? sims[0] : sims.back(); + // Compare simple properties of the graph with the simulation. + assert(real->IsOversized(main_only) == sim.IsOversized()); + assert(real->GetTransactionCount(main_only) == sim.GetTransactionCount()); + // If the graph (and the simulation) are not oversized, perform a full comparison. + if (!sim.IsOversized()) { + auto todo = sim.graph.Positions(); + // Iterate over all connected components of the resulting (simulated) graph, each of which + // should correspond to a cluster in the real one. + while (todo.Any()) { + auto component = sim.graph.FindConnectedComponent(todo); + todo -= component; + // Iterate over the transactions in that component. + for (auto i : component) { + // Check its individual feerate against simulation. + assert(sim.graph.FeeRate(i) == real->GetIndividualFeerate(*sim.GetRef(i))); + // Check its ancestors against simulation. + auto expect_anc = sim.graph.Ancestors(i); + auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i), main_only)); + assert(anc.Count() <= max_count); + assert(anc == expect_anc); + // Check its descendants against simulation. + auto expect_desc = sim.graph.Descendants(i); + auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i), main_only)); + assert(desc.Count() <= max_count); + assert(desc == expect_desc); + // Check the cluster the transaction is part of. + auto cluster = real->GetCluster(*sim.GetRef(i), main_only); + assert(cluster.size() <= max_count); + assert(sim.MakeSet(cluster) == component); + // Check that the cluster is reported in a valid topological order (its + // linearization). 
+ std::vector simlin; + SimTxGraph::SetType done; + uint64_t total_size{0}; + for (TxGraph::Ref* ptr : cluster) { + auto simpos = sim.Find(ptr); + assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); + done.Set(simpos); + assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); + simlin.push_back(simpos); + total_size += sim.graph.FeeRate(simpos).size; + } + // Check cluster size. + assert(total_size <= max_size); + // Construct a chunking object for the simulated graph, using the reported cluster + // linearization as ordering, and compare it against the reported chunk feerates. + if (sims.size() == 1 || main_only) { + cluster_linearize::LinearizationChunking simlinchunk(sim.graph, simlin); + DepGraphIndex idx{0}; + for (unsigned chunknum = 0; chunknum < simlinchunk.NumChunksLeft(); ++chunknum) { + auto chunk = simlinchunk.GetChunk(chunknum); + // Require that the chunks of cluster linearizations are connected (this must + // be the case as all linearizations inside are PostLinearized). + assert(sim.graph.IsConnected(chunk.transactions)); + // Check the chunk feerates of all transactions in the cluster. + while (chunk.transactions.Any()) { + assert(chunk.transactions[simlin[idx]]); + chunk.transactions.Reset(simlin[idx]); + assert(chunk.feerate == real->GetMainChunkFeerate(*cluster[idx])); + ++idx; + } + } + } + } + } + } + } + + // Sanity check again (because invoking inspectors may modify internal unobservable state). + real->SanityCheck(); +} diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index 871aa9d74ed..5992e819342 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -23,18 +23,6 @@ using namespace cluster_linearize; using TestBitSet = BitSet<32>; -/** Check if a graph is acyclic. 
*/ -template -bool IsAcyclic(const DepGraph& depgraph) noexcept -{ - for (ClusterIndex i : depgraph.Positions()) { - if ((depgraph.Ancestors(i) & depgraph.Descendants(i)) != SetType::Singleton(i)) { - return false; - } - } - return true; -} - /** A formatter for a bespoke serialization for acyclic DepGraph objects. * * The serialization format outputs information about transactions in a topological order (parents @@ -134,10 +122,10 @@ struct DepGraphFormatter static void Ser(Stream& s, const DepGraph& depgraph) { /** Construct a topological order to serialize the transactions in. */ - std::vector topo_order; + std::vector topo_order; topo_order.reserve(depgraph.TxCount()); for (auto i : depgraph.Positions()) topo_order.push_back(i); - std::sort(topo_order.begin(), topo_order.end(), [&](ClusterIndex a, ClusterIndex b) { + std::sort(topo_order.begin(), topo_order.end(), [&](DepGraphIndex a, DepGraphIndex b) { auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count(); if (anc_a != anc_b) return anc_a < anc_b; return a < b; @@ -148,9 +136,9 @@ struct DepGraphFormatter SetType done; // Loop over the transactions in topological order. - for (ClusterIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { + for (DepGraphIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) { /** Which depgraph index we are currently writing. */ - ClusterIndex idx = topo_order[topo_idx]; + DepGraphIndex idx = topo_order[topo_idx]; // Write size, which must be larger than 0. s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED); // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative). @@ -158,9 +146,9 @@ struct DepGraphFormatter // Write dependency information. SetType written_parents; uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over. 
- for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { /** Which depgraph index we are currently considering as parent of idx. */ - ClusterIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; + DepGraphIndex dep_idx = topo_order[topo_idx - 1 - dep_dist]; // Ignore transactions which are already known to be ancestors. if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue; if (depgraph.Ancestors(idx)[dep_idx]) { @@ -203,9 +191,9 @@ struct DepGraphFormatter DepGraph topo_depgraph; /** Mapping from serialization order to cluster order, used later to reconstruct the * cluster order. */ - std::vector reordering; + std::vector reordering; /** How big the entries vector in the reconstructed depgraph will be (including holes). */ - ClusterIndex total_size{0}; + DepGraphIndex total_size{0}; // Read transactions in topological order. while (true) { @@ -229,9 +217,9 @@ struct DepGraphFormatter // Read dependency information. auto topo_idx = reordering.size(); s >> VARINT(diff); - for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { + for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) { /** Which topo_depgraph index we are currently considering as parent of topo_idx. */ - ClusterIndex dep_topo_idx = topo_idx - 1 - dep_dist; + DepGraphIndex dep_topo_idx = topo_idx - 1 - dep_dist; // Ignore transactions which are already known ancestors of topo_idx. if (new_ancestors[dep_topo_idx]) continue; if (diff == 0) { @@ -298,9 +286,9 @@ template void SanityCheck(const DepGraph& depgraph) { // Verify Positions and PositionRange consistency. 
- ClusterIndex num_positions{0}; - ClusterIndex position_range{0}; - for (ClusterIndex i : depgraph.Positions()) { + DepGraphIndex num_positions{0}; + DepGraphIndex position_range{0}; + for (DepGraphIndex i : depgraph.Positions()) { ++num_positions; position_range = i + 1; } @@ -309,7 +297,7 @@ void SanityCheck(const DepGraph& depgraph) assert(position_range >= num_positions); assert(position_range <= SetType::Size()); // Consistency check between ancestors internally. - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { // Transactions include themselves as ancestors. assert(depgraph.Ancestors(i)[i]); // If a is an ancestor of b, then b's ancestors must include all of a's ancestors. @@ -318,8 +306,8 @@ void SanityCheck(const DepGraph& depgraph) } } // Consistency check between ancestors and descendants. - for (ClusterIndex i : depgraph.Positions()) { - for (ClusterIndex j : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { + for (DepGraphIndex j : depgraph.Positions()) { assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]); } // No transaction is a parent or child of itself. @@ -337,7 +325,7 @@ void SanityCheck(const DepGraph& depgraph) assert((depgraph.Descendants(child) & children).IsSubsetOf(SetType::Singleton(child))); } } - if (IsAcyclic(depgraph)) { + if (depgraph.IsAcyclic()) { // If DepGraph is acyclic, serialize + deserialize must roundtrip. std::vector ser; VectorWriter writer(ser, 0); @@ -360,7 +348,7 @@ void SanityCheck(const DepGraph& depgraph) // In acyclic graphs, the union of parents with parents of parents etc. yields the // full ancestor set (and similar for children and descendants). 
std::vector parents(depgraph.PositionRange()), children(depgraph.PositionRange()); - for (ClusterIndex i : depgraph.Positions()) { + for (DepGraphIndex i : depgraph.Positions()) { parents[i] = depgraph.GetReducedParents(i); children[i] = depgraph.GetReducedChildren(i); } @@ -392,7 +380,7 @@ void SanityCheck(const DepGraph& depgraph) /** Perform a sanity check on a linearization. */ template -void SanityCheck(const DepGraph& depgraph, Span linearization) +void SanityCheck(const DepGraph& depgraph, Span linearization) { // Check completeness. assert(linearization.size() == depgraph.TxCount()); diff --git a/src/txgraph.cpp b/src/txgraph.cpp new file mode 100644 index 00000000000..aeb2109bb14 --- /dev/null +++ b/src/txgraph.cpp @@ -0,0 +1,2795 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace { + +using namespace cluster_linearize; + +/** The maximum number of levels a TxGraph can have (0 = main, 1 = staging). */ +static constexpr int MAX_LEVELS{2}; + +// Forward declare the TxGraph implementation class. +class TxGraphImpl; + +/** Position of a DepGraphIndex within a Cluster::m_linearization. */ +using LinearizationIndex = uint32_t; +/** Position of a Cluster within Graph::ClusterSet::m_clusters. */ +using ClusterSetIndex = uint32_t; + +/** Quality levels for cached cluster linearizations. */ +enum class QualityLevel +{ + /** This is a singleton cluster consisting of a transaction that individually exceeds the + * cluster size limit. It cannot be merged with anything. */ + OVERSIZED, + /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. */ + NEEDS_SPLIT, + /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. 
*/
+ NEEDS_SPLIT_ACCEPTABLE,
+ /** This cluster has undergone changes that warrant re-linearization. */
+ NEEDS_RELINEARIZE,
+ /** The minimal level of linearization has been performed, but it is not known to be optimal. */
+ ACCEPTABLE,
+ /** The linearization is known to be optimal. */
+ OPTIMAL,
+ /** This cluster is not registered in any ClusterSet::m_clusters.
+ * This must be the last entry in QualityLevel as ClusterSet::m_clusters is sized using it. */
+ NONE,
+};
+
+/** Information about a transaction inside TxGraphImpl::Trim. */
+struct TrimTxData
+{
+ /** Chunk feerate for this transaction. */
+ FeePerWeight m_chunk_feerate;
+ /** GraphIndex of the transaction. */
+ TxGraph::GraphIndex m_index;
+ /** Number of unmet dependencies this transaction has. -1 if the transaction is included. */
+ uint32_t m_deps_left;
+ /** Number of dependencies that apply to this transaction as child. */
+ uint32_t m_parent_count;
+ /** Where in deps_by_child those dependencies begin. */
+ uint32_t m_parent_offset;
+ /** Number of dependencies that apply to this transaction as parent. */
+ uint32_t m_children_count;
+ /** Where in deps_by_parent those dependencies begin. */
+ uint32_t m_children_offset;
+ /** Size of the transaction. */
+ uint32_t m_tx_size;
+
+ // As transactions get processed, they get organized into trees which form partitions
+ // representing the would-be clusters up to that point. The root of each tree is a
+ // representative for that partition.
+ // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
+
+ /** Pointer to another TrimTxData, towards the root of the tree. If this is a root, m_uf_parent
+ * is equal to this itself. */
+ TrimTxData* m_uf_parent;
+ /** If this is a root, the total number of transactions in the partition. */
+ uint32_t m_uf_count;
+ /** If this is a root, the total size of transactions in the partition. */
+ uint64_t m_uf_size;
+};
+
+/** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. 
*/
+class Cluster
+{
+ friend class TxGraphImpl;
+ using GraphIndex = TxGraph::GraphIndex;
+ using SetType = BitSet;
+ /** The DepGraph for this cluster, holding all feerates, and ancestors/descendants. */
+ DepGraph m_depgraph;
+ /** m_mapping[i] gives the GraphIndex for the position i transaction in m_depgraph. Values for
+ * positions i that do not exist in m_depgraph shouldn't ever be accessed and thus don't
+ * matter. m_mapping.size() equals m_depgraph.PositionRange(). */
+ std::vector m_mapping;
+ /** The current linearization of the cluster. m_linearization.size() equals
+ * m_depgraph.TxCount(). This is always kept topological. */
+ std::vector m_linearization;
+ /** The quality level of m_linearization. */
+ QualityLevel m_quality{QualityLevel::NONE};
+ /** Which position this Cluster has in Graph::ClusterSet::m_clusters[m_quality]. */
+ ClusterSetIndex m_setindex{ClusterSetIndex(-1)};
+ /** Which level this Cluster is at in the graph (-1=not inserted, 0=main, 1=staging). */
+ int m_level{-1};
+
+public:
+ /** Construct an empty Cluster. */
+ Cluster() noexcept = default;
+ /** Construct a singleton Cluster. */
+ explicit Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept;
+
+ // Cannot move or copy (would invalidate Cluster* in Locator and ClusterSet).
+ Cluster(const Cluster&) = delete;
+ Cluster& operator=(const Cluster&) = delete;
+ Cluster(Cluster&&) = delete;
+ Cluster& operator=(Cluster&&) = delete;
+
+ // Generic helper functions.
+
+ /** Whether the linearization of this Cluster can be exposed. */
+ bool IsAcceptable(bool after_split = false) const noexcept
+ {
+ return m_quality == QualityLevel::ACCEPTABLE || m_quality == QualityLevel::OPTIMAL ||
+ (after_split && m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE);
+ }
+ /** Whether the linearization of this Cluster is optimal. 
*/
+ bool IsOptimal() const noexcept
+ {
+ return m_quality == QualityLevel::OPTIMAL;
+ }
+ /** Whether this cluster is oversized (just due to the size of its transaction(s), not due to
+ * dependencies that are yet to be added). */
+ bool IsOversized() const noexcept { return m_quality == QualityLevel::OVERSIZED; }
+ /** Whether this cluster requires splitting. */
+ bool NeedsSplitting() const noexcept
+ {
+ return m_quality == QualityLevel::NEEDS_SPLIT ||
+ m_quality == QualityLevel::NEEDS_SPLIT_ACCEPTABLE;
+ }
+ /** Get the number of transactions in this Cluster. */
+ LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); }
+ /** Get the total size of the transactions in this Cluster. */
+ uint64_t GetTxSize() const noexcept;
+ /** Given a DepGraphIndex into this Cluster, find the corresponding GraphIndex. */
+ GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept { return m_mapping[index]; }
+ /** Only called by Graph::SwapIndexes. */
+ void UpdateMapping(DepGraphIndex cluster_idx, GraphIndex graph_idx) noexcept { m_mapping[cluster_idx] = graph_idx; }
+ /** Push changes to Cluster and its linearization to the TxGraphImpl Entry objects. */
+ void Updated(TxGraphImpl& graph) noexcept;
+ /** Create a copy of this Cluster, returning a pointer to it (used by PullIn). */
+ Cluster* CopyTo(TxGraphImpl& graph, int to_level) const noexcept;
+ /** Get the list of Clusters that conflict with this one (at the level below this Cluster). */
+ void GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept;
+ /** Mark all the Entry objects belonging to this Cluster as missing. The Cluster must be
+ * deleted immediately after. */
+ void MakeTransactionsMissing(TxGraphImpl& graph) noexcept;
+ /** Remove all transactions from a Cluster. */
+ void Clear(TxGraphImpl& graph) noexcept;
+ /** Change a Cluster's level from level to level-1. 
*/
+ void LevelDown(TxGraphImpl& graph) noexcept;
+
+ // Functions that implement the Cluster-specific side of internal TxGraphImpl mutations.
+
+ /** Apply all removals from the front of to_remove that apply to this Cluster, popping them
+ * off. There must be at least one such entry. */
+ void ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept;
+ /** Split this cluster (must have a NEEDS_SPLIT* quality). Returns whether to delete this
+ * Cluster afterwards. */
+ [[nodiscard]] bool Split(TxGraphImpl& graph) noexcept;
+ /** Move all transactions from cluster to *this (as separate components). */
+ void Merge(TxGraphImpl& graph, Cluster& cluster) noexcept;
+ /** Given a span of (parent, child) pairs that all belong to this Cluster, apply them. */
+ void ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept;
+ /** Improve the linearization of this Cluster. */
+ void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept;
+ /** For every chunk in the cluster, append its FeeFrac to ret. */
+ void AppendChunkFeerates(std::vector& ret) const noexcept;
+ /** Add a TrimTxData entry for every transaction in the Cluster to ret. Implicit dependencies
+ * between consecutive transactions in the linearization are added to deps. Return the
+ * Cluster's combined transaction size. */
+ uint64_t AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept;
+
+ // Functions that implement the Cluster-specific side of public TxGraph functions.
+
+ /** Process elements from the front of args that apply to this cluster, and append Refs for the
+ * union of their ancestors to output. */
+ void GetAncestorRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept;
+ /** Process elements from the front of args that apply to this cluster, and append Refs for the
+ * union of their descendants to output. 
*/
+ void GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept;
+ /** Get a vector of Refs for all elements of this Cluster, in linearization order. Returns
+ * whether the range ends at the end of the cluster. */
+ bool GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept;
+ /** Get the individual transaction feerate of a Cluster element. */
+ FeePerWeight GetIndividualFeerate(DepGraphIndex idx) noexcept;
+ /** Modify the fee of a Cluster element. */
+ void SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept;
+
+ // Debugging functions.
+
+ void SanityCheck(const TxGraphImpl& graph, int level) const;
+};
+
+/** The transaction graph, including staged changes.
+ *
+ * The overall design of the data structure consists of 3 interlinked representations:
+ * - The transactions (held as a vector of TxGraphImpl::Entry inside TxGraphImpl).
+ * - The clusters (Cluster objects in per-quality vectors inside TxGraphImpl::ClusterSet).
+ * - The Refs (TxGraph::Ref objects, held externally by users of the TxGraph class)
+ *
+ * The Clusters are kept in one or two ClusterSet objects, one for the "main" graph, and one for
+ * the proposed changes ("staging"). If a transaction occurs in both, they share the same Entry,
+ * but there will be a separate Cluster per graph.
+ *
+ * Clusters and Refs contain the index of the Entry objects they refer to, and the Entry objects
+ * refer back to the Clusters and Refs the corresponding transaction is contained in.
+ *
+ * While redundant, this permits moving all of them independently, without invalidating things
+ * or costly iteration to fix up everything:
+ * - Entry objects can be moved to fill holes left by removed transactions in the Entry vector
+ * (see TxGraphImpl::Compact).
+ * - Clusters can be rewritten continuously (removals can cause them to split, new dependencies
+ * can cause them to be merged). 
+ * - Ref objects can be held outside the class, while permitting them to be moved around, and + * inherited from. + */ +class TxGraphImpl final : public TxGraph +{ + friend class Cluster; + friend class BlockBuilderImpl; +private: + /** Internal RNG. */ + FastRandomContext m_rng; + /** This TxGraphImpl's maximum cluster count limit. */ + const DepGraphIndex m_max_cluster_count; + /** This TxGraphImpl's maximum cluster size limit. */ + const uint64_t m_max_cluster_size; + + /** Information about one group of Clusters to be merged. */ + struct GroupEntry + { + /** Where the clusters to be merged start in m_group_clusters. */ + uint32_t m_cluster_offset; + /** How many clusters to merge. */ + uint32_t m_cluster_count; + /** Where the dependencies for this cluster group in m_deps_to_add start. */ + uint32_t m_deps_offset; + /** How many dependencies to add. */ + uint32_t m_deps_count; + }; + + /** Information about all groups of Clusters to be merged. */ + struct GroupData + { + /** The groups of Clusters to be merged. */ + std::vector m_groups; + /** Which clusters are to be merged. GroupEntry::m_cluster_offset indexes into this. */ + std::vector m_group_clusters; + /** Whether at least one of the groups cannot be applied because it would result in a + * Cluster that violates the cluster count limit. */ + bool m_group_oversized; + }; + + /** The collection of all Clusters in main or staged. */ + struct ClusterSet + { + /** The vectors of clusters, one vector per quality level. ClusterSetIndex indexes into each. */ + std::vector> m_clusters[int(QualityLevel::NONE)]; + /** Which removals have yet to be applied. */ + std::vector m_to_remove; + /** Which dependencies are to be added ((parent,child) pairs). GroupData::m_deps_offset indexes + * into this. */ + std::vector> m_deps_to_add; + /** Information about the merges to be performed, if known. 
*/ + std::optional m_group_data = GroupData{}; + /** Which entries were removed in this ClusterSet (so they can be wiped on abort). */ + std::vector m_removed; + /** Total number of transactions in this graph (sum of all transaction counts in all + * Clusters, and for staging also those inherited from the main ClusterSet). */ + GraphIndex m_txcount{0}; + /** Total number of individually oversized transactions in the graph. */ + GraphIndex m_txcount_oversized{0}; + /** Whether this graph is oversized (if known). This roughly matches + * m_group_data->m_group_oversized || (m_txcount_oversized > 0), but may be known even if + * m_group_data is not. */ + std::optional m_oversized{false}; + }; + + /** The ClusterSets in this TxGraphImpl. Has exactly 1 (main) or exactly 2 elements (main and staged). */ + std::vector m_clustersets; + + /** Information about a chunk in the main graph. */ + struct ChunkData + { + /** The Entry which is the last transaction of the chunk. */ + mutable GraphIndex m_graph_index; + /** How many transactions the chunk contains (-1 = singleton tail of cluster). */ + LinearizationIndex m_chunk_count; + + ChunkData(GraphIndex graph_index, LinearizationIndex chunk_count) noexcept : + m_graph_index{graph_index}, m_chunk_count{chunk_count} {} + }; + + /** Comparator for ChunkData objects in mining order. */ + class ChunkOrder + { + const TxGraphImpl* const m_graph; + public: + explicit ChunkOrder(const TxGraphImpl* graph) : m_graph(graph) {} + + bool operator()(const ChunkData& a, const ChunkData& b) const noexcept + { + const auto& a_entry = m_graph->m_entries[a.m_graph_index]; + const auto& b_entry = m_graph->m_entries[b.m_graph_index]; + // First sort from high feerate to low feerate. + auto cmp_feerate = FeeRateCompare(a_entry.m_main_chunk_feerate, b_entry.m_main_chunk_feerate); + if (cmp_feerate != 0) return cmp_feerate > 0; + // Then sort by increasing Cluster pointer. 
+ Assume(a_entry.m_locator[0].IsPresent()); + Assume(b_entry.m_locator[0].IsPresent()); + if (a_entry.m_locator[0].cluster != b_entry.m_locator[0].cluster) { + return std::less{}(a_entry.m_locator[0].cluster, b_entry.m_locator[0].cluster); + } + // Finally sort by position within the Cluster. + return a_entry.m_main_lin_index < b_entry.m_main_lin_index; + } + }; + + /** Definition for the mining index type. */ + using ChunkIndex = std::set; + + /** Index of ChunkData objects. */ + ChunkIndex m_chunkindex; + /** Number of index-observing objects in existence (BlockBuilderImpl). */ + size_t m_chunkindex_observers{0}; + /** Cache of discarded ChunkIndex node handles. */ + std::vector m_chunkindex_discarded; + + /** A Locator that describes whether, where, and in which Cluster an Entry appears. + * Every Entry has MAX_LEVELS locators, as it may appear in one Cluster per level. + * + * Each level of a Locator is in one of three states: + * + * - (P)resent: actually occurs in a Cluster at that level. + * + * - (M)issing: + * - In the main graph: the transaction does not exist in main. + * - In the staging graph: the transaction's existence is the same as in main. If it doesn't + * exist in main, (M) in staging means it does not exist there + * either. If it does exist in main, (M) in staging means the + * cluster it is in has not been modified in staging, and thus the + * transaction implicitly exists in staging too (without explicit + * Cluster object; see PullIn() to create it in staging too). + * + * - (R)emoved: only possible in staging; it means the transaction exists in main, but is + * removed in staging. + * + * The following combinations are possible: + * - (M,M): the transaction doesn't exist in either graph. + * - (P,M): the transaction exists in both, but only exists explicitly in a Cluster object in + * main. Its existence in staging is inherited from main. + * - (P,P): the transaction exists in both, and is materialized in both. 
Thus, the clusters + * and/or their linearizations may be different in main and staging. + * - (M,P): the transaction is added in staging, and does not exist in main. + * - (P,R): the transaction exists in main, but is removed in staging. + * + * When staging does not exist, only (M,M) and (P,M) are possible. + */ + struct Locator + { + /** Which Cluster the Entry appears in (nullptr = missing). */ + Cluster* cluster{nullptr}; + /** Where in the Cluster it appears (if cluster == nullptr: 0 = missing, -1 = removed). */ + DepGraphIndex index{0}; + + /** Mark this Locator as missing (= same as lower level, or non-existing if level 0). */ + void SetMissing() noexcept { cluster = nullptr; index = 0; } + /** Mark this Locator as removed (not allowed in level 0). */ + void SetRemoved() noexcept { cluster = nullptr; index = DepGraphIndex(-1); } + /** Mark this Locator as present, in the specified Cluster. */ + void SetPresent(Cluster* c, DepGraphIndex i) noexcept { cluster = c; index = i; } + /** Check if this Locator is missing. */ + bool IsMissing() const noexcept { return cluster == nullptr && index == 0; } + /** Check if this Locator is removed. */ + bool IsRemoved() const noexcept { return cluster == nullptr && index == DepGraphIndex(-1); } + /** Check if this Locator is present (in some Cluster). */ + bool IsPresent() const noexcept { return cluster != nullptr; } + }; + + /** A class of objects held internally in TxGraphImpl, with information about a single + * transaction. */ + struct Entry + { + /** Pointer to the corresponding Ref object if any, or nullptr if unlinked. */ + Ref* m_ref{nullptr}; + /** Iterator to the corresponding ChunkData, if any. */ + ChunkIndex::iterator m_chunkindex_iterator; + /** Which Cluster and position therein this Entry appears in. ([0] = main, [1] = staged). */ + Locator m_locator[MAX_LEVELS]; + /** The chunk feerate of this transaction in main (if present in m_locator[0]). 
*/ + FeePerWeight m_main_chunk_feerate; + /** The position this transaction has in the main linearization (if present). */ + LinearizationIndex m_main_lin_index; + }; + + /** The set of all transactions (in all levels combined). GraphIndex values index into this. */ + std::vector m_entries; + + /** Set of Entries which have no linked Ref anymore. */ + std::vector m_unlinked; + +public: + /** Construct a new TxGraphImpl with the specified limits. */ + explicit TxGraphImpl(DepGraphIndex max_cluster_count, uint64_t max_cluster_size) noexcept : + m_max_cluster_count(max_cluster_count), + m_max_cluster_size(max_cluster_size), + m_chunkindex(ChunkOrder(this)) + { + Assume(max_cluster_count >= 1); + Assume(max_cluster_count <= MAX_CLUSTER_COUNT_LIMIT); + m_clustersets.reserve(MAX_LEVELS); + m_clustersets.emplace_back(); + } + + // Cannot move or copy (would invalidate TxGraphImpl* in Ref, MiningOrder, EvictionOrder). + TxGraphImpl(const TxGraphImpl&) = delete; + TxGraphImpl& operator=(const TxGraphImpl&) = delete; + TxGraphImpl(TxGraphImpl&&) = delete; + TxGraphImpl& operator=(TxGraphImpl&&) = delete; + + // Simple helper functions. + + /** Swap the Entrys referred to by a and b. */ + void SwapIndexes(GraphIndex a, GraphIndex b) noexcept; + /** If idx exists in the specified level ClusterSet (explicitly, or in the level below and not + * removed), return the Cluster it is in. Otherwise, return nullptr. */ + Cluster* FindCluster(GraphIndex idx, int level) const noexcept; + /** Extract a Cluster from its ClusterSet. */ + std::unique_ptr ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept; + /** Delete a Cluster. */ + void DeleteCluster(Cluster& cluster) noexcept; + /** Insert a Cluster into its ClusterSet. */ + ClusterSetIndex InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept; + /** Change the QualityLevel of a Cluster (identified by old_quality and old_index). 
*/ + void SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept; + /** Make a transaction not exist at a specified level. It must currently exist there. */ + void ClearLocator(int level, GraphIndex index, bool oversized_tx) noexcept; + /** Find which Clusters conflict with the top level. */ + std::vector GetConflicts() const noexcept; + /** Clear an Entry's ChunkData. */ + void ClearChunkData(Entry& entry) noexcept; + /** Give an Entry a ChunkData object. */ + void CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept; + + // Functions for handling Refs. + + /** Only called by Ref's move constructor/assignment to update Ref locations. */ + void UpdateRef(GraphIndex idx, Ref& new_location) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + entry.m_ref = &new_location; + } + + /** Only called by Ref::~Ref to unlink Refs, and Ref's move assignment. */ + void UnlinkRef(GraphIndex idx) noexcept final + { + auto& entry = m_entries[idx]; + Assume(entry.m_ref != nullptr); + Assume(m_chunkindex_observers == 0 || !entry.m_locator[0].IsPresent()); + entry.m_ref = nullptr; + // Mark the transaction as to be removed in all levels where it explicitly or implicitly + // exists. + bool exists_anywhere{false}; + bool exists{false}; + for (size_t level = 0; level < m_clustersets.size(); ++level) { + if (entry.m_locator[level].IsPresent()) { + exists_anywhere = true; + exists = true; + } else if (entry.m_locator[level].IsRemoved()) { + exists = false; + } + if (exists) { + m_clustersets[level].m_to_remove.push_back(idx); + // Force recomputation of grouping data. + m_clustersets[level].m_group_data = std::nullopt; + // Do not wipe the oversized state of a lower level graph (main) if a higher level + // one (staging) exists. 
The reason for this is that the alternative would mean that + // cluster merges may need to be applied to a formerly-oversized main graph while + // staging exists (to satisfy chunk feerate queries into main, for example), and such + // merges could conflict with pulls of some of their constituents into staging. + if (level == m_clustersets.size() - 1 && m_clustersets[level].m_oversized == true) { + m_clustersets[level].m_oversized = std::nullopt; + } + } + } + m_unlinked.push_back(idx); + if (!exists_anywhere) Compact(); + } + + // Functions related to various normalization/application steps. + /** Get rid of unlinked Entry objects in m_entries, if possible (this changes the GraphIndex + * values for remaining Entrys, so this only does something when no to-be-applied operations + * or staged removals referring to GraphIndexes remain). */ + void Compact() noexcept; + /** If cluster is not in to_level, copy it there, and return a pointer to it. This has no + * effect if to_level is 0 (main), but for to_level=1 (staging) this modifies the locators of + * its transactions from inherited (P,M) to explicit (P,P). */ + Cluster* PullIn(Cluster* cluster, int to_level) noexcept; + /** Apply all removals queued up in m_to_remove to the relevant Clusters (which get a + * NEEDS_SPLIT* QualityLevel) up to the specified level. */ + void ApplyRemovals(int up_to_level) noexcept; + /** Split an individual cluster. */ + void Split(Cluster& cluster) noexcept; + /** Split all clusters that need splitting up to the specified level. */ + void SplitAll(int up_to_level) noexcept; + /** Populate m_group_data based on m_deps_to_add in the specified level. */ + void GroupClusters(int level) noexcept; + /** Merge the specified clusters. */ + void Merge(std::span to_merge) noexcept; + /** Apply all m_deps_to_add to the relevant Clusters in the specified level. */ + void ApplyDependencies(int level) noexcept; + /** Make a specified Cluster have quality ACCEPTABLE or OPTIMAL. 
*/ + void MakeAcceptable(Cluster& cluster) noexcept; + /** Make all Clusters at the specified level have quality ACCEPTABLE or OPTIMAL. */ + void MakeAllAcceptable(int level) noexcept; + + // Implementations for the public TxGraph interface. + + Ref AddTransaction(const FeePerWeight& feerate) noexcept final; + void RemoveTransaction(const Ref& arg) noexcept final; + void AddDependency(const Ref& parent, const Ref& child) noexcept final; + void SetTransactionFee(const Ref&, int64_t fee) noexcept final; + + void DoWork() noexcept final; + + void StartStaging() noexcept final; + void CommitStaging() noexcept final; + void AbortStaging() noexcept final; + bool HaveStaging() const noexcept final { return m_clustersets.size() > 1; } + + bool Exists(const Ref& arg, bool main_only = false) noexcept final; + FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept final; + FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept final; + std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept final; + std::vector GetAncestorsUnion(std::span args, bool main_only = false) noexcept final; + std::vector GetDescendantsUnion(std::span args, bool main_only = false) noexcept final; + GraphIndex GetTransactionCount(bool main_only = false) noexcept final; + bool IsOversized(bool main_only = false) noexcept final; + std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; + GraphIndex CountDistinctClusters(std::span refs, bool main_only = false) noexcept final; + std::pair, std::vector> GetMainStagingDiagrams() noexcept final; + std::vector Trim() noexcept final; + + std::unique_ptr GetBlockBuilder() noexcept final; + std::pair, FeePerWeight> GetWorstMainChunk() noexcept final; + + void SanityCheck() const final; +}; + +/** Implementation of the TxGraph::BlockBuilder 
interface. */ +class BlockBuilderImpl final : public TxGraph::BlockBuilder +{ + /** Which TxGraphImpl this object is doing block building for. It will have its + * m_chunkindex_observers incremented as long as this BlockBuilderImpl exists. */ + TxGraphImpl* const m_graph; + /** Vector for actual storage pointed to by TxGraph::BlockBuilder::m_current_chunk. */ + std::vector m_chunkdata; + /** Which cluster the current chunk belongs to, so we can exclude further transaction from it + * when that chunk is skipped, or std::nullopt if we're at the end of the current cluster. */ + std::optional m_remaining_cluster{nullptr}; + /** Clusters which we're not including further transactions from. */ + std::set m_excluded_clusters; + /** Iterator to the next chunk (after the current one) in the chunk index. end() if nothing + * further remains. */ + TxGraphImpl::ChunkIndex::const_iterator m_next_iter; + + /** Fill in information about the current chunk in m_current_chunk, m_chunkdata, + * m_remaining_cluster, and update m_next_iter. */ + void Next() noexcept; + +public: + /** Construct a new BlockBuilderImpl to build blocks for the provided graph. */ + BlockBuilderImpl(TxGraphImpl& graph) noexcept; + + // Implement the public interface. + ~BlockBuilderImpl() final; + void Include() noexcept final; + void Skip() noexcept final; +}; + +void TxGraphImpl::ClearChunkData(Entry& entry) noexcept +{ + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + Assume(m_chunkindex_observers == 0); + // If the Entry has a non-empty m_chunkindex_iterator, extract it, and move the handle + // to the cache of discarded chunkindex entries. + m_chunkindex_discarded.emplace_back(m_chunkindex.extract(entry.m_chunkindex_iterator)); + entry.m_chunkindex_iterator = m_chunkindex.end(); + } +} + +void TxGraphImpl::CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count) noexcept +{ + auto& entry = m_entries[idx]; + if (!m_chunkindex_discarded.empty()) { + // Reuse an discarded node handle. 
+ auto& node = m_chunkindex_discarded.back().value(); + node.m_graph_index = idx; + node.m_chunk_count = chunk_count; + auto insert_result = m_chunkindex.insert(std::move(m_chunkindex_discarded.back())); + Assume(insert_result.inserted); + entry.m_chunkindex_iterator = insert_result.position; + m_chunkindex_discarded.pop_back(); + } else { + // Construct a new entry. + auto emplace_result = m_chunkindex.emplace(idx, chunk_count); + Assume(emplace_result.second); + entry.m_chunkindex_iterator = emplace_result.first; + } +} + +uint64_t Cluster::GetTxSize() const noexcept +{ + uint64_t ret{0}; + for (auto i : m_linearization) { + ret += m_depgraph.FeeRate(i).size; + } + return ret; +} + +void TxGraphImpl::ClearLocator(int level, GraphIndex idx, bool oversized_tx) noexcept +{ + auto& entry = m_entries[idx]; + Assume(entry.m_locator[level].IsPresent()); + // Change the locator from Present to Missing or Removed. + if (level == 0 || !entry.m_locator[level - 1].IsPresent()) { + entry.m_locator[level].SetMissing(); + } else { + entry.m_locator[level].SetRemoved(); + m_clustersets[level].m_removed.push_back(idx); + } + // Update the transaction count. + --m_clustersets[level].m_txcount; + m_clustersets[level].m_txcount_oversized -= oversized_tx; + // Adjust the status of Locators of this transaction at higher levels. + for (size_t after_level = level + 1; after_level < m_clustersets.size(); ++after_level) { + if (entry.m_locator[after_level].IsPresent()) { + break; + } else if (entry.m_locator[after_level].IsRemoved()) { + entry.m_locator[after_level].SetMissing(); + break; + } else { + --m_clustersets[after_level].m_txcount; + m_clustersets[after_level].m_txcount_oversized -= oversized_tx; + } + } + if (level == 0) ClearChunkData(entry); +} + +void Cluster::Updated(TxGraphImpl& graph) noexcept +{ + // Update all the Locators for this Cluster's Entrys. 
+ for (DepGraphIndex idx : m_linearization) { + auto& entry = graph.m_entries[m_mapping[idx]]; + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + if (m_level == 0) graph.ClearChunkData(entry); + entry.m_locator[m_level].SetPresent(this, idx); + } + // If this is for the main graph (level = 0), and the Cluster's quality is ACCEPTABLE or + // OPTIMAL, compute its chunking and store its information in the Entry's m_main_lin_index + // and m_main_chunk_feerate. These fields are only accessed after making the entire graph + // ACCEPTABLE, so it is pointless to compute these if we haven't reached that quality level + // yet. + if (m_level == 0 && IsAcceptable()) { + LinearizationChunking chunking(m_depgraph, m_linearization); + LinearizationIndex lin_idx{0}; + // Iterate over the chunks. + for (unsigned chunk_idx = 0; chunk_idx < chunking.NumChunksLeft(); ++chunk_idx) { + auto chunk = chunking.GetChunk(chunk_idx); + auto chunk_count = chunk.transactions.Count(); + Assume(chunk_count > 0); + // Iterate over the transactions in the linearization, which must match those in chunk. + while (true) { + DepGraphIndex idx = m_linearization[lin_idx]; + GraphIndex graph_idx = m_mapping[idx]; + auto& entry = graph.m_entries[graph_idx]; + entry.m_main_lin_index = lin_idx++; + entry.m_main_chunk_feerate = FeePerWeight::FromFeeFrac(chunk.feerate); + Assume(chunk.transactions[idx]); + chunk.transactions.Reset(idx); + if (chunk.transactions.None()) { + // Last transaction in the chunk. + if (chunk_count == 1 && chunk_idx + 1 == chunking.NumChunksLeft()) { + // If this is the final chunk of the cluster, and it contains just a single + // transaction (which will always be true for the very common singleton + // clusters), store the special value -1 as chunk count. 
+ chunk_count = LinearizationIndex(-1); + } + graph.CreateChunkData(graph_idx, chunk_count); + break; + } + } + } + } +} + +void Cluster::GetConflicts(const TxGraphImpl& graph, std::vector& out) const noexcept +{ + for (auto i : m_linearization) { + auto& entry = graph.m_entries[m_mapping[i]]; + // For every transaction Entry in this Cluster, if it also exists in a lower-level Cluster, + // then that Cluster conflicts. + if (entry.m_locator[m_level - 1].IsPresent()) { + out.push_back(entry.m_locator[m_level - 1].cluster); + } + } +} + +std::vector TxGraphImpl::GetConflicts() const noexcept +{ + int level = m_clustersets.size() - 1; + std::vector ret; + // All Clusters at level-1 containing transactions in m_removed (so (P,R) ones) are conflicts. + for (auto i : m_clustersets[level].m_removed) { + auto& entry = m_entries[i]; + if (entry.m_locator[level - 1].IsPresent()) { + ret.push_back(entry.m_locator[level - 1].cluster); + } + } + // Then go over all Clusters at this level, and find their conflicts (the (P,P) ones). + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& clusters = m_clustersets[level].m_clusters[quality]; + for (const auto& cluster : clusters) { + cluster->GetConflicts(*this, ret); + } + } + // Deduplicate the result (the same Cluster may appear multiple times). + std::sort(ret.begin(), ret.end()); + ret.erase(std::unique(ret.begin(), ret.end()), ret.end()); + return ret; +} + +Cluster* Cluster::CopyTo(TxGraphImpl& graph, int to_level) const noexcept +{ + // Construct an empty Cluster. + auto ret = std::make_unique(); + auto ptr = ret.get(); + // Copy depgraph, mapping, and linearization/ + ptr->m_depgraph = m_depgraph; + ptr->m_mapping = m_mapping; + ptr->m_linearization = m_linearization; + // Insert the new Cluster into the graph. + graph.InsertCluster(to_level, std::move(ret), m_quality); + // Update its Locators. 
+ ptr->Updated(graph); + return ptr; +} + +void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove) noexcept +{ + // Iterate over the prefix of to_remove that applies to this cluster. + Assume(!to_remove.empty()); + SetType todo; + do { + GraphIndex idx = to_remove.front(); + Assume(idx < graph.m_entries.size()); + auto& entry = graph.m_entries[idx]; + auto& locator = entry.m_locator[m_level]; + // Stop once we hit an entry that applies to another Cluster. + if (locator.cluster != this) break; + // - Remember it in a set of to-remove DepGraphIndexes. + todo.Set(locator.index); + // - Remove from m_mapping. This isn't strictly necessary as unused positions in m_mapping + // are just never accessed, but set it to -1 here to increase the ability to detect a bug + // that causes it to be accessed regardless. + m_mapping[locator.index] = GraphIndex(-1); + // - Remove its linearization index from the Entry (if in main). + if (m_level == 0) { + entry.m_main_lin_index = LinearizationIndex(-1); + } + // - Mark it as missing/removed in the Entry's locator. + bool oversized_tx = uint64_t(m_depgraph.FeeRate(locator.index).size) > graph.m_max_cluster_size; + graph.ClearLocator(m_level, idx, oversized_tx); + to_remove = to_remove.subspan(1); + } while(!to_remove.empty()); + + auto quality = m_quality; + Assume(todo.Any()); + // Wipe from the Cluster's DepGraph (this is O(n) regardless of the number of entries + // removed, so we benefit from batching all the removals). + m_depgraph.RemoveTransactions(todo); + m_mapping.resize(m_depgraph.PositionRange()); + + // First remove all removals at the end of the linearization. + while (!m_linearization.empty() && todo[m_linearization.back()]) { + todo.Reset(m_linearization.back()); + m_linearization.pop_back(); + } + if (m_linearization.empty()) { + // Empty Cluster, it just needs to be deleted. Any NEEDS_SPLIT_* state will work. + // This will always be hit when the input is QualityLevel::OVERSIZED. 
+ quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; + } else if (todo.None()) { + // If no further removals remain, and thus all removals were at the end, we may be able + // to leave the cluster at a better quality level. + Assume(quality != QualityLevel::OVERSIZED); + if (IsAcceptable(/*after_split=*/true)) { + quality = QualityLevel::NEEDS_SPLIT_ACCEPTABLE; + } else { + quality = QualityLevel::NEEDS_SPLIT; + } + } else { + // If more removals remain, filter those out of m_linearization. + m_linearization.erase(std::remove_if( + m_linearization.begin(), + m_linearization.end(), + [&](auto pos) { return todo[pos]; }), m_linearization.end()); + quality = QualityLevel::NEEDS_SPLIT; + } + graph.SetClusterQuality(m_level, m_quality, m_setindex, quality); + Updated(graph); +} + +void Cluster::Clear(TxGraphImpl& graph) noexcept +{ + for (auto i : m_linearization) { + // We do not care about setting oversized_tx accurately here, because this function is only + // applied to main-graph Clusters in CommitStaging, which will overwrite main's + // m_txcount_oversized anyway with the staging graph's value. 
+ graph.ClearLocator(m_level, m_mapping[i], /*oversized_tx=*/false); + } + m_depgraph = {}; + m_linearization.clear(); + m_mapping.clear(); +} + +void Cluster::LevelDown(TxGraphImpl& graph) noexcept +{ + int level = m_level; + Assume(level > 0); + for (auto i : m_linearization) { + GraphIndex idx = m_mapping[i]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[level].SetMissing(); + } + auto quality = m_quality; + auto cluster = graph.ExtractCluster(level, quality, m_setindex); + graph.InsertCluster(level - 1, std::move(cluster), quality); + Updated(graph); +} + +void Cluster::AppendChunkFeerates(std::vector& ret) const noexcept +{ + auto chunk_feerates = ChunkLinearization(m_depgraph, m_linearization); + for (const auto& feerate : chunk_feerates) { + ret.push_back(feerate); + } +} + +uint64_t Cluster::AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept +{ + LinearizationChunking linchunking(m_depgraph, m_linearization); + LinearizationIndex pos{0}; + uint64_t size{0}; + auto prev_index = GraphIndex(-1); + // Iterate over the chunks of this cluster's linearization. + for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) { + const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i); + // Iterate over the transactions of that chunk, in linearization order. + auto chunk_tx_count = chunk.Count(); + for (unsigned j = 0; j < chunk_tx_count; ++j) { + auto cluster_idx = m_linearization[pos]; + // The transaction must appear in the chunk. + Assume(chunk[cluster_idx]); + // Construct a new element in ret. + auto& entry = ret.emplace_back(); + entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk_feerate); + entry.m_index = m_mapping[cluster_idx]; + // If this is not the first transaction of the cluster linearization, it has an + // implicit dependency on its predecessor. 
+ if (pos != 0) deps.emplace_back(prev_index, entry.m_index); + prev_index = entry.m_index; + entry.m_tx_size = m_depgraph.FeeRate(cluster_idx).size; + size += entry.m_tx_size; + ++pos; + } + } + return size; +} + +bool Cluster::Split(TxGraphImpl& graph) noexcept +{ + // This function can only be called when the Cluster needs splitting. + Assume(NeedsSplitting()); + // Determine the new quality the split-off Clusters will have. + QualityLevel new_quality = IsAcceptable(/*after_split=*/true) ? QualityLevel::ACCEPTABLE + : QualityLevel::NEEDS_RELINEARIZE; + // If we're going to produce ACCEPTABLE clusters (i.e., when in NEEDS_SPLIT_ACCEPTABLE), we + // need to post-linearize to make sure the split-out versions are all connected (as + // connectivity may have changed by removing part of the cluster). This could be done on each + // resulting split-out cluster separately, but it is simpler to do it once up front before + // splitting. This step is not necessary if the resulting clusters are NEEDS_RELINEARIZE, as + // they will be post-linearized anyway in MakeAcceptable(). + if (new_quality == QualityLevel::ACCEPTABLE) { + PostLinearize(m_depgraph, m_linearization); + } + /** Which positions are still left in this Cluster. */ + auto todo = m_depgraph.Positions(); + /** Mapping from transaction positions in this Cluster to the Cluster where it ends up, and + * its position therein. */ + std::vector> remap(m_depgraph.PositionRange()); + std::vector new_clusters; + bool first{true}; + // Iterate over the connected components of this Cluster's m_depgraph. + while (todo.Any()) { + auto component = m_depgraph.FindConnectedComponent(todo); + if (first && component == todo) { + // The existing Cluster is an entire component. Leave it be, but update its quality. + Assume(todo == m_depgraph.Positions()); + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); + // If this made the quality ACCEPTABLE or OPTIMAL, we need to compute and cache its + // chunking. 
+ Updated(graph); + return false; + } + first = false; + // Construct a new Cluster to hold the found component. + auto new_cluster = std::make_unique(); + new_clusters.push_back(new_cluster.get()); + // Remember that all the component's transactions go to this new Cluster. The positions + // will be determined below, so use -1 for now. + for (auto i : component) { + remap[i] = {new_cluster.get(), DepGraphIndex(-1)}; + } + graph.InsertCluster(m_level, std::move(new_cluster), new_quality); + todo -= component; + } + // Redistribute the transactions. + for (auto i : m_linearization) { + /** The cluster which transaction originally in position i is moved to. */ + Cluster* new_cluster = remap[i].first; + // Copy the transaction to the new cluster's depgraph, and remember the position. + remap[i].second = new_cluster->m_depgraph.AddTransaction(m_depgraph.FeeRate(i)); + // Create new mapping entry. + new_cluster->m_mapping.push_back(m_mapping[i]); + // Create a new linearization entry. As we're only appending transactions, they equal the + // DepGraphIndex. + new_cluster->m_linearization.push_back(remap[i].second); + } + // Redistribute the dependencies. + for (auto i : m_linearization) { + /** The cluster transaction in position i is moved to. */ + Cluster* new_cluster = remap[i].first; + // Copy its parents, translating positions. + SetType new_parents; + for (auto par : m_depgraph.GetReducedParents(i)) new_parents.Set(remap[par].second); + new_cluster->m_depgraph.AddDependencies(new_parents, remap[i].second); + } + // Update all the Locators of moved transactions. + for (Cluster* new_cluster : new_clusters) { + new_cluster->Updated(graph); + } + // Wipe this Cluster, and return that it needs to be deleted. + m_depgraph = DepGraph{}; + m_mapping.clear(); + m_linearization.clear(); + return true; +} + +void Cluster::Merge(TxGraphImpl& graph, Cluster& other) noexcept +{ + /** Vector to store the positions in this Cluster for each position in other. 
*/ + std::vector remap(other.m_depgraph.PositionRange()); + // Iterate over all transactions in the other Cluster (the one being absorbed). + for (auto pos : other.m_linearization) { + auto idx = other.m_mapping[pos]; + // Copy the transaction into this Cluster, and remember its position. + auto new_pos = m_depgraph.AddTransaction(other.m_depgraph.FeeRate(pos)); + remap[pos] = new_pos; + if (new_pos == m_mapping.size()) { + m_mapping.push_back(idx); + } else { + m_mapping[new_pos] = idx; + } + m_linearization.push_back(new_pos); + // Copy the transaction's dependencies, translating them using remap. Note that since + // pos iterates over other.m_linearization, which is in topological order, all parents + // of pos should already be in remap. + SetType parents; + for (auto par : other.m_depgraph.GetReducedParents(pos)) { + parents.Set(remap[par]); + } + m_depgraph.AddDependencies(parents, remap[pos]); + // Update the transaction's Locator. There is no need to call Updated() to update chunk + // feerates, as Updated() will be invoked by Cluster::ApplyDependencies on the resulting + // merged Cluster later anyway). + auto& entry = graph.m_entries[idx]; + // Discard any potential ChunkData prior to modifying the Cluster (as that could + // invalidate its ordering). + if (m_level == 0) graph.ClearChunkData(entry); + entry.m_locator[m_level].SetPresent(this, new_pos); + } + // Purge the other Cluster, now that everything has been moved. + other.m_depgraph = DepGraph{}; + other.m_linearization.clear(); + other.m_mapping.clear(); +} + +void Cluster::ApplyDependencies(TxGraphImpl& graph, std::span> to_apply) noexcept +{ + // This function is invoked by TxGraphImpl::ApplyDependencies after merging groups of Clusters + // between which dependencies are added, which simply concatenates their linearizations. Invoke + // PostLinearize, which has the effect that the linearization becomes a merge-sort of the + // constituent linearizations. 
Do this here rather than in Cluster::Merge, because this + // function is only invoked once per merged Cluster, rather than once per constituent one. + // This concatenation + post-linearization could be replaced with an explicit merge-sort. + PostLinearize(m_depgraph, m_linearization); + + // Sort the list of dependencies to apply by child, so those can be applied in batch. + std::sort(to_apply.begin(), to_apply.end(), [](auto& a, auto& b) { return a.second < b.second; }); + // Iterate over groups of to-be-added dependencies with the same child. + auto it = to_apply.begin(); + while (it != to_apply.end()) { + auto& first_child = graph.m_entries[it->second].m_locator[m_level]; + const auto child_idx = first_child.index; + // Iterate over all to-be-added dependencies within that same child, gather the relevant + // parents. + SetType parents; + while (it != to_apply.end()) { + auto& child = graph.m_entries[it->second].m_locator[m_level]; + auto& parent = graph.m_entries[it->first].m_locator[m_level]; + Assume(child.cluster == this && parent.cluster == this); + if (child.index != child_idx) break; + parents.Set(parent.index); + ++it; + } + // Push all dependencies to the underlying DepGraph. Note that this is O(N) in the size of + // the cluster, regardless of the number of parents being added, so batching them together + // has a performance benefit. + m_depgraph.AddDependencies(parents, child_idx); + } + + // Finally fix the linearization, as the new dependencies may have invalidated the + // linearization, and post-linearize it to fix up the worst problems with it. + FixLinearization(m_depgraph, m_linearization); + PostLinearize(m_depgraph, m_linearization); + + // Finally push the changes to graph.m_entries. 
+ Updated(graph); +} + +std::unique_ptr TxGraphImpl::ExtractCluster(int level, QualityLevel quality, ClusterSetIndex setindex) noexcept +{ + Assume(quality != QualityLevel::NONE); + Assume(level >= 0 && size_t(level) < m_clustersets.size()); + + auto& clusterset = m_clustersets[level]; + auto& quality_clusters = clusterset.m_clusters[int(quality)]; + Assume(setindex < quality_clusters.size()); + + // Extract the Cluster-owning unique_ptr. + std::unique_ptr ret = std::move(quality_clusters[setindex]); + ret->m_quality = QualityLevel::NONE; + ret->m_setindex = ClusterSetIndex(-1); + ret->m_level = -1; + + // Clean up space in quality_cluster. + auto max_setindex = quality_clusters.size() - 1; + if (setindex != max_setindex) { + // If the cluster was not the last element of quality_clusters, move that to take its place. + quality_clusters.back()->m_setindex = setindex; + quality_clusters.back()->m_level = level; + quality_clusters[setindex] = std::move(quality_clusters.back()); + } + // The last element of quality_clusters is now unused; drop it. + quality_clusters.pop_back(); + + return ret; +} + +ClusterSetIndex TxGraphImpl::InsertCluster(int level, std::unique_ptr&& cluster, QualityLevel quality) noexcept +{ + // Cannot insert with quality level NONE (as that would mean not inserted). + Assume(quality != QualityLevel::NONE); + // The passed-in Cluster must not currently be in the TxGraphImpl. + Assume(cluster->m_quality == QualityLevel::NONE); + // The specified level must exist. + Assume(level >= 0 && size_t(level) < m_clustersets.size()); + + // Append it at the end of the relevant TxGraphImpl::m_cluster. 
+ auto& clusterset = m_clustersets[level]; + auto& quality_clusters = clusterset.m_clusters[int(quality)]; + ClusterSetIndex ret = quality_clusters.size(); + cluster->m_quality = quality; + cluster->m_setindex = ret; + cluster->m_level = level; + quality_clusters.push_back(std::move(cluster)); + return ret; +} + +void TxGraphImpl::SetClusterQuality(int level, QualityLevel old_quality, ClusterSetIndex old_index, QualityLevel new_quality) noexcept +{ + Assume(new_quality != QualityLevel::NONE); + Assume(level >= 0 && size_t(level) < m_clustersets.size()); + + // Don't do anything if the quality did not change. + if (old_quality == new_quality) return; + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(level, old_quality, old_index); + // And re-insert it where it belongs. + InsertCluster(level, std::move(cluster_ptr), new_quality); +} + +void TxGraphImpl::DeleteCluster(Cluster& cluster) noexcept +{ + // Extract the cluster from where it currently resides. + auto cluster_ptr = ExtractCluster(cluster.m_level, cluster.m_quality, cluster.m_setindex); + // And throw it away. + cluster_ptr.reset(); +} + +Cluster* TxGraphImpl::FindCluster(GraphIndex idx, int level) const noexcept +{ + Assume(level >= 0 && size_t(level) < m_clustersets.size()); + auto& entry = m_entries[idx]; + // Search the entry's locators from top to bottom. + for (int l = level; l >= 0; --l) { + // If the locator is missing, dig deeper; it may exist at a lower level and therefore be + // implicitly existing at this level too. + if (entry.m_locator[l].IsMissing()) continue; + // If the locator has the entry marked as explicitly removed, stop. + if (entry.m_locator[l].IsRemoved()) break; + // Otherwise, we have found the topmost ClusterSet that contains this entry. + return entry.m_locator[l].cluster; + } + // If no non-empty locator was found, or an explicitly removed was hit, return nothing. 
+ return nullptr; +} + +Cluster* TxGraphImpl::PullIn(Cluster* cluster, int to_level) noexcept +{ + if (to_level == 0) return cluster; + int level = cluster->m_level; + Assume(level <= to_level); + // Copy the Cluster from the level it was found at to higher levels, if any. + while (level < to_level) { + // Make the Cluster Acceptable before copying. This isn't strictly necessary, but doing it + // now avoids doing double work later. + MakeAcceptable(*cluster); + ++level; + auto new_cluster = cluster->CopyTo(*this, level); + cluster = new_cluster; + } + return cluster; +} + +void TxGraphImpl::ApplyRemovals(int up_to_level) noexcept +{ + Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size()); + for (int level = 0; level <= up_to_level; ++level) { + auto& clusterset = m_clustersets[level]; + auto& to_remove = clusterset.m_to_remove; + // Skip if there is nothing to remove in this level. + if (to_remove.empty()) continue; + // Pull in all Clusters that are not in the ClusterSet at level level. + for (GraphIndex index : to_remove) { + auto cluster = FindCluster(index, level); + if (cluster != nullptr) PullIn(cluster, level); + } + // Group the set of to-be-removed entries by Cluster*. + std::sort(to_remove.begin(), to_remove.end(), [&](GraphIndex a, GraphIndex b) noexcept { + return std::less{}(m_entries[a].m_locator[level].cluster, m_entries[b].m_locator[level].cluster); + }); + // Process per Cluster. + std::span to_remove_span{to_remove}; + while (!to_remove_span.empty()) { + Cluster* cluster = m_entries[to_remove_span.front()].m_locator[level].cluster; + if (cluster != nullptr) { + // If the first to_remove_span entry's Cluster exists, hand to_remove_span to it, so it + // can pop off whatever applies to it. + cluster->ApplyRemovals(*this, to_remove_span); + } else { + // Otherwise, skip this already-removed entry. This may happen when + // RemoveTransaction was called twice on the same Ref, for example. 
+ to_remove_span = to_remove_span.subspan(1); + } + } + to_remove.clear(); + } + Compact(); +} + +void TxGraphImpl::SwapIndexes(GraphIndex a, GraphIndex b) noexcept +{ + Assume(a < m_entries.size()); + Assume(b < m_entries.size()); + // Swap the Entry objects. + std::swap(m_entries[a], m_entries[b]); + // Iterate over both objects. + for (int i = 0; i < 2; ++i) { + GraphIndex idx = i ? b : a; + Entry& entry = m_entries[idx]; + // Update linked Ref, if any exists. + if (entry.m_ref) GetRefIndex(*entry.m_ref) = idx; + // Update linked chunk index entries, if any exist. + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + entry.m_chunkindex_iterator->m_graph_index = idx; + } + // Update the locators for both levels. The rest of the Entry information will not change, + // so no need to invoke Cluster::Updated(). + for (int level = 0; level < MAX_LEVELS; ++level) { + Locator& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->UpdateMapping(locator.index, idx); + } + } + } +} + +void TxGraphImpl::Compact() noexcept +{ + // We cannot compact while any to-be-applied operations or staged removals remain as we'd need + // to rewrite them. It is easier to delay the compaction until they have been applied. + for (auto& clusterset : m_clustersets) { + if (!clusterset.m_deps_to_add.empty()) return; + if (!clusterset.m_to_remove.empty()) return; + if (!clusterset.m_removed.empty()) return; + } + + // Release memory used by discarded ChunkData index entries. + ClearShrink(m_chunkindex_discarded); + + // Sort the GraphIndexes that need to be cleaned up. They are sorted in reverse, so the last + // ones get processed first. This means earlier-processed GraphIndexes will not cause moving of + // later-processed ones during the "swap with end of m_entries" step below (which might + // invalidate them). 
+ std::sort(m_unlinked.begin(), m_unlinked.end(), std::greater{}); + + auto last = GraphIndex(-1); + for (GraphIndex idx : m_unlinked) { + // m_unlinked should never contain the same GraphIndex twice (the code below would fail + // if so, because GraphIndexes get invalidated by removing them). + Assume(idx != last); + last = idx; + + // Make sure the entry is unlinked. + Entry& entry = m_entries[idx]; + Assume(entry.m_ref == nullptr); + // Make sure the entry does not occur in the graph. + for (int level = 0; level < MAX_LEVELS; ++level) { + Assume(!entry.m_locator[level].IsPresent()); + } + + // Move the entry to the end. + if (idx != m_entries.size() - 1) SwapIndexes(idx, m_entries.size() - 1); + // Drop the entry for idx, now that it is at the end. + m_entries.pop_back(); + } + m_unlinked.clear(); +} + +void TxGraphImpl::Split(Cluster& cluster) noexcept +{ + // To split a Cluster, first make sure all removals are applied (as we might need to split + // again afterwards otherwise). + ApplyRemovals(cluster.m_level); + bool del = cluster.Split(*this); + if (del) { + // Cluster::Split reports whether the Cluster is to be deleted. + DeleteCluster(cluster); + } +} + +void TxGraphImpl::SplitAll(int up_to_level) noexcept +{ + Assume(up_to_level >= 0 && size_t(up_to_level) < m_clustersets.size()); + // Before splitting all Cluster, first make sure all removals are applied. + ApplyRemovals(up_to_level); + for (int level = 0; level <= up_to_level; ++level) { + for (auto quality : {QualityLevel::NEEDS_SPLIT, QualityLevel::NEEDS_SPLIT_ACCEPTABLE}) { + auto& queue = m_clustersets[level].m_clusters[int(quality)]; + while (!queue.empty()) { + Split(*queue.back().get()); + } + } + } +} + +void TxGraphImpl::GroupClusters(int level) noexcept +{ + auto& clusterset = m_clustersets[level]; + // If the groupings have been computed already, nothing is left to be done. 
+ if (clusterset.m_group_data.has_value()) return; + + // Before computing which Clusters need to be merged together, first apply all removals and + // split the Clusters into connected components. If we would group first, we might end up + // with inefficient and/or oversized Clusters which just end up being split again anyway. + SplitAll(level); + + /** Annotated clusters: an entry for each Cluster, together with the representative for the + * partition it is in if known, or with nullptr if not yet known. */ + std::vector> an_clusters; + /** Annotated dependencies: an entry for each m_deps_to_add entry (excluding ones that apply + * to removed transactions), together with the representative root of the partition of + * Clusters it applies to. */ + std::vector, Cluster*>> an_deps; + + // Construct an an_clusters entry for every oversized cluster, including ones from levels below, + // as they may be inherited in this one. + for (int level_iter = 0; level_iter <= level; ++level_iter) { + for (auto& cluster : m_clustersets[level_iter].m_clusters[int(QualityLevel::OVERSIZED)]) { + auto graph_idx = cluster->GetClusterEntry(0); + auto cur_cluster = FindCluster(graph_idx, level); + if (cur_cluster == nullptr) continue; + an_clusters.emplace_back(cur_cluster, nullptr); + } + } + + // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies. + for (const auto& [par, chl] : clusterset.m_deps_to_add) { + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); + // Skip dependencies for which the parent or child transaction is removed. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + an_clusters.emplace_back(par_cluster, nullptr); + // Do not include a duplicate when parent and child are identical, as it'll be removed + // below anyway. 
+ if (chl_cluster != par_cluster) an_clusters.emplace_back(chl_cluster, nullptr); + } + // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters + // to which dependencies apply, or which are oversized. + std::sort(an_clusters.begin(), an_clusters.end()); + an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); + + // Sort the dependencies by child Cluster. + std::sort(clusterset.m_deps_to_add.begin(), clusterset.m_deps_to_add.end(), [&](auto& a, auto& b) noexcept { + auto [_a_par, a_chl] = a; + auto [_b_par, b_chl] = b; + auto a_chl_cluster = FindCluster(a_chl, level); + auto b_chl_cluster = FindCluster(b_chl, level); + return std::less{}(a_chl_cluster, b_chl_cluster); + }); + + // Run the union-find algorithm to find partitions of the input Clusters which need to be + // grouped together. See https://en.wikipedia.org/wiki/Disjoint-set_data_structure. + { + /** Each PartitionData entry contains information about a single input Cluster. */ + struct PartitionData + { + /** The cluster this holds information for. */ + Cluster* cluster; + /** All PartitionData entries belonging to the same partition are organized in a tree. + * Each element points to its parent, or to itself if it is the root. The root is then + * a representative for the entire tree, and can be found by walking upwards from any + * element. */ + PartitionData* parent; + /** (only if this is a root, so when parent == this) An upper bound on the height of + * tree for this partition. */ + unsigned rank; + }; + /** Information about each input Cluster. Sorted by Cluster* pointer. */ + std::vector partition_data; + + /** Given a Cluster, find its corresponding PartitionData. 
*/ + auto locate_fn = [&](Cluster* arg) noexcept -> PartitionData* { + auto it = std::lower_bound(partition_data.begin(), partition_data.end(), arg, + [](auto& a, Cluster* ptr) noexcept { return a.cluster < ptr; }); + Assume(it != partition_data.end()); + Assume(it->cluster == arg); + return &*it; + }; + + /** Given a PartitionData, find the root of the tree it is in (its representative). */ + static constexpr auto find_root_fn = [](PartitionData* data) noexcept -> PartitionData* { + while (data->parent != data) { + // Replace pointers to parents with pointers to grandparents. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives. + auto par = data->parent; + data->parent = par->parent; + data = par; + } + return data; + }; + + /** Given two PartitionDatas, union the partitions they are in, and return their + * representative. */ + static constexpr auto union_fn = [](PartitionData* arg1, PartitionData* arg2) noexcept { + // Find the roots of the trees, and bail out if they are already equal (which would + // mean they are in the same partition already). + auto rep1 = find_root_fn(arg1); + auto rep2 = find_root_fn(arg2); + if (rep1 == rep2) return rep1; + // Pick the lower-rank root to become a child of the higher-rank one. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_rank. + if (rep1->rank < rep2->rank) std::swap(rep1, rep2); + rep2->parent = rep1; + rep1->rank += (rep1->rank == rep2->rank); + return rep1; + }; + + // Start by initializing every Cluster as its own singleton partition. + partition_data.resize(an_clusters.size()); + for (size_t i = 0; i < an_clusters.size(); ++i) { + partition_data[i].cluster = an_clusters[i].first; + partition_data[i].parent = &partition_data[i]; + partition_data[i].rank = 0; + } + + // Run through all parent/child pairs in m_deps_to_add, and union the + // the partitions their Clusters are in. 
+ Cluster* last_chl_cluster{nullptr}; + PartitionData* last_partition{nullptr}; + for (const auto& [par, chl] : clusterset.m_deps_to_add) { + auto par_cluster = FindCluster(par, level); + auto chl_cluster = FindCluster(chl, level); + // Nothing to do if parent and child are in the same Cluster. + if (par_cluster == chl_cluster) continue; + // Nothing to do if either parent or child transaction is removed already. + if (par_cluster == nullptr || chl_cluster == nullptr) continue; + Assume(par != chl); + if (chl_cluster == last_chl_cluster) { + // If the child Clusters is the same as the previous iteration, union with the + // tree they were in, avoiding the need for another lookup. Note that m_deps_to_add + // is sorted by child Cluster, so batches with the same child are expected. + last_partition = union_fn(locate_fn(par_cluster), last_partition); + } else { + last_chl_cluster = chl_cluster; + last_partition = union_fn(locate_fn(par_cluster), locate_fn(chl_cluster)); + } + } + + // Populate the an_clusters and an_deps data structures with the list of input Clusters, + // and the input dependencies, annotated with the representative of the Cluster partition + // it applies to. + an_deps.reserve(clusterset.m_deps_to_add.size()); + auto deps_it = clusterset.m_deps_to_add.begin(); + for (size_t i = 0; i < partition_data.size(); ++i) { + auto& data = partition_data[i]; + // Find the representative of the partition Cluster i is in, and store it with the + // Cluster. + auto rep = find_root_fn(&data)->cluster; + Assume(an_clusters[i].second == nullptr); + an_clusters[i].second = rep; + // Find all dependencies whose child Cluster is Cluster i, and annotate them with rep. 
+ while (deps_it != clusterset.m_deps_to_add.end()) { + auto [par, chl] = *deps_it; + auto chl_cluster = FindCluster(chl, level); + if (std::greater{}(chl_cluster, data.cluster)) break; + // Skip dependencies that apply to earlier Clusters (those necessarily are for + // deleted transactions, as otherwise we'd have processed them already). + if (chl_cluster == data.cluster) { + auto par_cluster = FindCluster(par, level); + // Also filter out dependencies applying to a removed parent. + if (par_cluster != nullptr) an_deps.emplace_back(*deps_it, rep); + } + ++deps_it; + } + } + } + + // Sort both an_clusters and an_deps by representative of the partition they are in, grouping + // all those applying to the same partition together. + std::sort(an_deps.begin(), an_deps.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + + // Translate the resulting cluster groups to the m_group_data structure, and the dependencies + // back to m_deps_to_add. + clusterset.m_group_data = GroupData{}; + clusterset.m_group_data->m_group_clusters.reserve(an_clusters.size()); + clusterset.m_group_data->m_group_oversized = false; + clusterset.m_deps_to_add.clear(); + clusterset.m_deps_to_add.reserve(an_deps.size()); + auto an_deps_it = an_deps.begin(); + auto an_clusters_it = an_clusters.begin(); + while (an_clusters_it != an_clusters.end()) { + // Process all clusters/dependencies belonging to the partition with representative rep. + auto rep = an_clusters_it->second; + // Create and initialize a new GroupData entry for the partition. 
+ auto& new_entry = clusterset.m_group_data->m_groups.emplace_back(); + new_entry.m_cluster_offset = clusterset.m_group_data->m_group_clusters.size(); + new_entry.m_cluster_count = 0; + new_entry.m_deps_offset = clusterset.m_deps_to_add.size(); + new_entry.m_deps_count = 0; + uint32_t total_count{0}; + uint64_t total_size{0}; + // Add all its clusters to it (copying those from an_clusters to m_group_clusters). + while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { + clusterset.m_group_data->m_group_clusters.push_back(an_clusters_it->first); + total_count += an_clusters_it->first->GetTxCount(); + total_size += an_clusters_it->first->GetTxSize(); + ++an_clusters_it; + ++new_entry.m_cluster_count; + } + // Add all its dependencies to it (copying those back from an_deps to m_deps_to_add). + while (an_deps_it != an_deps.end() && an_deps_it->second == rep) { + clusterset.m_deps_to_add.push_back(an_deps_it->first); + ++an_deps_it; + ++new_entry.m_deps_count; + } + // Detect oversizedness. + if (total_count > m_max_cluster_count || total_size > m_max_cluster_size) { + clusterset.m_group_data->m_group_oversized = true; + } + } + Assume(an_deps_it == an_deps.end()); + Assume(an_clusters_it == an_clusters.end()); + clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; + Compact(); +} + +void TxGraphImpl::Merge(std::span to_merge) noexcept +{ + Assume(!to_merge.empty()); + // Nothing to do if a group consists of just a single Cluster. + if (to_merge.size() == 1) return; + + // Move the largest Cluster to the front of to_merge. As all transactions in other to-be-merged + // Clusters will be moved to that one, putting the largest one first minimizes the number of + // moves. 
+ size_t max_size_pos{0}; + DepGraphIndex max_size = to_merge[max_size_pos]->GetTxCount(); + for (size_t i = 1; i < to_merge.size(); ++i) { + DepGraphIndex size = to_merge[i]->GetTxCount(); + if (size > max_size) { + max_size_pos = i; + max_size = size; + } + } + if (max_size_pos != 0) std::swap(to_merge[0], to_merge[max_size_pos]); + + // Merge all further Clusters in the group into the first one, and delete them. + for (size_t i = 1; i < to_merge.size(); ++i) { + to_merge[0]->Merge(*this, *to_merge[i]); + DeleteCluster(*to_merge[i]); + } +} + +void TxGraphImpl::ApplyDependencies(int level) noexcept +{ + auto& clusterset = m_clustersets[level]; + // Do not bother computing groups if we already know the result will be oversized. + if (clusterset.m_oversized == true) return; + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + GroupClusters(level); + Assume(clusterset.m_group_data.has_value()); + // Nothing to do if there are no dependencies to be added. + if (clusterset.m_deps_to_add.empty()) return; + // Dependencies cannot be applied if it would result in oversized clusters. + if (clusterset.m_oversized == true) return; + + // For each group of to-be-merged Clusters. + for (const auto& group_data : clusterset.m_group_data->m_groups) { + auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} + .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + // Pull in all the Clusters that contain dependencies. + for (Cluster*& cluster : cluster_span) { + cluster = PullIn(cluster, level); + } + // Invoke Merge() to merge them into a single Cluster. + Merge(cluster_span); + // Actually apply all to-be-added dependencies (all parents and children from this grouping + // belong to the same Cluster at this point because of the merging above). 
+ auto deps_span = std::span{clusterset.m_deps_to_add} + .subspan(group_data.m_deps_offset, group_data.m_deps_count); + Assume(!deps_span.empty()); + const auto& loc = m_entries[deps_span[0].second].m_locator[level]; + Assume(loc.IsPresent()); + loc.cluster->ApplyDependencies(*this, deps_span); + } + + // Wipe the list of to-be-added dependencies now that they are applied. + clusterset.m_deps_to_add.clear(); + Compact(); + // Also no further Cluster mergings are needed (note that we clear, but don't set to + // std::nullopt, as that would imply the groupings are unknown). + clusterset.m_group_data = GroupData{}; +} + +void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept +{ + // We can only relinearize Clusters that do not need splitting. + Assume(!NeedsSplitting()); + // No work is required for Clusters which are already optimally linearized. + if (IsOptimal()) return; + // Invoke the actual linearization algorithm (passing in the existing one). + uint64_t rng_seed = graph.m_rng.rand64(); + auto [linearization, optimal] = Linearize(m_depgraph, max_iters, rng_seed, m_linearization); + // Postlinearize if the result isn't optimal already. This guarantees (among other things) + // that the chunks of the resulting linearization are all connected. + if (!optimal) PostLinearize(m_depgraph, linearization); + // Update the linearization. + m_linearization = std::move(linearization); + // Update the Cluster's quality. + auto new_quality = optimal ? QualityLevel::OPTIMAL : QualityLevel::ACCEPTABLE; + graph.SetClusterQuality(m_level, m_quality, m_setindex, new_quality); + // Update the Entry objects. + Updated(graph); +} + +void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept +{ + // Relinearize the Cluster if needed. 
+ if (!cluster.NeedsSplitting() && !cluster.IsAcceptable() && !cluster.IsOversized()) { + cluster.Relinearize(*this, 10000); + } +} + +void TxGraphImpl::MakeAllAcceptable(int level) noexcept +{ + ApplyDependencies(level); + if (m_clustersets[level].m_oversized == true) return; + auto& queue = m_clustersets[level].m_clusters[int(QualityLevel::NEEDS_RELINEARIZE)]; + while (!queue.empty()) { + MakeAcceptable(*queue.back().get()); + } +} + +Cluster::Cluster(TxGraphImpl& graph, const FeePerWeight& feerate, GraphIndex graph_index) noexcept +{ + // Create a new transaction in the DepGraph, and remember its position in m_mapping. + auto cluster_idx = m_depgraph.AddTransaction(feerate); + m_mapping.push_back(graph_index); + m_linearization.push_back(cluster_idx); +} + +TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept +{ + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); + Assume(feerate.size > 0); + // Construct a new Ref. + Ref ret; + // Construct a new Entry, and link it with the Ref. + auto idx = m_entries.size(); + m_entries.emplace_back(); + auto& entry = m_entries.back(); + entry.m_chunkindex_iterator = m_chunkindex.end(); + entry.m_ref = &ret; + GetRefGraph(ret) = this; + GetRefIndex(ret) = idx; + // Construct a new singleton Cluster (which is necessarily optimally linearized). + bool oversized = uint64_t(feerate.size) > m_max_cluster_size; + auto cluster = std::make_unique(*this, feerate, idx); + auto cluster_ptr = cluster.get(); + int level = m_clustersets.size() - 1; + InsertCluster(level, std::move(cluster), oversized ? QualityLevel::OVERSIZED : QualityLevel::OPTIMAL); + cluster_ptr->Updated(*this); + ++m_clustersets[level].m_txcount; + // Deal with individually oversized transactions. + if (oversized) { + ++m_clustersets[level].m_txcount_oversized; + m_clustersets[level].m_oversized = true; + m_clustersets[level].m_group_data = std::nullopt; + } + // Return the Ref. 
+ return ret; +} + +void TxGraphImpl::RemoveTransaction(const Ref& arg) noexcept +{ + // Don't do anything if the Ref is empty (which may be indicative of the transaction already + // having been removed). + if (GetRefGraph(arg) == nullptr) return; + Assume(GetRefGraph(arg) == this); + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); + // Find the Cluster the transaction is in, and stop if it isn't in any. + auto cluster = FindCluster(GetRefIndex(arg), m_clustersets.size() - 1); + if (cluster == nullptr) return; + // Remember that the transaction is to be removed. + auto& clusterset = m_clustersets.back(); + clusterset.m_to_remove.push_back(GetRefIndex(arg)); + // Wipe m_group_data (as it will need to be recomputed). + clusterset.m_group_data.reset(); + if (clusterset.m_oversized == true) clusterset.m_oversized = std::nullopt; +} + +void TxGraphImpl::AddDependency(const Ref& parent, const Ref& child) noexcept +{ + // Don't do anything if either Ref is empty (which may be indicative of it having already been + // removed). + if (GetRefGraph(parent) == nullptr || GetRefGraph(child) == nullptr) return; + Assume(GetRefGraph(parent) == this && GetRefGraph(child) == this); + Assume(m_chunkindex_observers == 0 || m_clustersets.size() > 1); + // Don't do anything if this is a dependency on self. + if (GetRefIndex(parent) == GetRefIndex(child)) return; + // Find the Cluster the parent and child transaction are in, and stop if either appears to be + // already removed. + auto par_cluster = FindCluster(GetRefIndex(parent), m_clustersets.size() - 1); + if (par_cluster == nullptr) return; + auto chl_cluster = FindCluster(GetRefIndex(child), m_clustersets.size() - 1); + if (chl_cluster == nullptr) return; + // Remember that this dependency is to be applied. + auto& clusterset = m_clustersets.back(); + clusterset.m_deps_to_add.emplace_back(GetRefIndex(parent), GetRefIndex(child)); + // Wipe m_group_data (as it will need to be recomputed). 
+ clusterset.m_group_data.reset(); + if (clusterset.m_oversized == false) clusterset.m_oversized = std::nullopt; +} + +bool TxGraphImpl::Exists(const Ref& arg, bool main_only) noexcept +{ + if (GetRefGraph(arg) == nullptr) return false; + Assume(GetRefGraph(arg) == this); + size_t level = main_only ? 0 : m_clustersets.size() - 1; + // Make sure the transaction isn't scheduled for removal. + ApplyRemovals(level); + auto cluster = FindCluster(GetRefIndex(arg), level); + return cluster != nullptr; +} + +void Cluster::GetAncestorRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept +{ + /** The union of all ancestors to be returned. */ + SetType ancestors_union; + // Process elements from the front of args, as long as they apply. + while (!args.empty()) { + if (args.front().first != this) break; + ancestors_union |= m_depgraph.Ancestors(args.front().second); + args = args.subspan(1); + } + Assume(ancestors_union.Any()); + // Translate all ancestors (in arbitrary order) to Refs (if they have any), and return them. + for (auto idx : ancestors_union) { + const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); + output.push_back(entry.m_ref); + } +} + +void Cluster::GetDescendantRefs(const TxGraphImpl& graph, std::span>& args, std::vector& output) noexcept +{ + /** The union of all descendants to be returned. */ + SetType descendants_union; + // Process elements from the front of args, as long as they apply. + while (!args.empty()) { + if (args.front().first != this) break; + descendants_union |= m_depgraph.Descendants(args.front().second); + args = args.subspan(1); + } + Assume(descendants_union.Any()); + // Translate all descendants (in arbitrary order) to Refs (if they have any), and return them. 
+ for (auto idx : descendants_union) { + const auto& entry = graph.m_entries[m_mapping[idx]]; + Assume(entry.m_ref != nullptr); + output.push_back(entry.m_ref); + } +} + +bool Cluster::GetClusterRefs(TxGraphImpl& graph, std::span range, LinearizationIndex start_pos) noexcept +{ + // Translate the transactions in the Cluster (in linearization order, starting at start_pos in + // the linearization) to Refs, and fill them in range. + for (auto& ref : range) { + const auto& entry = graph.m_entries[m_mapping[m_linearization[start_pos++]]]; + Assume(entry.m_ref != nullptr); + ref = entry.m_ref; + } + // Return whether this was the end of the Cluster. + return start_pos == m_linearization.size(); +} + +FeePerWeight Cluster::GetIndividualFeerate(DepGraphIndex idx) noexcept +{ + return FeePerWeight::FromFeeFrac(m_depgraph.FeeRate(idx)); +} + +void Cluster::MakeTransactionsMissing(TxGraphImpl& graph) noexcept +{ + // Mark all transactions of a Cluster missing, needed when aborting staging, so that the + // corresponding Locators don't retain references into aborted Clusters. + for (auto ci : m_linearization) { + GraphIndex idx = m_mapping[ci]; + auto& entry = graph.m_entries[idx]; + entry.m_locator[m_level].SetMissing(); + if (m_level == 0) graph.ClearChunkData(entry); + } +} + +std::vector TxGraphImpl::GetAncestors(const Ref& arg, bool main_only) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. 
+ auto cluster = FindCluster(GetRefIndex(arg), level); + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + std::pair match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index}; + auto matches = std::span(&match, 1); + std::vector ret; + cluster->GetAncestorRefs(*this, matches, ret); + return ret; +} + +std::vector TxGraphImpl::GetDescendants(const Ref& arg, bool main_only) noexcept +{ + // Return the empty vector if the Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = FindCluster(GetRefIndex(arg), level); + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. + std::pair match = {cluster, m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index}; + auto matches = std::span(&match, 1); + std::vector ret; + cluster->GetDescendantRefs(*this, matches, ret); + return ret; +} + +std::vector TxGraphImpl::GetAncestorsUnion(std::span args, bool main_only) noexcept +{ + // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + + // Translate args to matches. + std::vector> matches; + matches.reserve(args.size()); + for (auto arg : args) { + // Skip empty Refs. + if (GetRefGraph(*arg) == nullptr) continue; + Assume(GetRefGraph(*arg) == this); + // Find the Cluster the argument is in, and skip if none is found. 
+ auto cluster = FindCluster(GetRefIndex(*arg), level); + if (cluster == nullptr) continue; + // Append to matches. + matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster->m_level].index); + } + // Group by Cluster. + std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + // Dispatch to the Clusters. + std::span match_span(matches); + std::vector ret; + while (!match_span.empty()) { + match_span.front().first->GetAncestorRefs(*this, match_span, ret); + } + return ret; +} + +std::vector TxGraphImpl::GetDescendantsUnion(std::span args, bool main_only) noexcept +{ + // Apply all dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Ancestry cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + + // Translate args to matches. + std::vector> matches; + matches.reserve(args.size()); + for (auto arg : args) { + // Skip empty Refs. + if (GetRefGraph(*arg) == nullptr) continue; + Assume(GetRefGraph(*arg) == this); + // Find the Cluster the argument is in, and skip if none is found. + auto cluster = FindCluster(GetRefIndex(*arg), level); + if (cluster == nullptr) continue; + // Append to matches. + matches.emplace_back(cluster, m_entries[GetRefIndex(*arg)].m_locator[cluster->m_level].index); + } + // Group by Cluster. + std::sort(matches.begin(), matches.end(), [](auto& a, auto& b) noexcept { return std::less{}(a.first, b.first); }); + // Dispatch to the Clusters. + std::span match_span(matches); + std::vector ret; + while (!match_span.empty()) { + match_span.front().first->GetDescendantRefs(*this, match_span, ret); + } + return ret; +} + +std::vector TxGraphImpl::GetCluster(const Ref& arg, bool main_only) noexcept +{ + // Return the empty vector if the Ref is empty (which may be indicative of the transaction + // having been removed already. 
+ if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be incorrect otherwise. + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + // Cluster linearization cannot be known if unapplied dependencies remain. + Assume(m_clustersets[level].m_deps_to_add.empty()); + // Find the Cluster the argument is in, and return the empty vector if it isn't in any. + auto cluster = FindCluster(GetRefIndex(arg), level); + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then dispatch to it. + MakeAcceptable(*cluster); + std::vector ret(cluster->GetTxCount()); + cluster->GetClusterRefs(*this, ret, 0); + return ret; +} + +TxGraph::GraphIndex TxGraphImpl::GetTransactionCount(bool main_only) noexcept +{ + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyRemovals(level); + return m_clustersets[level].m_txcount; +} + +FeePerWeight TxGraphImpl::GetIndividualFeerate(const Ref& arg) noexcept +{ + // Return the empty FeePerWeight if the passed Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Find the cluster the argument is in (the level does not matter as individual feerates will + // be identical if it occurs in both), and return the empty FeePerWeight if it isn't in any. + Cluster* cluster{nullptr}; + for (int level = 0; size_t(level) < m_clustersets.size(); ++level) { + // Apply removals, so that we can correctly report FeePerWeight{} for non-existing + // transactions. + ApplyRemovals(level); + if (m_entries[GetRefIndex(arg)].m_locator[level].IsPresent()) { + cluster = m_entries[GetRefIndex(arg)].m_locator[level].cluster; + break; + } + } + if (cluster == nullptr) return {}; + // Dispatch to the Cluster. 
+ return cluster->GetIndividualFeerate(m_entries[GetRefIndex(arg)].m_locator[cluster->m_level].index); +} + +FeePerWeight TxGraphImpl::GetMainChunkFeerate(const Ref& arg) noexcept +{ + // Return the empty FeePerWeight if the passed Ref is empty. + if (GetRefGraph(arg) == nullptr) return {}; + Assume(GetRefGraph(arg) == this); + // Apply all removals and dependencies, as the result might be inaccurate otherwise. + ApplyDependencies(/*level=*/0); + // Chunk feerates cannot be accurately known if unapplied dependencies remain. + Assume(m_clustersets[0].m_deps_to_add.empty()); + // Find the cluster the argument is in, and return the empty FeePerWeight if it isn't in any. + auto cluster = FindCluster(GetRefIndex(arg), 0); + if (cluster == nullptr) return {}; + // Make sure the Cluster has an acceptable quality level, and then return the transaction's + // chunk feerate. + MakeAcceptable(*cluster); + const auto& entry = m_entries[GetRefIndex(arg)]; + return entry.m_main_chunk_feerate; +} + +bool TxGraphImpl::IsOversized(bool main_only) noexcept +{ + size_t level = main_only ? 0 : m_clustersets.size() - 1; + auto& clusterset = m_clustersets[level]; + if (clusterset.m_oversized.has_value()) { + // Return cached value if known. + return *clusterset.m_oversized; + } + ApplyRemovals(level); + if (clusterset.m_txcount_oversized > 0) { + clusterset.m_oversized = true; + } else { + // Find which Clusters will need to be merged together, as that is where the oversize + // property is assessed. + GroupClusters(level); + } + Assume(clusterset.m_oversized.has_value()); + return *clusterset.m_oversized; +} + +void TxGraphImpl::StartStaging() noexcept +{ + Assume(m_clustersets.size() < MAX_LEVELS); + // Apply all remaining dependencies in main before creating a staging graph. 
Once staging + exists, we cannot merge Clusters anymore (because of interference with Clusters being + pulled into staging), so to make sure all inspectors are available (if not oversized), do + all merging work now. Call SplitAll() first, so that even if ApplyDependencies does not do + anything due to knowing the result is oversized, splitting is still performed. + SplitAll(m_clustersets.size() - 1); + ApplyDependencies(m_clustersets.size() - 1); + // Construct a new graph. + m_clustersets.emplace_back(); + // Copy statistics, precomputed data, and to-be-applied dependencies (only if oversized) to + // the new graph. To-be-applied removals will always be empty at this point. + auto& stage = m_clustersets.back(); + auto& main = *(m_clustersets.rbegin() + 1); + stage.m_txcount = main.m_txcount; + stage.m_txcount_oversized = main.m_txcount_oversized; + stage.m_deps_to_add = main.m_deps_to_add; + stage.m_group_data = main.m_group_data; + stage.m_oversized = main.m_oversized; + Assume(stage.m_oversized.has_value()); +} + +void TxGraphImpl::AbortStaging() noexcept +{ + Assume(m_clustersets.size() > 1); + int stage_level = m_clustersets.size() - 1; + auto& stage = m_clustersets[stage_level]; + // Mark all removed transactions as Missing (so the stage_level locator for these transactions + // can be reused if another staging is created). + for (auto idx : stage.m_removed) { + m_entries[idx].m_locator[stage_level].SetMissing(); + } + // Do the same with the non-removed transactions in staging Clusters. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (auto& cluster : stage.m_clusters[quality]) { + cluster->MakeTransactionsMissing(*this); + } + } + // Destroy the staging graph data.
+ m_clustersets.pop_back(); + Compact(); + auto& clusterset = m_clustersets.back(); + if (!clusterset.m_group_data.has_value()) { + // In case m_oversized in main was kept after a Ref destruction while staging exists, we + // need to re-evaluate m_oversized now. + if (clusterset.m_to_remove.empty() && clusterset.m_txcount_oversized > 0) { + // It is possible that a Ref destruction caused a removal in main while staging existed. + // In this case, m_txcount_oversized may be inaccurate. + m_clustersets.back().m_oversized = true; + } else { + m_clustersets.back().m_oversized = std::nullopt; + } + } +} + +void TxGraphImpl::CommitStaging() noexcept +{ + Assume(m_clustersets.size() > 1); + int stage_level = m_clustersets.size() - 1; + int main_level = stage_level - 1; + auto& stage = m_clustersets[stage_level]; + auto& main = m_clustersets[main_level]; + Assume(m_chunkindex_observers == 0 || main_level > 0); + // Delete all conflicting Clusters in main_level, to make place for moving the staging ones + // there. All of these have been PullIn()'d to stage_level before. + auto conflicts = GetConflicts(); + for (Cluster* conflict : conflicts) { + conflict->Clear(*this); + DeleteCluster(*conflict); + } + // Mark the removed transactions as Missing (so the stage_level locator for these transactions + // can be reused if another staging is created). + for (auto idx : stage.m_removed) { + m_entries[idx].m_locator[stage_level].SetMissing(); + } + // Then move all Clusters in staging to main. + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + auto& stage_sets = stage.m_clusters[quality]; + while (!stage_sets.empty()) { + stage_sets.back()->LevelDown(*this); + } + } + // Move all statistics, precomputed data, and to-be-applied removals and dependencies.
+ main.m_deps_to_add = std::move(stage.m_deps_to_add); + main.m_to_remove = std::move(stage.m_to_remove); + main.m_group_data = std::move(stage.m_group_data); + main.m_oversized = std::move(stage.m_oversized); + main.m_txcount = std::move(stage.m_txcount); + main.m_txcount_oversized = std::move(stage.m_txcount_oversized); + // Delete the old staging graph, after all its information was moved to main. + m_clustersets.pop_back(); + Compact(); +} + +void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcept +{ + // Make sure the specified DepGraphIndex exists in this Cluster. + Assume(m_depgraph.Positions()[idx]); + // Bail out if the fee isn't actually being changed. + if (m_depgraph.FeeRate(idx).fee == fee) return; + // Update the fee, remember that relinearization will be necessary, and update the Entries + // in the same Cluster. + m_depgraph.FeeRate(idx).fee = fee; + if (m_quality == QualityLevel::OVERSIZED) { + // Nothing to do. + } else if (!NeedsSplitting()) { + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); + } else { + graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); + } + Updated(graph); +} + +void TxGraphImpl::SetTransactionFee(const Ref& ref, int64_t fee) noexcept +{ + // Don't do anything if the passed Ref is empty. + if (GetRefGraph(ref) == nullptr) return; + Assume(GetRefGraph(ref) == this); + Assume(m_chunkindex_observers == 0); + // Find the entry, its locator, and inform its Cluster about the new feerate, if any. + auto& entry = m_entries[GetRefIndex(ref)]; + for (int level = 0; level < MAX_LEVELS; ++level) { + auto& locator = entry.m_locator[level]; + if (locator.IsPresent()) { + locator.cluster->SetFee(*this, locator.index, fee); + } + } +} + +std::strong_ordering TxGraphImpl::CompareMainOrder(const Ref& a, const Ref& b) noexcept +{ + // The references must not be empty. 
+ Assume(GetRefGraph(a) == this); + Assume(GetRefGraph(b) == this); + // Apply dependencies in main. + ApplyDependencies(0); + Assume(m_clustersets[0].m_deps_to_add.empty()); + // Make both involved Clusters acceptable, so chunk feerates are relevant. + const auto& entry_a = m_entries[GetRefIndex(a)]; + const auto& entry_b = m_entries[GetRefIndex(b)]; + const auto& locator_a = entry_a.m_locator[0]; + const auto& locator_b = entry_b.m_locator[0]; + Assume(locator_a.IsPresent()); + Assume(locator_b.IsPresent()); + MakeAcceptable(*locator_a.cluster); + MakeAcceptable(*locator_b.cluster); + // Compare chunk feerates, and return result if it differs. + auto feerate_cmp = FeeRateCompare(entry_b.m_main_chunk_feerate, entry_a.m_main_chunk_feerate); + if (feerate_cmp < 0) return std::strong_ordering::less; + if (feerate_cmp > 0) return std::strong_ordering::greater; + // Compare Cluster* as tie-break for equal chunk feerates. + if (locator_a.cluster != locator_b.cluster) return locator_a.cluster <=> locator_b.cluster; + // As final tie-break, compare position within cluster linearization. + return entry_a.m_main_lin_index <=> entry_b.m_main_lin_index; +} + +TxGraph::GraphIndex TxGraphImpl::CountDistinctClusters(std::span refs, bool main_only) noexcept +{ + size_t level = main_only ? 0 : m_clustersets.size() - 1; + ApplyDependencies(level); + Assume(m_clustersets[level].m_deps_to_add.empty()); + // Build a vector of Clusters that the specified Refs occur in. + std::vector clusters; + clusters.reserve(refs.size()); + for (const Ref* ref : refs) { + if (ref == nullptr) continue; + if (GetRefGraph(*ref) == nullptr) continue; + Assume(GetRefGraph(*ref) == this); + auto cluster = FindCluster(GetRefIndex(*ref), level); + if (cluster != nullptr) clusters.push_back(cluster); + } + // Count the number of distinct elements in clusters. 
+ std::sort(clusters.begin(), clusters.end()); + Cluster* last{nullptr}; + GraphIndex ret{0}; + for (Cluster* cluster : clusters) { + ret += (cluster != last); + last = cluster; + } + return ret; +} + +std::pair, std::vector> TxGraphImpl::GetMainStagingDiagrams() noexcept +{ + Assume(m_clustersets.size() >= 2); + MakeAllAcceptable(m_clustersets.size() - 2); + Assume(m_clustersets[m_clustersets.size() - 2].m_deps_to_add.empty()); + MakeAllAcceptable(m_clustersets.size() - 1); + Assume(m_clustersets[m_clustersets.size() - 1].m_deps_to_add.empty()); + // For all Clusters in main which conflict with Clusters in staging (i.e., all that are removed + // by, or replaced in, staging), gather their chunk feerates. + auto main_clusters = GetConflicts(); + std::vector main_feerates, staging_feerates; + for (Cluster* cluster : main_clusters) { + cluster->AppendChunkFeerates(main_feerates); + } + // Do the same for the Clusters in staging themselves. + const auto& staging = m_clustersets.back(); + for (int quality = 0; quality < int(QualityLevel::NONE); ++quality) { + for (const auto& cluster : staging.m_clusters[quality]) { + cluster->AppendChunkFeerates(staging_feerates); + } + } + // Sort both by decreasing feerate to obtain diagrams, and return them. + std::sort(main_feerates.begin(), main_feerates.end(), [](auto& a, auto& b) { return a >> b; }); + std::sort(staging_feerates.begin(), staging_feerates.end(), [](auto& a, auto& b) { return a >> b; }); + return std::make_pair(std::move(main_feerates), std::move(staging_feerates)); +} + +void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const +{ + // There must be an m_mapping for each m_depgraph position (including holes). + assert(m_depgraph.PositionRange() == m_mapping.size()); + // The linearization for this Cluster must contain every transaction once. + assert(m_depgraph.TxCount() == m_linearization.size()); + // The number of transactions in a Cluster cannot exceed m_max_cluster_count. 
+ assert(m_linearization.size() <= graph.m_max_cluster_count); + // The level must match the level the Cluster occurs in. + assert(m_level == level); + // The sum of their sizes cannot exceed m_max_cluster_size, unless it is oversized. + assert(m_quality == QualityLevel::OVERSIZED || GetTxSize() <= graph.m_max_cluster_size); + // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. + + // OVERSIZED clusters are singletons. + assert(m_quality != QualityLevel::OVERSIZED || m_linearization.size() == 1); + + // Compute the chunking of m_linearization. + LinearizationChunking linchunking(m_depgraph, m_linearization); + + // Verify m_linearization. + SetType m_done; + LinearizationIndex linindex{0}; + DepGraphIndex chunk_pos{0}; //!< position within the current chunk + assert(m_depgraph.IsAcyclic()); + for (auto lin_pos : m_linearization) { + assert(lin_pos < m_mapping.size()); + const auto& entry = graph.m_entries[m_mapping[lin_pos]]; + // Check that the linearization is topological. + m_done.Set(lin_pos); + assert(m_done.IsSupersetOf(m_depgraph.Ancestors(lin_pos))); + // Check that the Entry has a locator pointing back to this Cluster & position within it. + assert(entry.m_locator[level].cluster == this); + assert(entry.m_locator[level].index == lin_pos); + // For top-level entries, check linearization position and chunk feerate. + if (level == 0 && IsAcceptable()) { + assert(entry.m_main_lin_index == linindex++); + if (!linchunking.GetChunk(0).transactions[lin_pos]) { + linchunking.MarkDone(linchunking.GetChunk(0).transactions); + chunk_pos = 0; + } + assert(entry.m_main_chunk_feerate == linchunking.GetChunk(0).feerate); + // Verify that an entry in the chunk index exists for every chunk-ending transaction. 
+ ++chunk_pos; + bool is_chunk_end = (chunk_pos == linchunking.GetChunk(0).transactions.Count()); + assert((entry.m_chunkindex_iterator != graph.m_chunkindex.end()) == is_chunk_end); + // If this Cluster has an acceptable quality level, its chunks must be connected. + assert(m_depgraph.IsConnected(linchunking.GetChunk(0).transactions)); + } + } + // Verify that each element of m_depgraph occurred in m_linearization. + assert(m_done == m_depgraph.Positions()); +} + +void TxGraphImpl::SanityCheck() const +{ + /** Which GraphIndexes ought to occur in m_unlinked, based on m_entries. */ + std::set expected_unlinked; + /** Which Clusters ought to occur in ClusterSet::m_clusters, based on m_entries. */ + std::set expected_clusters[MAX_LEVELS]; + /** Which GraphIndexes ought to occur in ClusterSet::m_removed, based on m_entries. */ + std::set expected_removed[MAX_LEVELS]; + /** Which GraphIndexes ought to occur in m_chunkindex, based on m_entries. */ + std::set expected_chunkindex; + /** Whether compaction is possible in the current state. */ + bool compact_possible{true}; + + // Go over all Entry objects in m_entries. + for (GraphIndex idx = 0; idx < m_entries.size(); ++idx) { + const auto& entry = m_entries[idx]; + if (entry.m_ref == nullptr) { + // Unlinked Entry must have indexes appear in m_unlinked. + expected_unlinked.insert(idx); + } else { + // Every non-unlinked Entry must have a Ref that points back to it. + assert(GetRefGraph(*entry.m_ref) == this); + assert(GetRefIndex(*entry.m_ref) == idx); + } + if (entry.m_chunkindex_iterator != m_chunkindex.end()) { + // Remember which entries we see a chunkindex entry for. + assert(entry.m_locator[0].IsPresent()); + expected_chunkindex.insert(idx); + } + // Verify the Entry m_locators. + bool was_present{false}, was_removed{false}; + for (int level = 0; level < MAX_LEVELS; ++level) { + const auto& locator = entry.m_locator[level]; + // Every Locator must be in exactly one of these 3 states.
+ assert(locator.IsMissing() + locator.IsRemoved() + locator.IsPresent() == 1); + if (locator.IsPresent()) { + // Once removed, a transaction cannot be revived. + assert(!was_removed); + // Verify that the Cluster agrees with where the Locator claims the transaction is. + assert(locator.cluster->GetClusterEntry(locator.index) == idx); + // Remember that we expect said Cluster to appear in the ClusterSet::m_clusters. + expected_clusters[level].insert(locator.cluster); + was_present = true; + } else if (locator.IsRemoved()) { + // Level 0 (main) cannot have IsRemoved locators (IsMissing there means non-existing). + assert(level > 0); + // A Locator can only be IsRemoved if it was IsPresent before, and only once. + assert(was_present && !was_removed); + // Remember that we expect this GraphIndex to occur in the ClusterSet::m_removed. + expected_removed[level].insert(idx); + was_removed = true; + } + } + } + + // For all levels (0 = main, 1 = staged)... + for (size_t level = 0; level < m_clustersets.size(); ++level) { + assert(level < MAX_LEVELS); + auto& clusterset = m_clustersets[level]; + std::set actual_clusters; + + // For all quality levels... + for (int qual = 0; qual < int(QualityLevel::NONE); ++qual) { + QualityLevel quality{qual}; + const auto& quality_clusters = clusterset.m_clusters[qual]; + // ... for all clusters in them ... + for (ClusterSetIndex setindex = 0; setindex < quality_clusters.size(); ++setindex) { + const auto& cluster = *quality_clusters[setindex]; + // Remember we saw this Cluster (only if it is non-empty; empty Clusters aren't + // expected to be referenced by the Entry vector). + if (cluster.GetTxCount() != 0) { + actual_clusters.insert(&cluster); + } + // Sanity check the cluster, according to the Cluster's internal rules. + cluster.SanityCheck(*this, level); + // Check that the cluster's quality and setindex matches its position in the quality list. 
+ assert(cluster.m_quality == quality); + assert(cluster.m_setindex == setindex); + } + } + + // Verify that all to-be-removed transactions have valid identifiers. + for (GraphIndex idx : clusterset.m_to_remove) { + assert(idx < m_entries.size()); + // We cannot assert that all m_to_remove transactions are still present: ~Ref on a + // (P,M) transaction (present in main, inherited in staging) will cause an m_to_remove + // addition in both main and staging, but a subsequent ApplyRemovals in main will + // cause it to disappear from staging too, leaving the m_to_remove in place. + } + + // Verify that all to-be-added dependencies have valid identifiers. + for (auto [par_idx, chl_idx] : clusterset.m_deps_to_add) { + assert(par_idx != chl_idx); + assert(par_idx < m_entries.size()); + assert(chl_idx < m_entries.size()); + } + + // Verify that the actually encountered clusters match the ones occurring in Entry vector. + assert(actual_clusters == expected_clusters[level]); + + // Verify that the contents of m_removed matches what was expected based on the Entry vector. + std::set actual_removed(clusterset.m_removed.begin(), clusterset.m_removed.end()); + for (auto i : expected_unlinked) { + // If a transaction exists in both main and staging, and is removed from staging (adding + // it to m_removed there), and consequently destroyed (wiping the locator completely), + // it can remain in m_removed despite not having an IsRemoved() locator. Exclude those + // transactions from the comparison here. + actual_removed.erase(i); + expected_removed[level].erase(i); + } + + assert(actual_removed == expected_removed[level]); + + // If any GraphIndex entries remain in this ClusterSet, compact is not possible.
+ if (!clusterset.m_deps_to_add.empty()) compact_possible = false; + if (!clusterset.m_to_remove.empty()) compact_possible = false; + if (!clusterset.m_removed.empty()) compact_possible = false; + + // If m_group_data exists, and no outstanding removals remain, m_oversized must match + // m_group_data->m_group_oversized || (m_txcount_oversized > 0). + if (clusterset.m_group_data.has_value() && clusterset.m_to_remove.empty()) { + assert(clusterset.m_oversized == + (clusterset.m_group_data->m_group_oversized || (clusterset.m_txcount_oversized > 0))); + } + + // For non-top levels, m_oversized must be known (as it cannot change until the level + // on top is gone). + if (level < m_clustersets.size() - 1) assert(clusterset.m_oversized.has_value()); + } + + // Verify that the contents of m_unlinked matches what was expected based on the Entry vector. + std::set actual_unlinked(m_unlinked.begin(), m_unlinked.end()); + assert(actual_unlinked == expected_unlinked); + + // If compaction was possible, it should have been performed already, and m_unlinked must be + // empty (to prevent memory leaks due to an ever-growing m_entries vector). + if (compact_possible) { + assert(actual_unlinked.empty()); + } + + // Finally, check the chunk index.
+ std::set actual_chunkindex; + FeePerWeight last_chunk_feerate; + for (const auto& chunk : m_chunkindex) { + GraphIndex idx = chunk.m_graph_index; + actual_chunkindex.insert(idx); + auto chunk_feerate = m_entries[idx].m_main_chunk_feerate; + if (!last_chunk_feerate.IsEmpty()) { + assert(FeeRateCompare(last_chunk_feerate, chunk_feerate) >= 0); + } + last_chunk_feerate = chunk_feerate; + } + assert(actual_chunkindex == expected_chunkindex); +} + +void TxGraphImpl::DoWork() noexcept +{ + for (int level = 0; level < int(m_clustersets.size()); ++level) { + if (level > 0 || m_chunkindex_observers == 0) { + MakeAllAcceptable(level); + } + } +} + +void BlockBuilderImpl::Next() noexcept +{ + while (m_next_iter != m_graph->m_chunkindex.end()) { + // Find the cluster pointed to by m_next_iter (and advance it). + const auto& chunk_data = *(m_next_iter++); + const auto& chunk_end_entry = m_graph->m_entries[chunk_data.m_graph_index]; + Cluster* cluster = chunk_end_entry.m_locator[0].cluster; + // If we previously skipped a chunk from this cluster we cannot include more from it. + if (m_excluded_clusters.contains(cluster)) continue; + // Populate m_current_chunk. + if (chunk_data.m_chunk_count == LinearizationIndex(-1)) { + // Special case in case just a single transaction remains, avoiding the need to + // dispatch to and dereference Cluster. + m_chunkdata.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + m_chunkdata[0] = chunk_end_entry.m_ref; + m_remaining_cluster = std::nullopt; + } else { + m_chunkdata.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + bool is_end = cluster->GetClusterRefs(*m_graph, m_chunkdata, start_pos); + if (is_end) { + m_remaining_cluster = std::nullopt; + } else { + m_remaining_cluster = cluster; + } + } + m_current_chunk.emplace(m_chunkdata, chunk_end_entry.m_main_chunk_feerate); + return; + } + // We reached the end of m_chunkindex. 
+ m_current_chunk = std::nullopt; +} + +BlockBuilderImpl::BlockBuilderImpl(TxGraphImpl& graph) noexcept : m_graph(&graph) +{ + // Make sure all clusters in main are up to date, and acceptable. + m_graph->MakeAllAcceptable(0); + // There cannot remain any inapplicable dependencies. + Assume(m_graph->m_clustersets[0].m_deps_to_add.empty()); + // Remember that this object is observing the graph's index, so that we can detect concurrent + // modifications. + ++m_graph->m_chunkindex_observers; + // Find the first chunk. + m_next_iter = m_graph->m_chunkindex.begin(); + Next(); +} + +BlockBuilderImpl::~BlockBuilderImpl() +{ + Assume(m_graph->m_chunkindex_observers > 0); + // Permit modifications to the main graph again after destroying the BlockBuilderImpl. + --m_graph->m_chunkindex_observers; +} + +void BlockBuilderImpl::Include() noexcept +{ + // The actual inclusion of the chunk is done by the calling code. All we have to do is switch + // to the next chunk. + Next(); +} + +void BlockBuilderImpl::Skip() noexcept +{ + // When skipping a chunk we need to not include anything more of the cluster, as that could make + // the result topologically invalid. + if (m_remaining_cluster.has_value()) { + m_excluded_clusters.insert(*m_remaining_cluster); + } + Next(); +} + +std::unique_ptr TxGraphImpl::GetBlockBuilder() noexcept +{ + return std::make_unique(*this); +} + +std::pair, FeePerWeight> TxGraphImpl::GetWorstMainChunk() noexcept +{ + std::pair, FeePerWeight> ret; + // Make sure all clusters in main are up to date, and acceptable. + MakeAllAcceptable(0); + Assume(m_clustersets[0].m_deps_to_add.empty()); + // If the graph is not empty, populate ret. 
+ if (!m_chunkindex.empty()) { + const auto& chunk_data = *m_chunkindex.rbegin(); + const auto& chunk_end_entry = m_entries[chunk_data.m_graph_index]; + Cluster* cluster = chunk_end_entry.m_locator[0].cluster; + if (chunk_data.m_chunk_count == LinearizationIndex(-1) || chunk_data.m_chunk_count == 1) { + // Special case for singletons. + ret.first.resize(1); + Assume(chunk_end_entry.m_ref != nullptr); + ret.first[0] = chunk_end_entry.m_ref; + } else { + ret.first.resize(chunk_data.m_chunk_count); + auto start_pos = chunk_end_entry.m_main_lin_index + 1 - chunk_data.m_chunk_count; + cluster->GetClusterRefs(*this, ret.first, start_pos); + std::reverse(ret.first.begin(), ret.first.end()); + } + ret.second = chunk_end_entry.m_main_chunk_feerate; + } + return ret; +} + +std::vector TxGraphImpl::Trim() noexcept +{ + int level = m_clustersets.size() - 1; + Assume(m_chunkindex_observers == 0 || level > 0); + std::vector ret; + + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + auto& clusterset = m_clustersets[level]; + if (clusterset.m_oversized == false) return ret; + GroupClusters(level); + Assume(clusterset.m_group_data.has_value()); + // Nothing to do if not oversized. + if (!clusterset.m_group_data->m_group_oversized) return ret; + + // In this function, would-be clusters (as precomputed in m_group_data by GroupClusters) are + // trimmed by removing transactions in them such that the resulting clusters satisfy the size + // and count limits. + // + // It works by defining for each would-be cluster a rudimentary linearization: at every point + // the highest-chunk-feerate remaining transaction is picked among those with no unmet + // dependencies. "Dependency" here means either a to-be-added dependency (m_deps_to_add), or + // an implicit dependency added between any two consecutive transaction in their current + // cluster linearization. 
So it can be seen as a "merge sort" of the chunks of the clusters, + // but respecting the dependencies being added. + // + // This rudimentary linearization is computed lazily, by putting all eligible (no unmet + // dependencies) transactions in a heap, and popping the highest-feerate one from it. Along the + // way, the counts and sizes of the would-be clusters up to that point are tracked (by + // partitioning the involved transactions using a union-find structure). Any transaction whose + // addition would cause a violation is removed, along with all their descendants. + // + // A next invocation of GroupClusters (after applying the removals) will compute the new + // resulting clusters, and none of them will violate the limits. + + /** All dependencies (both to be added ones, and implicit ones between consecutive transactions + * in existing cluster linearizations), sorted by parent. */ + std::vector> deps_by_parent; + /** Same, but sorted by child. */ + std::vector> deps_by_child; + /** Information about all transactions involved in a Cluster group to be trimmed, sorted by + * GraphIndex. */ + std::vector trim_data; + /** Iterators into trim_data, treated as a max heap according to cmp_fn below. */ + std::vector::iterator> trim_heap; + /** The list of representatives of the partitions a given transaction depends on. */ + std::vector current_deps; + + /** Function to define the ordering of trim_heap. */ + static constexpr auto cmp_fn = [](auto a, auto b) noexcept { + // Sort by increasing chunk feerate, and then by decreasing size. + // We do not need to sort by cluster or within clusters, because due to the implicit + // dependency between consecutive linearization elements, no two transactions from the + // same Cluster will ever simultaneously be in the heap. + return a->m_chunk_feerate < b->m_chunk_feerate; + }; + + /** Given a TrimTxData entry, find the representative of the partition it is in. 
*/ + static constexpr auto find_fn = [](TrimTxData* arg) noexcept { + while (arg != arg->m_uf_parent) { + // Replace pointer to parent with pointer to grandparent (path splitting). + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives. + auto par = arg->m_uf_parent; + arg->m_uf_parent = par->m_uf_parent; + arg = par; + } + return arg; + }; + + /** Given two TrimTxData entries, union the partitions they are in, and return the + * representative. */ + static constexpr auto union_fn = [](TrimTxData* arg1, TrimTxData* arg2) noexcept { + // Replace arg1 and arg2 by their representatives. + auto rep1 = find_fn(arg1); + auto rep2 = find_fn(arg2); + // Bail out if both representatives are the same, because that means arg1 and arg2 are in + // the same partition already. + if (rep1 == rep2) return rep1; + // Pick the lower-count root to become a child of the higher-count one. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_size. + if (rep1->m_uf_count < rep2->m_uf_count) std::swap(rep1, rep2); + rep2->m_uf_parent = rep1; + // Add the statistics of arg2 (which is no longer a representative) to those of arg1 (which + // is now the representative for both). + rep1->m_uf_size += rep2->m_uf_size; + rep1->m_uf_count += rep2->m_uf_count; + return rep1; + }; + + /** Get iterator to TrimTxData entry for a given index. */ + auto locate_fn = [&](GraphIndex index) noexcept { + auto it = std::lower_bound(trim_data.begin(), trim_data.end(), index, [](TrimTxData& elem, GraphIndex idx) noexcept { + return elem.m_index < idx; + }); + Assume(it != trim_data.end() && it->m_index == index); + return it; + }; + + // For each group of to-be-merged Clusters. + for (const auto& group_data : clusterset.m_group_data->m_groups) { + trim_data.clear(); + trim_heap.clear(); + deps_by_child.clear(); + deps_by_parent.clear(); + + // Gather trim data from all involved Clusters. 
+ auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} + .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + uint64_t size{0}; + for (Cluster* cluster : cluster_span) { + size += cluster->AppendTrimData(trim_data, deps_by_child); + } + // If this group of Clusters does not violate any limits, continue to the next group. + if (trim_data.size() <= m_max_cluster_count && size <= m_max_cluster_size) continue; + // Sort the trim data by GraphIndex. In what follows, we will treat this sorted vector as + // a map from GraphIndex to TrimTxData, and its ordering will not change anymore. + std::sort(trim_data.begin(), trim_data.end(), [](auto& a, auto& b) noexcept { return a.m_index < b.m_index; }); + + // Construct deps_by_child. + deps_by_child.insert(deps_by_child.end(), + clusterset.m_deps_to_add.begin() + group_data.m_deps_offset, + clusterset.m_deps_to_add.begin() + group_data.m_deps_offset + group_data.m_deps_count); + std::sort(deps_by_child.begin(), deps_by_child.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + // Fill m_parents_count and m_parents_offset in trim_data. Because of the sort above, all + // dependencies involving the same child are grouped together, so a single linear scan + // suffices. + auto deps_it = deps_by_child.begin(); + for (auto trim_it = trim_data.begin(); trim_it != trim_data.end(); ++trim_it) { + trim_it->m_parent_offset = deps_it - deps_by_child.begin(); + trim_it->m_deps_left = 0; + while (deps_it != deps_by_child.end() && deps_it->second == trim_it->m_index) { + ++trim_it->m_deps_left; + ++deps_it; + } + trim_it->m_parent_count = trim_it->m_deps_left; + // If this transaction has no unmet dependencies, and is not oversized, add it to the + // heap (just append for now, the heapification happens below). + if (trim_it->m_deps_left == 0 && trim_it->m_tx_size <= m_max_cluster_size) { + // Initialize it as a singleton partition. 
+ trim_it->m_uf_parent = &*trim_it; + trim_it->m_uf_count = 1; + trim_it->m_uf_size = trim_it->m_tx_size; + // Add to heap. + trim_heap.push_back(trim_it); + } + } + Assume(deps_it == deps_by_child.end()); + + // Construct deps_by_parent. + deps_by_parent = deps_by_child; + std::sort(deps_by_parent.begin(), deps_by_parent.end(), [](auto& a, auto& b) noexcept { return a.first < b.first; }); + // Fill m_children_offset and m_children_count in trim_data. Because of the sort above, all + // dependencies involving the same parent are grouped together, so a single linear scan + // suffices. + deps_it = deps_by_parent.begin(); + for (auto& trim_entry : trim_data) { + trim_entry.m_children_count = 0; + trim_entry.m_children_offset = deps_it - deps_by_parent.begin(); + while (deps_it != deps_by_parent.end() && deps_it->first == trim_entry.m_index) { + ++trim_entry.m_children_count; + ++deps_it; + } + } + Assume(deps_it == deps_by_parent.end()); + + // Build a heap of all transactions with 0 unmet dependencies. + std::make_heap(trim_heap.begin(), trim_heap.end(), cmp_fn); + + // Iterate over to-be-included transactions. It is possible that the heap empties without + // ever hitting either cluster limit, in case the implied graph (to be added dependencies + // plus implicit dependency between each original transaction and its predecessor in the + // linearization it came from) contains cycles. Such cycles will be removed entirely, + // because each of the transactions in the cycle permanently have unmet dependencies. + // However, this cannot occur in real scenarios where Trim() is called to deal with + // reorganizations that would violate cluster limits, as all added dependencies are in the + // same direction (from old mempool transactions to new from-block transactions); cycles + // require dependencies in both directions to be added. + while (!trim_heap.empty()) { + // Move the best remaining transaction to the end of trim_heap. 
+ std::pop_heap(trim_heap.begin(), trim_heap.end(), cmp_fn); + // Pop it, and find its TrimTxData. + auto& entry = *trim_heap.back(); + trim_heap.pop_back(); + + // Find the distinct transaction partitions this entry depends on. + current_deps.clear(); + for (auto& [par, chl] : std::span{deps_by_child}.subspan(entry.m_parent_offset, entry.m_parent_count)) { + Assume(chl == entry.m_index); + current_deps.push_back(find_fn(&*locate_fn(par))); + } + std::sort(current_deps.begin(), current_deps.end()); + current_deps.erase(std::unique(current_deps.begin(), current_deps.end()), current_deps.end()); + + // Compute resource counts. + uint32_t new_count = 1; + uint64_t new_size = entry.m_tx_size; + for (TrimTxData* ptr : current_deps) { + new_count += ptr->m_uf_count; + new_size += ptr->m_uf_size; + } + // Skip the entry if this would violate any limit. + if (new_count > m_max_cluster_count || new_size > m_max_cluster_size) break; + + // Union the partitions this transaction and all its dependencies are in together. + auto rep = &entry; + for (TrimTxData* ptr : current_deps) rep = union_fn(ptr, rep); + // Mark the entry as included (so the loop below will not remove the transaction). + entry.m_deps_left = uint32_t(-1); + // Mark each to-be-added dependency involving this transaction as parent satisfied. + for (auto& [par, chl] : std::span{deps_by_parent}.subspan(entry.m_children_offset, entry.m_children_count)) { + Assume(par == entry.m_index); + auto chl_it = locate_fn(chl); + // Reduce the number of unmet dependencies of chl_it, and if that brings the number + // to zero, add it to the heap. + Assume(chl_it->m_deps_left > 0); + if (--chl_it->m_deps_left == 0) { + // Initialize as a singleton partition. + chl_it->m_uf_parent = &*chl_it; + chl_it->m_uf_count = 1; + chl_it->m_uf_size = chl_it->m_tx_size; + // Add it to the heap. 
+ trim_heap.push_back(chl_it); + std::push_heap(trim_heap.begin(), trim_heap.end(), cmp_fn); + } + } + } + + // Remove all the transactions that were not processed above. Because nothing gets + // processed until/unless all its dependencies are met, this automatically guarantees + // that if a transaction is removed, all its descendants, or would-be descendants, are + // removed as well. + for (const auto& trim_entry : trim_data) { + if (trim_entry.m_deps_left != uint32_t(-1)) { + ret.push_back(m_entries[trim_entry.m_index].m_ref); + clusterset.m_to_remove.push_back(trim_entry.m_index); + } + } + } + clusterset.m_group_data.reset(); + clusterset.m_oversized = false; + return ret; +} + +} // namespace + +TxGraph::Ref::~Ref() +{ + if (m_graph) { + // Inform the TxGraph about the Ref being destroyed. + m_graph->UnlinkRef(m_index); + m_graph = nullptr; + } +} + +TxGraph::Ref& TxGraph::Ref::operator=(Ref&& other) noexcept +{ + // Unlink the current graph, if any. + if (m_graph) m_graph->UnlinkRef(m_index); + // Inform the other's graph about the move, if any. + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually update the contents. + m_graph = other.m_graph; + m_index = other.m_index; + other.m_graph = nullptr; + other.m_index = GraphIndex(-1); + return *this; +} + +TxGraph::Ref::Ref(Ref&& other) noexcept +{ + // Inform the TxGraph of other that its Ref is being moved. + if (other.m_graph) other.m_graph->UpdateRef(other.m_index, *this); + // Actually move the contents. 
+ std::swap(m_graph, other.m_graph); + std::swap(m_index, other.m_index); +} + +std::unique_ptr MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size) noexcept +{ + return std::make_unique(max_cluster_count, max_cluster_size); +} diff --git a/src/txgraph.h b/src/txgraph.h new file mode 100644 index 00000000000..3cb4b638ced --- /dev/null +++ b/src/txgraph.h @@ -0,0 +1,221 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include +#include +#include + +#include + +#ifndef BITCOIN_TXGRAPH_H +#define BITCOIN_TXGRAPH_H + +static constexpr unsigned MAX_CLUSTER_COUNT_LIMIT{64}; + +/** Data structure to encapsulate fees, sizes, and dependencies for a set of transactions. */ +class TxGraph +{ +public: + /** Internal identifier for a transaction within a TxGraph. */ + using GraphIndex = uint32_t; + + /** Data type used to reference transactions within a TxGraph. + * + * Every transaction within a TxGraph has exactly one corresponding TxGraph::Ref, held by users + * of the class. Destroying the TxGraph::Ref removes the corresponding transaction. + * + * Users of the class can inherit from TxGraph::Ref. If all Refs are inherited this way, the + * Ref* pointers returned by TxGraph functions can be used as this inherited type. + */ + class Ref + { + // Allow TxGraph's GetRefGraph and GetRefIndex to access internals. + friend class TxGraph; + /** Which Graph the Entry lives in. nullptr if this Ref is empty. */ + TxGraph* m_graph = nullptr; + /** Index into the Graph's m_entries. Only used if m_graph != nullptr. */ + GraphIndex m_index = GraphIndex(-1); + public: + /** Construct an empty Ref. Non-empty Refs can only be created using + * TxGraph::AddTransaction. */ + Ref() noexcept = default; + /** Destroy this Ref. 
If it is not empty, the corresponding transaction is removed (in both + * main and staging, if it exists). */ + virtual ~Ref(); + // Support moving a Ref. + Ref& operator=(Ref&& other) noexcept; + Ref(Ref&& other) noexcept; + // Do not permit copy constructing or copy assignment. A TxGraph entry can have at most one + // Ref pointing to it. + Ref& operator=(const Ref&) = delete; + Ref(const Ref&) = delete; + }; + + /** Interface returned by GetBlockBuilder. */ + class BlockBuilder + { + protected: + /** The next chunk, in topological order plus feerate, or std::nullopt if done. */ + std::optional, FeePerWeight>> m_current_chunk; + /** Make constructor non-public (use TxGraph::GetBlockBuilder()). */ + BlockBuilder() noexcept = default; + public: + /** Support safe inheritance. */ + virtual ~BlockBuilder() = default; + /** Determine whether there are more transactions to be included. */ + explicit operator bool() noexcept { return m_current_chunk.has_value(); } + /** Get the chunk that is currently suggested to be included. */ + const std::span& GetCurrentChunk() noexcept { return m_current_chunk->first; } + /** Get the feerate of the currently suggested chunk. */ + const FeePerWeight& GetCurrentChunkFeerate() noexcept { return m_current_chunk->second; } + /** Mark the current chunk as included, and progress to the next one. */ + virtual void Include() noexcept = 0; + /** Mark the current chunk as skipped, and progress to the next one. */ + virtual void Skip() noexcept = 0; + }; + +protected: + // Allow TxGraph::Ref to call UpdateRef and UnlinkRef. + friend class TxGraph::Ref; + /** Inform the TxGraph implementation that a TxGraph::Ref has moved. */ + virtual void UpdateRef(GraphIndex index, Ref& new_location) noexcept = 0; + /** Inform the TxGraph implementation that a TxGraph::Ref was destroyed. */ + virtual void UnlinkRef(GraphIndex index) noexcept = 0; + // Allow TxGraph implementations (inheriting from it) to access Ref internals. 
+ static TxGraph*& GetRefGraph(Ref& arg) noexcept { return arg.m_graph; } + static TxGraph* GetRefGraph(const Ref& arg) noexcept { return arg.m_graph; } + static GraphIndex& GetRefIndex(Ref& arg) noexcept { return arg.m_index; } + static GraphIndex GetRefIndex(const Ref& arg) noexcept { return arg.m_index; } + +public: + /** Virtual destructor, so inheriting is safe. */ + virtual ~TxGraph() = default; + /** Construct a new transaction with the specified feerate, and return a Ref to it. + * If a staging graph exists, the new transaction is only created there. In all further calls, + * only Refs created by AddTransaction() are allowed to be passed to this TxGraph object (or + * empty Ref objects). */ + [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; + /** Remove the specified transaction. If a staging graph exists, the removal only happens + * there. This is a no-op if the transaction was already removed. + * + * TxGraph may internally reorder transaction removals with dependency additions for + * performance reasons. If together with any transaction removal all its descendants, or all + * its ancestors, are removed as well (which is what always happens in realistic scenarios), + * this reordering will not affect the behavior of TxGraph. + * + * As an example, imagine 3 transactions A,B,C where B depends on A. If a dependency of C on B + * is added, and then B is deleted, C will still depend on A. If the deletion of B is reordered + * before the C->B dependency is added, the dependency adding has no effect. If, together with + * the deletion of B also either A or C is deleted, there is no distinction between the + * original order case and the reordered case. + */ + virtual void RemoveTransaction(const Ref& arg) noexcept = 0; + /** Add a dependency between two specified transactions. If a staging graph exists, the + * dependency is only added there. 
Parent may not be a descendant of child already (but may + * be an ancestor of it already, in which case this is a no-op). If either transaction is + * already removed, this is a no-op. */ + virtual void AddDependency(const Ref& parent, const Ref& child) noexcept = 0; + /** Modify the fee of the specified transaction, in both the main graph and the staging + * graph if it exists. Wherever the transaction does not exist (or was removed), this has no + * effect. */ + virtual void SetTransactionFee(const Ref& arg, int64_t fee) noexcept = 0; + + /** TxGraph is internally lazy, and will not compute many things until they are needed. + * Calling DoWork will compute everything now, so that future operations are fast. This can be + * invoked while oversized. */ + virtual void DoWork() noexcept = 0; + + /** Create a staging graph (which cannot exist already). This acts as if a full copy of + * the transaction graph is made, upon which further modifications are made. This copy can + * be inspected, and then either discarded, or the main graph can be replaced by it by + * committing it. */ + virtual void StartStaging() noexcept = 0; + /** Discard the existing active staging graph (which must exist). */ + virtual void AbortStaging() noexcept = 0; + /** Replace the main graph with the staging graph (which must exist). */ + virtual void CommitStaging() noexcept = 0; + /** Check whether a staging graph exists. */ + virtual bool HaveStaging() const noexcept = 0; + + /** Determine whether arg exists in the graph (i.e., was not removed). If main_only is false + * and a staging graph exists, it is queried; otherwise the main graph is queried. */ + virtual bool Exists(const Ref& arg, bool main_only = false) noexcept = 0; + /** Determine whether the graph is oversized (contains a connected component of more than the + * configured maximum cluster count). If main_only is false and a staging graph exists, it is + * queried; otherwise the main graph is queried. 
Some of the functions below are not available + * for oversized graphs. The mutators above are always available. Removing a transaction by + * destroying its Ref while staging exists will not clear main's oversizedness until staging + * is aborted or committed. */ + virtual bool IsOversized(bool main_only = false) noexcept = 0; + /** Get the feerate of the chunk which transaction arg is in, in the main graph. Returns the empty + * FeePerWeight if arg does not exist in the main graph. The main graph must not be + * oversized. */ + virtual FeePerWeight GetMainChunkFeerate(const Ref& arg) noexcept = 0; + /** Get the individual transaction feerate of transaction arg. Returns the empty FeePerWeight + * if arg does not exist in either main or staging. This is available even for oversized + * graphs. */ + virtual FeePerWeight GetIndividualFeerate(const Ref& arg) noexcept = 0; + /** Get pointers to all transactions in the connected component ("cluster") which arg is in. + * The transactions will be returned in a topologically-valid order of acceptable quality. + * If main_only is false and a staging graph exists, it is queried; otherwise the main graph + * is queried. The queried graph must not be oversized. Returns {} if arg does not exist in + * the queried graph. */ + virtual std::vector GetCluster(const Ref& arg, bool main_only = false) noexcept = 0; + /** Get pointers to all ancestors of the specified transaction. If main_only is false and a + * staging graph exists, it is queried; otherwise the main graph is queried. The queried + * graph must not be oversized. Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetAncestors(const Ref& arg, bool main_only = false) noexcept = 0; + /** Like GetAncestors, but return the Refs for all transactions in the union of the provided + * arguments' ancestors (each transaction is only reported once). 
*/ + virtual std::vector GetAncestorsUnion(std::span args, bool main_only = false) noexcept = 0; + /** Get pointers to all descendants of the specified transaction. If main_only is false and a + * staging graph exists, it is queried; otherwise the main graph is queried. The queried + * graph must not be oversized. Returns {} if arg does not exist in the queried graph. */ + virtual std::vector GetDescendants(const Ref& arg, bool main_only = false) noexcept = 0; + /** Like GetDescendants, but return the Refs for all transactions in the union of the provided + * arguments' descendants (each transaction is only reported once). */ + virtual std::vector GetDescendantsUnion(std::span args, bool main_only = false) noexcept = 0; + /** Get the total number of transactions in the graph. If main_only is false and a staging + * graph exists, it is queried; otherwise the main graph is queried. This is available even + * for oversized graphs. */ + virtual GraphIndex GetTransactionCount(bool main_only = false) noexcept = 0; + /** Compare two transactions according to the total order in the main graph (topological, and + * from high to low chunk feerate). Both transactions must be in the main graph. The main + * graph must not be oversized. */ + virtual std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept = 0; + /** Count the number of distinct clusters that the specified transactions belong to. If + * main_only is false and a staging graph exists, staging clusters are counted. Otherwise, + * main clusters are counted. Refs that do not exist in the graph are not counted. The + * queried graph must not be oversized. */ + virtual GraphIndex CountDistinctClusters(std::span, bool main_only = false) noexcept = 0; + /** Get feerate diagrams for both main and staging (which must both exist and not be + * oversized), ignoring unmodified components in both. Use FeeFrac rather than FeePerWeight + * so CompareChunks is usable without type-conversion. 
*/ + virtual std::pair, std::vector> GetMainStagingDiagrams() noexcept = 0; + /** Trim all clusters (and would-be clusters) to the TxGraph's cluster count and size + * constraints. Returns the list of all removed transactions, which will always include all + * their own descendants. Applies to staging if it exists, and main otherwise. */ + virtual std::vector Trim() noexcept = 0; + + /** Construct a block builder, drawing from the main graph, which cannot be oversized. While + * the returned object exists, no mutators on the main graph are allowed. */ + virtual std::unique_ptr GetBlockBuilder() noexcept = 0; + /** Get the worst chunk overall in the main graph, i.e., the last chunk that would be returned + * by a BlockBuilder created now. The chunk is returned in reversed order, so every element is + * preceded by all its descendants. If the graph is empty, {} is returned. */ + virtual std::pair, FeePerWeight> GetWorstMainChunk() noexcept = 0; + + /** Perform an internal consistency check on this object. */ + virtual void SanityCheck() const = 0; +}; + +/** Construct a new TxGraph with the specified limit on transactions within a cluster, and the + * specified limit on the sum of transaction sizes within a cluster. max_cluster_count cannot + * exceed MAX_CLUSTER_COUNT_LIMIT. */ +std::unique_ptr MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size) noexcept; + +#endif // BITCOIN_TXGRAPH_H diff --git a/src/util/feefrac.cpp b/src/util/feefrac.cpp index 5b6173835cb..96cb1aef2d9 100644 --- a/src/util/feefrac.cpp +++ b/src/util/feefrac.cpp @@ -36,7 +36,7 @@ std::partial_ordering CompareChunks(Span chunks0, Span chunks0, Span chunks1); +/** Tagged wrapper around FeeFrac to avoid unit confusion. */ +template +struct FeePerUnit : public FeeFrac +{ + // Inherit FeeFrac constructors. + using FeeFrac::FeeFrac; + + /** Convert a FeeFrac to a FeePerUnit. 
*/ + static FeePerUnit FromFeeFrac(const FeeFrac& feefrac) noexcept + { + return {feefrac.fee, feefrac.size}; + } +}; + +// FeePerUnit instance for satoshi / vbyte. +struct VSizeTag {}; +using FeePerVSize = FeePerUnit; + +// FeePerUnit instance for satoshi / WU. +struct WeightTag {}; +using FeePerWeight = FeePerUnit; + #endif // BITCOIN_UTIL_FEEFRAC_H