diff --git a/src/bench/CMakeLists.txt b/src/bench/CMakeLists.txt index 905187fbb00..2137beccb5a 100644 --- a/src/bench/CMakeLists.txt +++ b/src/bench/CMakeLists.txt @@ -48,6 +48,7 @@ add_executable(bench_bitcoin sign_transaction.cpp streams_findbyte.cpp strencodings.cpp + txgraph.cpp util_time.cpp verify_script.cpp xor.cpp diff --git a/src/bench/txgraph.cpp b/src/bench/txgraph.cpp new file mode 100644 index 00000000000..2504d02bc34 --- /dev/null +++ b/src/bench/txgraph.cpp @@ -0,0 +1,123 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include +#include + +#include +#include + +namespace { + +void BenchTxGraphTrim(benchmark::Bench& bench) +{ + // The from-block transactions consist of 1000 fully linear clusters, each with 64 + // transactions. The mempool contains 11 transactions that together merge all of these into + // a single cluster. + // + // (1000 chains of 64 transactions, 64000 T's total) + // + // T T T T T T T T + // | | | | | | | | + // T T T T T T T T + // | | | | | | | | + // T T T T T T T T + // | | | | | | | | + // T T T T T T T T + // (64 long) (64 long) (64 long) (64 long) (64 long) (64 long) (64 long) (64 long) + // | | | | | | | | + // | | / \ | / \ | | / + // \----------+--------/ \--------+--------/ \--------+-----+----+--------/ + // | | | + // B B B + // + // (11 B's, each attaching to up to 100 chains of 64 T's) + // + /** The maximum cluster count used in this test. */ + static constexpr int MAX_CLUSTER_COUNT = 64; + /** The number of "top" (from-block) chains of transactions. */ + static constexpr int NUM_TOP_CHAINS = 1000; + /** The number of transactions per top chain. */ + static constexpr int NUM_TX_PER_TOP_CHAIN = MAX_CLUSTER_COUNT; + /** The (maximum) number of dependencies per bottom transaction. 
*/ + static constexpr int NUM_DEPS_PER_BOTTOM_TX = 100; + /** Set a very large cluster size limit so that only the count limit is triggered. */ + static constexpr int32_t MAX_CLUSTER_SIZE = 100'000 * 100; + + /** Refs to all top transactions. */ + std::vector top_refs; + /** Refs to all bottom transactions. */ + std::vector bottom_refs; + /** Indexes into top_refs for some transaction of each component, in arbitrary order. + * Initially these are the last transactions in each chains, but as bottom transactions are + * added, entries will be removed when they get merged, and randomized. */ + std::vector top_components; + + InsecureRandomContext rng(11); + auto graph = MakeTxGraph(MAX_CLUSTER_COUNT, MAX_CLUSTER_SIZE); + + // Construct the top chains. + for (int chain = 0; chain < NUM_TOP_CHAINS; ++chain) { + for (int chaintx = 0; chaintx < NUM_TX_PER_TOP_CHAIN; ++chaintx) { + int64_t fee = rng.randbits<27>() + 100; + FeePerWeight feerate{fee, 1}; + top_refs.push_back(graph->AddTransaction(feerate)); + // Add internal dependencies linking the chain transactions together. + if (chaintx > 0) { + graph->AddDependency(*(top_refs.rbegin()), *(top_refs.rbegin() + 1)); + } + } + // Remember the last transaction in each chain, to attach the bottom transactions to. + top_components.push_back(top_refs.size() - 1); + } + + // Make the graph linearize all clusters acceptably. + graph->GetBlockBuilder(); + + // Construct the bottom transactions, and dependencies to the top chains. + while (top_components.size() > 1) { + // Construct the transaction. + int64_t fee = rng.randbits<27>() + 100; + FeePerWeight feerate{fee, 1}; + auto bottom_tx = graph->AddTransaction(feerate); + // Determine the number of dependencies this transaction will have. + int deps = std::min(NUM_DEPS_PER_BOTTOM_TX, top_components.size()); + for (int dep = 0; dep < deps; ++dep) { + // Pick an transaction in top_components to attach to. + auto idx = rng.randrange(top_components.size()); + // Add dependency. 
+ graph->AddDependency(/*parent=*/top_refs[top_components[idx]], /*child=*/bottom_tx); + // Unless this is the last dependency being added, remove from top_components, as + // the component will be merged with that one. + if (dep < deps - 1) { + // Move entry top the back. + if (idx != top_components.size() - 1) std::swap(top_components.back(), top_components[idx]); + // And pop it. + top_components.pop_back(); + } + } + bottom_refs.push_back(std::move(bottom_tx)); + } + + // Run the benchmark exactly once. Running it multiple times would require the setup to be + // redone, which takes a very non-negligible time compared to the trimming itself. + bench.epochIterations(1).epochs(1).run([&] { + // Call Trim() to remove transactions and bring the cluster back within limits. + graph->Trim(); + // And relinearize everything that remains acceptably. + graph->GetBlockBuilder(); + }); + + assert(!graph->IsOversized()); + // At least 99% of chains must survive. + assert(graph->GetTransactionCount() >= (NUM_TOP_CHAINS * NUM_TX_PER_TOP_CHAIN * 99) / 100); +} + +} // namespace + +static void TxGraphTrim(benchmark::Bench& bench) { BenchTxGraphTrim(bench); } + +BENCHMARK(TxGraphTrim, benchmark::PriorityLevel::HIGH); diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 6ce33621af8..f188b3c581f 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -106,6 +106,7 @@ add_executable(test_bitcoin transaction_tests.cpp translation_tests.cpp txdownload_tests.cpp + txgraph_tests.cpp txindex_tests.cpp txpackage_tests.cpp txreconciliation_tests.cpp diff --git a/src/test/fuzz/txgraph.cpp b/src/test/fuzz/txgraph.cpp index e4649970384..561bf0b420c 100644 --- a/src/test/fuzz/txgraph.cpp +++ b/src/test/fuzz/txgraph.cpp @@ -56,9 +56,12 @@ struct SimTxGraph /** Which transactions have been modified in the graph since creation, either directly or by * being in a cluster which includes modifications. Only relevant for the staging graph. 
*/ SetType modified; + /** The configured maximum total size of transactions per cluster. */ + uint64_t max_cluster_size; - /** Construct a new SimTxGraph with the specified maximum cluster count. */ - explicit SimTxGraph(DepGraphIndex max_cluster) : max_cluster_count(max_cluster) {} + /** Construct a new SimTxGraph with the specified maximum cluster count and size. */ + explicit SimTxGraph(DepGraphIndex cluster_count, uint64_t cluster_size) : + max_cluster_count(cluster_count), max_cluster_size(cluster_size) {} // Permit copying and moving. SimTxGraph(const SimTxGraph&) noexcept = default; @@ -66,6 +69,20 @@ struct SimTxGraph SimTxGraph(SimTxGraph&&) noexcept = default; SimTxGraph& operator=(SimTxGraph&&) noexcept = default; + /** Get the connected components within this simulated transaction graph. */ + std::vector GetComponents() + { + auto todo = graph.Positions(); + std::vector ret; + // Iterate over all connected components of the graph. + while (todo.Any()) { + auto component = graph.FindConnectedComponent(todo); + ret.push_back(component); + todo -= component; + } + return ret; + } + /** Check whether this graph is oversized (contains a connected component whose number of * transactions exceeds max_cluster_count. */ bool IsOversized() @@ -73,12 +90,11 @@ struct SimTxGraph if (!oversized.has_value()) { // Only recompute when oversized isn't already known. oversized = false; - auto todo = graph.Positions(); - // Iterate over all connected components of the graph. 
- while (todo.Any()) { - auto component = graph.FindConnectedComponent(todo); + for (auto component : GetComponents()) { if (component.Count() > max_cluster_count) oversized = true; - todo -= component; + uint64_t component_size{0}; + for (auto i : component) component_size += graph.FeeRate(i).size; + if (component_size > max_cluster_size) oversized = true; } } return *oversized; @@ -128,6 +144,8 @@ struct SimTxGraph simmap[simpos] = std::make_shared(); auto ptr = simmap[simpos].get(); simrevmap[ptr] = simpos; + // This may invalidate our cached oversized value. + if (oversized.has_value() && !*oversized) oversized = std::nullopt; return ptr; } @@ -239,6 +257,31 @@ struct SimTxGraph } } } + + + /** Verify that set contains transactions from every oversized cluster, and nothing from + * non-oversized ones. */ + bool MatchesOversizedClusters(const SetType& set) + { + if (set.Any() && !IsOversized()) return false; + + auto todo = graph.Positions(); + if (!set.IsSubsetOf(todo)) return false; + + // Walk all clusters, and make sure all of set doesn't come from non-oversized clusters + while (todo.Any()) { + auto component = graph.FindConnectedComponent(todo); + // Determine whether component is oversized, due to either the size or count limit. + bool is_oversized = component.Count() > max_cluster_count; + uint64_t component_size{0}; + for (auto i : component) component_size += graph.FeeRate(i).size; + is_oversized |= component_size > max_cluster_size; + // Check whether overlap with set matches is_oversized. + if (is_oversized != set.Overlaps(component)) return false; + todo -= component; + } + return true; + } }; } // namespace @@ -254,20 +297,24 @@ FUZZ_TARGET(txgraph) FuzzedDataProvider provider(buffer.data(), buffer.size()); /** Internal test RNG, used only for decisions which would require significant amount of data - * to be read from the provider, without realistically impacting test sensitivity. 
*/ - InsecureRandomContext rng(0xdecade2009added + buffer.size()); + * to be read from the provider, without realistically impacting test sensitivity, and for + * specialized test cases that are hard to perform more generically. */ + InsecureRandomContext rng(provider.ConsumeIntegral()); /** Variable used whenever an empty TxGraph::Ref is needed. */ TxGraph::Ref empty_ref; - // Decide the maximum number of transactions per cluster we will use in this simulation. - auto max_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); + /** The maximum number of transactions per (non-oversized) cluster we will use in this + * simulation. */ + auto max_cluster_count = provider.ConsumeIntegralInRange(1, MAX_CLUSTER_COUNT_LIMIT); + /** The maximum total size of transactions in a (non-oversized) cluster. */ + auto max_cluster_size = provider.ConsumeIntegralInRange(1, 0x3fffff * MAX_CLUSTER_COUNT_LIMIT); // Construct a real graph, and a vector of simulated graphs (main, and possibly staging). - auto real = MakeTxGraph(max_count); + auto real = MakeTxGraph(max_cluster_count, max_cluster_size); std::vector sims; sims.reserve(2); - sims.emplace_back(max_count); + sims.emplace_back(max_cluster_count, max_cluster_size); /** Struct encapsulating information about a BlockBuilder that's currently live. */ struct BlockBuilderData @@ -396,7 +443,7 @@ FUZZ_TARGET(txgraph) // Otherwise, use smaller range which consume fewer fuzz input bytes, as just // these are likely sufficient to trigger all interesting code paths already. fee = provider.ConsumeIntegral(); - size = provider.ConsumeIntegral() + 1; + size = provider.ConsumeIntegralInRange(1, 0xff); } FeePerWeight feerate{fee, size}; // Create a real TxGraph::Ref. @@ -534,7 +581,7 @@ FUZZ_TARGET(txgraph) auto ref = pick_fn(); auto result = alt ? 
real->GetDescendants(*ref, use_main) : real->GetAncestors(*ref, use_main); - assert(result.size() <= max_count); + assert(result.size() <= max_cluster_count); auto result_set = sel_sim.MakeSet(result); assert(result.size() == result_set.Count()); auto expect_set = sel_sim.GetAncDesc(ref, alt); @@ -567,16 +614,20 @@ FUZZ_TARGET(txgraph) auto ref = pick_fn(); auto result = real->GetCluster(*ref, use_main); // Check cluster count limit. - assert(result.size() <= max_count); + assert(result.size() <= max_cluster_count); // Require the result to be topologically valid and not contain duplicates. auto left = sel_sim.graph.Positions(); + uint64_t total_size{0}; for (auto refptr : result) { auto simpos = sel_sim.Find(refptr); + total_size += sel_sim.graph.FeeRate(simpos).size; assert(simpos != SimTxGraph::MISSING); assert(left[simpos]); left.Reset(simpos); assert(!sel_sim.graph.Ancestors(simpos).Overlaps(left)); } + // Check cluster size limit. + assert(total_size <= max_cluster_size); // Require the set to be connected. auto result_set = sel_sim.MakeSet(result); assert(sel_sim.graph.IsConnected(result_set)); @@ -774,6 +825,146 @@ FUZZ_TARGET(txgraph) assert(sum == worst_chunk_feerate); } break; + } else if ((block_builders.empty() || sims.size() > 1) && command-- == 0) { + // Trim. + bool was_oversized = top_sim.IsOversized(); + auto removed = real->Trim(); + // Verify that something was removed if and only if there was an oversized cluster. + assert(was_oversized == !removed.empty()); + if (!was_oversized) break; + auto removed_set = top_sim.MakeSet(removed); + // The removed set must contain all its own descendants. + for (auto simpos : removed_set) { + assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set)); + } + // Something from every oversized cluster should have been removed, and nothing + // else. + assert(top_sim.MatchesOversizedClusters(removed_set)); + + // Apply all removals to the simulation, and verify the result is no longer + // oversized. 
Don't query the real graph for oversizedness; it is compared + // against the simulation anyway later. + for (auto simpos : removed_set) { + top_sim.RemoveTransaction(top_sim.GetRef(simpos)); + } + assert(!top_sim.IsOversized()); + break; + } else if ((block_builders.empty() || sims.size() > 1) && + top_sim.GetTransactionCount() > max_cluster_count && !top_sim.IsOversized() && command-- == 0) { + // Trim (special case which avoids apparent cycles in the implicit approximate + // dependency graph constructed inside the Trim() implementation). This is worth + // testing separately, because such cycles cannot occur in realistic scenarios, + // but this is hard to replicate in general in this fuzz test. + + // First, we need to have dependencies applied and linearizations fixed to avoid + // circular dependencies in implied graph; trigger it via whatever means. + real->CountDistinctClusters({}, false); + + // Gather the current clusters. + auto clusters = top_sim.GetComponents(); + + // Merge clusters randomly until at least one oversized one appears. + bool made_oversized = false; + auto merges_left = clusters.size() - 1; + while (merges_left > 0) { + --merges_left; + // Find positions of clusters in the clusters vector to merge together. + auto par_cl = rng.randrange(clusters.size()); + auto chl_cl = rng.randrange(clusters.size() - 1); + chl_cl += (chl_cl >= par_cl); + Assume(chl_cl != par_cl); + // Add between 1 and 3 dependencies between them. As all are in the same + // direction (from the child cluster to parent cluster), no cycles are possible, + // regardless of what internal topology Trim() uses as approximation within the + // clusters. + int num_deps = rng.randrange(3) + 1; + for (int i = 0; i < num_deps; ++i) { + // Find a parent transaction in the parent cluster. 
+ auto par_idx = rng.randrange(clusters[par_cl].Count()); + SimTxGraph::Pos par_pos = 0; + for (auto j : clusters[par_cl]) { + if (par_idx == 0) { + par_pos = j; + break; + } + --par_idx; + } + // Find a child transaction in the child cluster. + auto chl_idx = rng.randrange(clusters[chl_cl].Count()); + SimTxGraph::Pos chl_pos = 0; + for (auto j : clusters[chl_cl]) { + if (chl_idx == 0) { + chl_pos = j; + break; + } + --chl_idx; + } + // Add dependency to both simulation and real TxGraph. + auto par_ref = top_sim.GetRef(par_pos); + auto chl_ref = top_sim.GetRef(chl_pos); + top_sim.AddDependency(par_ref, chl_ref); + real->AddDependency(*par_ref, *chl_ref); + } + // Compute the combined cluster. + auto par_cluster = clusters[par_cl]; + auto chl_cluster = clusters[chl_cl]; + auto new_cluster = par_cluster | chl_cluster; + // Remove the parent and child cluster from clusters. + std::erase_if(clusters, [&](const auto& cl) noexcept { return cl == par_cluster || cl == chl_cluster; }); + // Add the combined cluster. + clusters.push_back(new_cluster); + // If this is the first merge that causes an oversized cluster to appear, pick + // a random number of further merges to appear. + if (!made_oversized) { + made_oversized = new_cluster.Count() > max_cluster_count; + if (!made_oversized) { + FeeFrac total; + for (auto i : new_cluster) total += top_sim.graph.FeeRate(i); + if (uint32_t(total.size) > max_cluster_size) made_oversized = true; + } + if (made_oversized) merges_left = rng.randrange(clusters.size()); + } + } + + // Determine an upper bound on how many transactions are removed. + uint32_t max_removed = 0; + for (auto& cluster : clusters) { + // Gather all transaction sizes in the to-be-combined cluster. + std::vector sizes; + for (auto i : cluster) sizes.push_back(top_sim.graph.FeeRate(i).size); + auto sum_sizes = std::accumulate(sizes.begin(), sizes.end(), uint64_t{0}); + // Sort from large to small. 
+ std::sort(sizes.begin(), sizes.end(), std::greater{}); + // In the worst case, only the smallest transactions are removed. + while (sizes.size() > max_cluster_count || sum_sizes > max_cluster_size) { + sum_sizes -= sizes.back(); + sizes.pop_back(); + ++max_removed; + } + } + + // Invoke Trim now on the definitely-oversized txgraph. + auto removed = real->Trim(); + // Verify that the number of removals is within range. + assert(removed.size() >= 1); + assert(removed.size() <= max_removed); + // The removed set must contain all its own descendants. + auto removed_set = top_sim.MakeSet(removed); + for (auto simpos : removed_set) { + assert(top_sim.graph.Descendants(simpos).IsSubsetOf(removed_set)); + } + // Something from every oversized cluster should have been removed, and nothing + // else. + assert(top_sim.MatchesOversizedClusters(removed_set)); + + // Apply all removals to the simulation, and verify the result is no longer + // oversized. Don't query the real graph for oversizedness; it is compared + // against the simulation anyway later. + for (auto simpos : removed_set) { + top_sim.RemoveTransaction(top_sim.GetRef(simpos)); + } + assert(!top_sim.IsOversized()); + break; } } } @@ -941,28 +1132,32 @@ FUZZ_TARGET(txgraph) // Check its ancestors against simulation. auto expect_anc = sim.graph.Ancestors(i); auto anc = sim.MakeSet(real->GetAncestors(*sim.GetRef(i), main_only)); - assert(anc.Count() <= max_count); + assert(anc.Count() <= max_cluster_count); assert(anc == expect_anc); // Check its descendants against simulation. auto expect_desc = sim.graph.Descendants(i); auto desc = sim.MakeSet(real->GetDescendants(*sim.GetRef(i), main_only)); - assert(desc.Count() <= max_count); + assert(desc.Count() <= max_cluster_count); assert(desc == expect_desc); // Check the cluster the transaction is part of. 
auto cluster = real->GetCluster(*sim.GetRef(i), main_only); - assert(cluster.size() <= max_count); + assert(cluster.size() <= max_cluster_count); assert(sim.MakeSet(cluster) == component); // Check that the cluster is reported in a valid topological order (its // linearization). std::vector simlin; SimTxGraph::SetType done; + uint64_t total_size{0}; for (TxGraph::Ref* ptr : cluster) { auto simpos = sim.Find(ptr); assert(sim.graph.Descendants(simpos).IsSubsetOf(component - done)); done.Set(simpos); assert(sim.graph.Ancestors(simpos).IsSubsetOf(done)); simlin.push_back(simpos); + total_size += sim.graph.FeeRate(simpos).size; } + // Check cluster size. + assert(total_size <= max_cluster_size); // Construct a chunking object for the simulated graph, using the reported cluster // linearization as ordering, and compare it against the reported chunk feerates. if (sims.size() == 1 || main_only) { diff --git a/src/test/txgraph_tests.cpp b/src/test/txgraph_tests.cpp new file mode 100644 index 00000000000..75abfba7dc5 --- /dev/null +++ b/src/test/txgraph_tests.cpp @@ -0,0 +1,290 @@ +// Copyright (c) 2023-present The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://opensource.org/license/mit/. + +#include + +#include + +#include + +#include +#include + +BOOST_AUTO_TEST_SUITE(txgraph_tests) + +BOOST_AUTO_TEST_CASE(txgraph_trim_zigzag) +{ + // T T T T T T T T T T T T T T (50 T's) + // \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / + // \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / \ / + // B B B B B B B B B B B B B (49 B's) + // + /** The maximum cluster count used in this test. */ + static constexpr int MAX_CLUSTER_COUNT = 50; + /** The number of "bottom" transactions, which are in the mempool already. */ + static constexpr int NUM_BOTTOM_TX = 49; + /** The number of "top" transactions, which come from disconnected blocks. 
These are re-added + * to the mempool and, while connecting them to the already-in-mempool transactions, we + * discover the resulting cluster is oversized. */ + static constexpr int NUM_TOP_TX = 50; + /** The total number of transactions in the test. */ + static constexpr int NUM_TOTAL_TX = NUM_BOTTOM_TX + NUM_TOP_TX; + static_assert(NUM_TOTAL_TX > MAX_CLUSTER_COUNT); + /** Set a very large cluster size limit so that only the count limit is triggered. */ + static constexpr int32_t MAX_CLUSTER_SIZE = 100'000 * 100; + + // Create a new graph for the test. + auto graph = MakeTxGraph(MAX_CLUSTER_COUNT, MAX_CLUSTER_SIZE); + + // Add all transactions and store their Refs. + std::vector refs; + refs.reserve(NUM_TOTAL_TX); + // First all bottom transactions: the i'th bottom transaction is at position i. + for (unsigned int i = 0; i < NUM_BOTTOM_TX; ++i) { + refs.push_back(graph->AddTransaction(FeePerWeight{200 - i, 100})); + } + // Then all top transactions: the i'th top transaction is at position NUM_BOTTOM_TX + i. + for (unsigned int i = 0; i < NUM_TOP_TX; ++i) { + refs.push_back(graph->AddTransaction(FeePerWeight{100 - i, 100})); + } + + // Create the zigzag dependency structure. + // Each transaction in the bottom row depends on two adjacent transactions from the top row. + graph->SanityCheck(); + for (unsigned int i = 0; i < NUM_BOTTOM_TX; ++i) { + graph->AddDependency(/*parent=*/refs[NUM_BOTTOM_TX + i], /*child=*/refs[i]); + graph->AddDependency(/*parent=*/refs[NUM_BOTTOM_TX + i + 1], /*child=*/refs[i]); + } + + // Check that the graph is now oversized. This also forces the graph to + // group clusters and compute the oversized status. + graph->SanityCheck(); + BOOST_CHECK_EQUAL(graph->GetTransactionCount(), NUM_TOTAL_TX); + BOOST_CHECK(graph->IsOversized(/*main_only=*/false)); + + // Call Trim() to remove transactions and bring the cluster back within limits. 
+ auto removed_refs = graph->Trim(); + graph->SanityCheck(); + BOOST_CHECK(!graph->IsOversized(/*main_only=*/false)); + + // We only need to trim the middle bottom transaction to end up with 2 clusters each within cluster limits. + BOOST_CHECK_EQUAL(removed_refs.size(), 1); + BOOST_CHECK_EQUAL(graph->GetTransactionCount(), MAX_CLUSTER_COUNT * 2 - 2); + for (unsigned int i = 0; i < refs.size(); ++i) { + BOOST_CHECK_EQUAL(graph->Exists(refs[i]), i != (NUM_BOTTOM_TX / 2)); + } +} + +BOOST_AUTO_TEST_CASE(txgraph_trim_flower) +{ + // We will build an oversized flower-shaped graph: all transactions are spent by 1 descendant. + // + // T T T T T T T T (100 T's) + // | | | | | | | | + // | | | | | | | | + // \---+---+---+-+-+---+---+---/ + // | + // B (1 B) + // + /** The maximum cluster count used in this test. */ + static constexpr int MAX_CLUSTER_COUNT = 50; + /** The number of "top" transactions, which come from disconnected blocks. These are re-added + * to the mempool and, connecting them to the already-in-mempool transactions, we discover the + * resulting cluster is oversized. */ + static constexpr int NUM_TOP_TX = MAX_CLUSTER_COUNT * 2; + /** The total number of transactions in this test. */ + static constexpr int NUM_TOTAL_TX = NUM_TOP_TX + 1; + /** Set a very large cluster size limit so that only the count limit is triggered. */ + static constexpr int32_t MAX_CLUSTER_SIZE = 100'000 * 100; + + auto graph = MakeTxGraph(MAX_CLUSTER_COUNT, MAX_CLUSTER_SIZE); + + // Add all transactions and store their Refs. + std::vector refs; + refs.reserve(NUM_TOTAL_TX); + + // Add all transactions. They are in individual clusters. + refs.push_back(graph->AddTransaction({1, 100})); + for (unsigned int i = 0; i < NUM_TOP_TX; ++i) { + refs.push_back(graph->AddTransaction(FeePerWeight{500 + i, 100})); + } + graph->SanityCheck(); + + // The 0th transaction spends all the top transactions. 
+ for (unsigned int i = 1; i < NUM_TOTAL_TX; ++i) { + graph->AddDependency(/*parent=*/refs[i], /*child=*/refs[0]); + } + graph->SanityCheck(); + + // Check that the graph is now oversized. This also forces the graph to + // group clusters and compute the oversized status. + BOOST_CHECK(graph->IsOversized(/*main_only=*/false)); + + // Call Trim() to remove transactions and bring the cluster back within limits. + auto removed_refs = graph->Trim(); + graph->SanityCheck(); + BOOST_CHECK(!graph->IsOversized(/*main_only=*/false)); + + // Since only the bottom transaction connects these clusters, we only need to remove it. + BOOST_CHECK_EQUAL(removed_refs.size(), 1); + BOOST_CHECK_EQUAL(graph->GetTransactionCount(false), MAX_CLUSTER_COUNT * 2); + BOOST_CHECK(!graph->Exists(refs[0])); + for (unsigned int i = 1; i < refs.size(); ++i) { + BOOST_CHECK(graph->Exists(refs[i])); + } +} + +BOOST_AUTO_TEST_CASE(txgraph_trim_huge) +{ + // The from-block transactions consist of 1000 fully linear clusters, each with 64 + // transactions. The mempool contains 11 transactions that together merge all of these into + // a single cluster. + // + // (1000 chains of 64 transactions, 64000 T's total) + // + // T T T T T T T T + // | | | | | | | | + // T T T T T T T T + // | | | | | | | | + // T T T T T T T T + // | | | | | | | | + // T T T T T T T T + // (64 long) (64 long) (64 long) (64 long) (64 long) (64 long) (64 long) (64 long) + // | | | | | | | | + // | | / \ | / \ | | / + // \----------+--------/ \--------+--------/ \--------+-----+----+--------/ + // | | | + // B B B + // + // (11 B's, each attaching to up to 100 chains of 64 T's) + // + /** The maximum cluster count used in this test. */ + static constexpr int MAX_CLUSTER_COUNT = 64; + /** The number of "top" (from-block) chains of transactions. */ + static constexpr int NUM_TOP_CHAINS = 1000; + /** The number of transactions per top chain. 
*/ + static constexpr int NUM_TX_PER_TOP_CHAIN = MAX_CLUSTER_COUNT; + /** The (maximum) number of dependencies per bottom transaction. */ + static constexpr int NUM_DEPS_PER_BOTTOM_TX = 100; + /** The number of bottom transactions that are expected to be created. */ + static constexpr int NUM_BOTTOM_TX = (NUM_TOP_CHAINS - 1 + (NUM_DEPS_PER_BOTTOM_TX - 2)) / (NUM_DEPS_PER_BOTTOM_TX - 1); + /** The total number of transactions created in this test. */ + static constexpr int NUM_TOTAL_TX = NUM_TOP_CHAINS * NUM_TX_PER_TOP_CHAIN + NUM_BOTTOM_TX; + /** Set a very large cluster size limit so that only the count limit is triggered. */ + static constexpr int32_t MAX_CLUSTER_SIZE = 100'000 * 100; + + /** Refs to all top transactions. */ + std::vector top_refs; + /** Refs to all bottom transactions. */ + std::vector bottom_refs; + /** Indexes into top_refs for some transaction of each component, in arbitrary order. + * Initially these are the last transactions in each chains, but as bottom transactions are + * added, entries will be removed when they get merged, and randomized. */ + std::vector top_components; + + FastRandomContext rng; + auto graph = MakeTxGraph(MAX_CLUSTER_COUNT, MAX_CLUSTER_SIZE); + + // Construct the top chains. + for (int chain = 0; chain < NUM_TOP_CHAINS; ++chain) { + for (int chaintx = 0; chaintx < NUM_TX_PER_TOP_CHAIN; ++chaintx) { + // Use random fees, size 1. + int64_t fee = rng.randbits<27>() + 100; + FeePerWeight feerate{fee, 1}; + top_refs.push_back(graph->AddTransaction(feerate)); + // Add internal dependencies linked the chain transactions together. + if (chaintx > 0) { + graph->AddDependency(*(top_refs.rbegin()), *(top_refs.rbegin() + 1)); + } + } + // Remember the last transaction in each chain, to attach the bottom transactions to. + top_components.push_back(top_refs.size() - 1); + } + graph->SanityCheck(); + + // Not oversized so far (just 1000 clusters of 64). 
+ BOOST_CHECK(!graph->IsOversized()); + + // Construct the bottom transactions, and dependencies to the top chains. + while (top_components.size() > 1) { + // Construct the transaction. + int64_t fee = rng.randbits<27>() + 100; + FeePerWeight feerate{fee, 1}; + auto bottom_tx = graph->AddTransaction(feerate); + // Determine the number of dependencies this transaction will have. + int deps = std::min(NUM_DEPS_PER_BOTTOM_TX, top_components.size()); + for (int dep = 0; dep < deps; ++dep) { + // Pick an transaction in top_components to attach to. + auto idx = rng.randrange(top_components.size()); + // Add dependency. + graph->AddDependency(/*parent=*/top_refs[top_components[idx]], /*child=*/bottom_tx); + // Unless this is the last dependency being added, remove from top_components, as + // the component will be merged with that one. + if (dep < deps - 1) { + // Move entry top the back. + if (idx != top_components.size() - 1) std::swap(top_components.back(), top_components[idx]); + // And pop it. + top_components.pop_back(); + } + } + bottom_refs.push_back(std::move(bottom_tx)); + } + graph->SanityCheck(); + + // Now we are oversized (one cluster of 64011). + BOOST_CHECK(graph->IsOversized()); + const auto total_tx_count = graph->GetTransactionCount(); + BOOST_CHECK(total_tx_count == top_refs.size() + bottom_refs.size()); + BOOST_CHECK(total_tx_count == NUM_TOTAL_TX); + + // Call Trim() to remove transactions and bring the cluster back within limits. + auto removed_refs = graph->Trim(); + BOOST_CHECK(!graph->IsOversized()); + BOOST_CHECK(removed_refs.size() == total_tx_count - graph->GetTransactionCount()); + graph->SanityCheck(); + + // At least 99% of chains must survive. + BOOST_CHECK(graph->GetTransactionCount() >= (NUM_TOP_CHAINS * NUM_TX_PER_TOP_CHAIN * 99) / 100); +} + +BOOST_AUTO_TEST_CASE(txgraph_trim_big_singletons) +{ + // Mempool consists of 100 singleton clusters; there are no dependencies. Some are oversized. Trim() should remove all of the oversized ones. 
+ static constexpr int MAX_CLUSTER_COUNT = 64; + static constexpr int32_t MAX_CLUSTER_SIZE = 100'000; + static constexpr int NUM_TOTAL_TX = 100; + + // Create a new graph for the test. + auto graph = MakeTxGraph(MAX_CLUSTER_COUNT, MAX_CLUSTER_SIZE); + + // Add all transactions and store their Refs. + std::vector refs; + refs.reserve(NUM_TOTAL_TX); + + // Add all transactions. They are in individual clusters. + for (unsigned int i = 0; i < NUM_TOTAL_TX; ++i) { + // The 88th transaction is oversized. + // Every 20th transaction is oversized. + const FeePerWeight feerate{500 + i, (i == 88 || i % 20 == 0) ? MAX_CLUSTER_SIZE + 1 : 100}; + refs.push_back(graph->AddTransaction(feerate)); + } + graph->SanityCheck(); + + // Check that the graph is now oversized. This also forces the graph to + // group clusters and compute the oversized status. + BOOST_CHECK(graph->IsOversized(/*main_only=*/false)); + + // Call Trim() to remove transactions and bring the cluster back within limits. + auto removed_refs = graph->Trim(); + graph->SanityCheck(); + BOOST_CHECK_EQUAL(graph->GetTransactionCount(), NUM_TOTAL_TX - 6); + BOOST_CHECK(!graph->IsOversized(/*main_only=*/false)); + + // Check that all the oversized transactions were removed. + for (unsigned int i = 0; i < refs.size(); ++i) { + BOOST_CHECK_EQUAL(graph->Exists(refs[i]), i != 88 && i % 20 != 0); + } +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/txgraph.cpp b/src/txgraph.cpp index 33afca68b0b..1b29a05e534 100644 --- a/src/txgraph.cpp +++ b/src/txgraph.cpp @@ -35,6 +35,9 @@ using ClusterSetIndex = uint32_t; /** Quality levels for cached cluster linearizations. */ enum class QualityLevel { + /** This is a singleton cluster consisting of a transaction that individually exceeds the + * cluster size limit. It cannot be merged with anything. */ + OVERSIZED_SINGLETON, /** This cluster may have multiple disconnected components, which are all NEEDS_RELINEARIZE. 
*/ NEEDS_SPLIT, /** This cluster may have multiple disconnected components, which are all ACCEPTABLE. */ @@ -50,6 +53,47 @@ enum class QualityLevel NONE, }; +/** Information about a transaction inside TxGraphImpl::Trim. */ +struct TrimTxData +{ + // Fields populated by Cluster::AppendTrimData(). These are immutable after TrimTxData + // construction. + /** Chunk feerate for this transaction. */ + FeePerWeight m_chunk_feerate; + /** GraphIndex of the transaction. */ + TxGraph::GraphIndex m_index; + /** Size of the transaction. */ + uint32_t m_tx_size; + + // Fields only used internally by TxGraphImpl::Trim(): + /** Number of unmet dependencies this transaction has. -1 if the transaction is included. */ + uint32_t m_deps_left; + /** Number of dependencies that apply to this transaction as child. */ + uint32_t m_parent_count; + /** Where in deps_by_child those dependencies begin. */ + uint32_t m_parent_offset; + /** Number of dependencies that apply to this transaction as parent. */ + uint32_t m_children_count; + /** Where in deps_by_parent those dependencies begin. */ + uint32_t m_children_offset; + + // Fields only used internally by TxGraphImpl::Trim()'s union-find implementation, and only for + // transactions that are definitely included or definitely rejected. + // + // As transactions get processed, they get organized into trees which form partitions + // representing the would-be clusters up to that point. The root of each tree is a + // representative for that partition. See + // https://en.wikipedia.org/wiki/Disjoint-set_data_structure. + // + /** Pointer to another TrimTxData, towards the root of the tree. If this is a root, m_uf_parent + * is equal to this itself. */ + TrimTxData* m_uf_parent; + /** If this is a root, the total number of transactions in the partition. */ + uint32_t m_uf_count; + /** If this is a root, the total size of transactions in the partition. 
*/ + uint64_t m_uf_size; +}; + /** A grouping of connected transactions inside a TxGraphImpl::ClusterSet. */ class Cluster { @@ -101,6 +145,10 @@ public: { return m_quality == QualityLevel::OPTIMAL; } + /** Whether this cluster is oversized. Note that no changes that can cause oversizedness are + * ever applied, so the only way a materialized Cluster object can be oversized is by being + * an individually oversized transaction singleton. */ + bool IsOversized() const noexcept { return m_quality == QualityLevel::OVERSIZED_SINGLETON; } /** Whether this cluster requires splitting. */ bool NeedsSplitting() const noexcept { @@ -109,6 +157,8 @@ public: } /** Get the number of transactions in this Cluster. */ LinearizationIndex GetTxCount() const noexcept { return m_linearization.size(); } + /** Get the total size of the transactions in this Cluster. */ + uint64_t GetTotalTxSize() const noexcept; /** Given a DepGraphIndex into this Cluster, find the corresponding GraphIndex. */ GraphIndex GetClusterEntry(DepGraphIndex index) const noexcept { return m_mapping[index]; } /** Only called by Graph::SwapIndexes. */ @@ -143,6 +193,10 @@ public: void Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept; /** For every chunk in the cluster, append its FeeFrac to ret. */ void AppendChunkFeerates(std::vector& ret) const noexcept; + /** Add a TrimTxData entry (filling m_chunk_feerate, m_index, m_tx_size) for every + * transaction in the Cluster to ret. Implicit dependencies between consecutive transactions + * in the linearization are added to deps. Return the Cluster's total transaction size. */ + uint64_t AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept; // Functions that implement the Cluster-specific side of public TxGraph functions. @@ -199,6 +253,8 @@ private: FastRandomContext m_rng; /** This TxGraphImpl's maximum cluster count limit. */ const DepGraphIndex m_max_cluster_count; + /** This TxGraphImpl's maximum cluster size limit. 
*/ + const uint64_t m_max_cluster_size; /** Information about one group of Clusters to be merged. */ struct GroupEntry @@ -220,9 +276,6 @@ private: std::vector m_groups; /** Which clusters are to be merged. GroupEntry::m_cluster_offset indexes into this. */ std::vector m_group_clusters; - /** Whether at least one of the groups cannot be applied because it would result in a - * Cluster that violates the cluster count limit. */ - bool m_group_oversized; }; /** The collection of all Clusters in main or staged. */ @@ -244,8 +297,9 @@ private: /** Total number of transactions in this graph (sum of all transaction counts in all * Clusters, and for staging also those inherited from the main ClusterSet). */ GraphIndex m_txcount{0}; - /** Whether this graph is oversized (if known). This roughly matches - * m_group_data->m_group_oversized, but may be known even if m_group_data is not. */ + /** Total number of individually oversized transactions in the graph. */ + GraphIndex m_txcount_oversized{0}; + /** Whether this graph is oversized (if known). */ std::optional m_oversized{false}; ClusterSet() noexcept = default; @@ -401,9 +455,10 @@ private: std::vector m_unlinked; public: - /** Construct a new TxGraphImpl with the specified maximum cluster count. */ - explicit TxGraphImpl(DepGraphIndex max_cluster_count) noexcept : + /** Construct a new TxGraphImpl with the specified limits. */ + explicit TxGraphImpl(DepGraphIndex max_cluster_count, uint64_t max_cluster_size) noexcept : m_max_cluster_count(max_cluster_count), + m_max_cluster_size(max_cluster_size), m_main_chunkindex(ChunkOrder(this)) { Assume(max_cluster_count >= 1); @@ -441,8 +496,10 @@ public: /** Get a reference to the ClusterSet at the specified level (which must exist). */ ClusterSet& GetClusterSet(int level) noexcept; const ClusterSet& GetClusterSet(int level) const noexcept; - /** Make a transaction not exist at a specified level. It must currently exist there. 
*/ - void ClearLocator(int level, GraphIndex index) noexcept; + /** Make a transaction not exist at a specified level. It must currently exist there. + * oversized_tx indicates whether the transaction is an individually-oversized one + * (OVERSIZED_SINGLETON). */ + void ClearLocator(int level, GraphIndex index, bool oversized_tx) noexcept; /** Find which Clusters in main conflict with ones in staging. */ std::vector GetConflicts() const noexcept; /** Clear an Entry's ChunkData. */ @@ -551,6 +608,7 @@ public: std::strong_ordering CompareMainOrder(const Ref& a, const Ref& b) noexcept final; GraphIndex CountDistinctClusters(std::span refs, bool main_only = false) noexcept final; std::pair, std::vector> GetMainStagingDiagrams() noexcept final; + std::vector Trim() noexcept final; std::unique_ptr GetBlockBuilder() noexcept final; std::pair, FeePerWeight> GetWorstMainChunk() noexcept final; @@ -635,7 +693,16 @@ void TxGraphImpl::CreateChunkData(GraphIndex idx, LinearizationIndex chunk_count } } -void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept +uint64_t Cluster::GetTotalTxSize() const noexcept +{ + uint64_t ret{0}; + for (auto i : m_linearization) { + ret += m_depgraph.FeeRate(i).size; + } + return ret; +} + +void TxGraphImpl::ClearLocator(int level, GraphIndex idx, bool oversized_tx) noexcept { auto& entry = m_entries[idx]; auto& clusterset = GetClusterSet(level); @@ -649,12 +716,14 @@ void TxGraphImpl::ClearLocator(int level, GraphIndex idx) noexcept } // Update the transaction count. --clusterset.m_txcount; + clusterset.m_txcount_oversized -= oversized_tx; // If clearing main, adjust the status of Locators of this transaction in staging, if it exists. 
if (level == 0 && GetTopLevel() == 1) { if (entry.m_locator[1].IsRemoved()) { entry.m_locator[1].SetMissing(); } else if (!entry.m_locator[1].IsPresent()) { --m_staging_clusterset->m_txcount; + m_staging_clusterset->m_txcount_oversized -= oversized_tx; } } if (level == 0) ClearChunkData(entry); @@ -785,7 +854,7 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove entry.m_main_lin_index = LinearizationIndex(-1); } // - Mark it as missing/removed in the Entry's locator. - graph.ClearLocator(m_level, idx); + graph.ClearLocator(m_level, idx, m_quality == QualityLevel::OVERSIZED_SINGLETON); to_remove = to_remove.subspan(1); } while(!to_remove.empty()); @@ -824,7 +893,7 @@ void Cluster::ApplyRemovals(TxGraphImpl& graph, std::span& to_remove void Cluster::Clear(TxGraphImpl& graph) noexcept { for (auto i : m_linearization) { - graph.ClearLocator(m_level, m_mapping[i]); + graph.ClearLocator(m_level, m_mapping[i], m_quality == QualityLevel::OVERSIZED_SINGLETON); } m_depgraph = {}; m_linearization.clear(); @@ -852,6 +921,37 @@ void Cluster::AppendChunkFeerates(std::vector& ret) const noexcept ret.insert(ret.end(), chunk_feerates.begin(), chunk_feerates.end()); } +uint64_t Cluster::AppendTrimData(std::vector& ret, std::vector>& deps) const noexcept +{ + const LinearizationChunking linchunking(m_depgraph, m_linearization); + LinearizationIndex pos{0}; + uint64_t size{0}; + auto prev_index = GraphIndex(-1); + // Iterate over the chunks of this cluster's linearization. + for (unsigned i = 0; i < linchunking.NumChunksLeft(); ++i) { + const auto& [chunk, chunk_feerate] = linchunking.GetChunk(i); + // Iterate over the transactions of that chunk, in linearization order. + auto chunk_tx_count = chunk.Count(); + for (unsigned j = 0; j < chunk_tx_count; ++j) { + auto cluster_idx = m_linearization[pos]; + // The transaction must appear in the chunk. + Assume(chunk[cluster_idx]); + // Construct a new element in ret. 
+ auto& entry = ret.emplace_back(); + entry.m_chunk_feerate = FeePerWeight::FromFeeFrac(chunk_feerate); + entry.m_index = m_mapping[cluster_idx]; + // If this is not the first transaction of the cluster linearization, it has an + // implicit dependency on its predecessor. + if (pos != 0) deps.emplace_back(prev_index, entry.m_index); + prev_index = entry.m_index; + entry.m_tx_size = m_depgraph.FeeRate(cluster_idx).size; + size += entry.m_tx_size; + ++pos; + } + } + return size; +} + bool Cluster::Split(TxGraphImpl& graph) noexcept { // This function can only be called when the Cluster needs splitting. @@ -1284,6 +1384,17 @@ void TxGraphImpl::GroupClusters(int level) noexcept * to-be-merged group). */ std::vector, uint64_t>> an_deps; + // Construct an an_clusters entry for every oversized cluster, including ones from levels below, + // as they may be inherited in this one. + for (int level_iter = 0; level_iter <= level; ++level_iter) { + for (auto& cluster : GetClusterSet(level_iter).m_clusters[int(QualityLevel::OVERSIZED_SINGLETON)]) { + auto graph_idx = cluster->GetClusterEntry(0); + auto cur_cluster = FindCluster(graph_idx, level); + if (cur_cluster == nullptr) continue; + an_clusters.emplace_back(cur_cluster, cur_cluster->m_sequence); + } + } + // Construct a an_clusters entry for every parent and child in the to-be-applied dependencies, // and an an_deps entry for each dependency to be applied. an_deps.reserve(clusterset.m_deps_to_add.size()); @@ -1300,7 +1411,7 @@ void TxGraphImpl::GroupClusters(int level) noexcept an_deps.emplace_back(std::pair{par, chl}, chl_cluster->m_sequence); } // Sort and deduplicate an_clusters, so we end up with a sorted list of all involved Clusters - // to which dependencies apply. + // to which dependencies apply, or which are oversized. 
std::sort(an_clusters.begin(), an_clusters.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); an_clusters.erase(std::unique(an_clusters.begin(), an_clusters.end()), an_clusters.end()); // Sort an_deps by applying the same order to the involved child cluster. @@ -1424,9 +1535,9 @@ void TxGraphImpl::GroupClusters(int level) noexcept // back to m_deps_to_add. clusterset.m_group_data = GroupData{}; clusterset.m_group_data->m_group_clusters.reserve(an_clusters.size()); - clusterset.m_group_data->m_group_oversized = false; clusterset.m_deps_to_add.clear(); clusterset.m_deps_to_add.reserve(an_deps.size()); + clusterset.m_oversized = false; auto an_deps_it = an_deps.begin(); auto an_clusters_it = an_clusters.begin(); while (an_clusters_it != an_clusters.end()) { @@ -1439,10 +1550,12 @@ void TxGraphImpl::GroupClusters(int level) noexcept new_entry.m_deps_offset = clusterset.m_deps_to_add.size(); new_entry.m_deps_count = 0; uint32_t total_count{0}; + uint64_t total_size{0}; // Add all its clusters to it (copying those from an_clusters to m_group_clusters). while (an_clusters_it != an_clusters.end() && an_clusters_it->second == rep) { clusterset.m_group_data->m_group_clusters.push_back(an_clusters_it->first); total_count += an_clusters_it->first->GetTxCount(); + total_size += an_clusters_it->first->GetTotalTxSize(); ++an_clusters_it; ++new_entry.m_cluster_count; } @@ -1453,13 +1566,12 @@ void TxGraphImpl::GroupClusters(int level) noexcept ++new_entry.m_deps_count; } // Detect oversizedness. 
- if (total_count > m_max_cluster_count) { - clusterset.m_group_data->m_group_oversized = true; + if (total_count > m_max_cluster_count || total_size > m_max_cluster_size) { + clusterset.m_oversized = true; } } Assume(an_deps_it == an_deps.end()); Assume(an_clusters_it == an_clusters.end()); - clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; Compact(); } @@ -1501,7 +1613,7 @@ void TxGraphImpl::ApplyDependencies(int level) noexcept // Nothing to do if there are no dependencies to be added. if (clusterset.m_deps_to_add.empty()) return; // Dependencies cannot be applied if it would result in oversized clusters. - if (clusterset.m_group_data->m_group_oversized) return; + if (clusterset.m_oversized == true) return; // For each group of to-be-merged Clusters. for (const auto& group_entry : clusterset.m_group_data->m_groups) { @@ -1557,7 +1669,7 @@ void Cluster::Relinearize(TxGraphImpl& graph, uint64_t max_iters) noexcept void TxGraphImpl::MakeAcceptable(Cluster& cluster) noexcept { // Relinearize the Cluster if needed. - if (!cluster.NeedsSplitting() && !cluster.IsAcceptable()) { + if (!cluster.NeedsSplitting() && !cluster.IsAcceptable() && !cluster.IsOversized()) { cluster.Relinearize(*this, 10000); } } @@ -1587,6 +1699,7 @@ Cluster::Cluster(uint64_t sequence, TxGraphImpl& graph, const FeePerWeight& feer TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept { Assume(m_main_chunkindex_observers == 0 || GetTopLevel() != 0); + Assume(feerate.size > 0); // Construct a new Ref. Ref ret; // Construct a new Entry, and link it with the Ref. @@ -1598,13 +1711,20 @@ TxGraph::Ref TxGraphImpl::AddTransaction(const FeePerWeight& feerate) noexcept GetRefGraph(ret) = this; GetRefIndex(ret) = idx; // Construct a new singleton Cluster (which is necessarily optimally linearized). 
+ bool oversized = uint64_t(feerate.size) > m_max_cluster_size; auto cluster = std::make_unique(m_next_sequence_counter++, *this, feerate, idx); auto cluster_ptr = cluster.get(); int level = GetTopLevel(); auto& clusterset = GetClusterSet(level); - InsertCluster(level, std::move(cluster), QualityLevel::OPTIMAL); + InsertCluster(level, std::move(cluster), oversized ? QualityLevel::OVERSIZED_SINGLETON : QualityLevel::OPTIMAL); cluster_ptr->Updated(*this); ++clusterset.m_txcount; + // Deal with individually oversized transactions. + if (oversized) { + ++clusterset.m_txcount_oversized; + clusterset.m_oversized = true; + clusterset.m_group_data = std::nullopt; + } // Return the Ref. return ret; } @@ -1917,11 +2037,15 @@ bool TxGraphImpl::IsOversized(bool main_only) noexcept // Return cached value if known. return *clusterset.m_oversized; } - // Find which Clusters will need to be merged together, as that is where the oversize - // property is assessed. - GroupClusters(level); - Assume(clusterset.m_group_data.has_value()); - clusterset.m_oversized = clusterset.m_group_data->m_group_oversized; + ApplyRemovals(level); + if (clusterset.m_txcount_oversized > 0) { + clusterset.m_oversized = true; + } else { + // Find which Clusters will need to be merged together, as that is where the oversize + // property is assessed. + GroupClusters(level); + } + Assume(clusterset.m_oversized.has_value()); return *clusterset.m_oversized; } @@ -1941,6 +2065,7 @@ void TxGraphImpl::StartStaging() noexcept // Copy statistics, precomputed data, and to-be-applied dependencies (only if oversized) to // the new graph. To-be-applied removals will always be empty at this point. 
m_staging_clusterset->m_txcount = m_main_clusterset.m_txcount; + m_staging_clusterset->m_txcount_oversized = m_main_clusterset.m_txcount_oversized; m_staging_clusterset->m_deps_to_add = m_main_clusterset.m_deps_to_add; m_staging_clusterset->m_group_data = m_main_clusterset.m_group_data; m_staging_clusterset->m_oversized = m_main_clusterset.m_oversized; @@ -1968,7 +2093,13 @@ void TxGraphImpl::AbortStaging() noexcept if (!m_main_clusterset.m_group_data.has_value()) { // In case m_oversized in main was kept after a Ref destruction while staging exists, we // need to re-evaluate m_oversized now. - m_main_clusterset.m_oversized = std::nullopt; + if (m_main_clusterset.m_to_remove.empty() && m_main_clusterset.m_txcount_oversized > 0) { + // It is possible that a Ref destruction caused a removal in main while staging existed. + // In this case, m_txcount_oversized may be inaccurate. + m_main_clusterset.m_oversized = true; + } else { + m_main_clusterset.m_oversized = std::nullopt; + } } } @@ -2002,6 +2133,7 @@ void TxGraphImpl::CommitStaging() noexcept m_main_clusterset.m_group_data = std::move(m_staging_clusterset->m_group_data); m_main_clusterset.m_oversized = std::move(m_staging_clusterset->m_oversized); m_main_clusterset.m_txcount = std::move(m_staging_clusterset->m_txcount); + m_main_clusterset.m_txcount_oversized = std::move(m_staging_clusterset->m_txcount_oversized); // Delete the old staging graph, after all its information was moved to main. m_staging_clusterset.reset(); Compact(); @@ -2016,7 +2148,9 @@ void Cluster::SetFee(TxGraphImpl& graph, DepGraphIndex idx, int64_t fee) noexcep // Update the fee, remember that relinearization will be necessary, and update the Entries // in the same Cluster. m_depgraph.FeeRate(idx).fee = fee; - if (!NeedsSplitting()) { + if (m_quality == QualityLevel::OVERSIZED_SINGLETON) { + // Nothing to do. 
+ } else if (!NeedsSplitting()) { graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_RELINEARIZE); } else { graph.SetClusterQuality(m_level, m_quality, m_setindex, QualityLevel::NEEDS_SPLIT); @@ -2124,8 +2258,15 @@ void Cluster::SanityCheck(const TxGraphImpl& graph, int level) const assert(m_linearization.size() <= graph.m_max_cluster_count); // The level must match the level the Cluster occurs in. assert(m_level == level); + // The sum of their sizes cannot exceed m_max_cluster_size, unless it is an individually + // oversized transaction singleton. Note that groups of to-be-merged clusters which would + // exceed this limit are marked oversized, which means they are never applied. + assert(m_quality == QualityLevel::OVERSIZED_SINGLETON || GetTotalTxSize() <= graph.m_max_cluster_size); // m_quality and m_setindex are checked in TxGraphImpl::SanityCheck. + // OVERSIZED clusters are singletons. + assert(m_quality != QualityLevel::OVERSIZED_SINGLETON || m_linearization.size() == 1); + // Compute the chunking of m_linearization. LinearizationChunking linchunking(m_depgraph, m_linearization); @@ -2296,11 +2437,6 @@ void TxGraphImpl::SanityCheck() const if (!clusterset.m_to_remove.empty()) compact_possible = false; if (!clusterset.m_removed.empty()) compact_possible = false; - // If m_group_data exists, its m_group_oversized must match m_oversized. - if (clusterset.m_group_data.has_value()) { - assert(clusterset.m_oversized == clusterset.m_group_data->m_group_oversized); - } - // For non-top levels, m_oversized must be known (as it cannot change until the level // on top is gone). 
if (level < GetTopLevel()) assert(clusterset.m_oversized.has_value()); @@ -2466,6 +2602,255 @@ std::pair, FeePerWeight> TxGraphImpl::GetWorstMainChu return ret; } +std::vector TxGraphImpl::Trim() noexcept +{ + int level = GetTopLevel(); + Assume(m_main_chunkindex_observers == 0 || level != 0); + std::vector ret; + + // Compute the groups of to-be-merged Clusters (which also applies all removals, and splits). + auto& clusterset = GetClusterSet(level); + if (clusterset.m_oversized == false) return ret; + GroupClusters(level); + Assume(clusterset.m_group_data.has_value()); + // Nothing to do if not oversized. + Assume(clusterset.m_oversized.has_value()); + if (clusterset.m_oversized == false) return ret; + + // In this function, would-be clusters (as precomputed in m_group_data by GroupClusters) are + // trimmed by removing transactions in them such that the resulting clusters satisfy the size + // and count limits. + // + // It works by defining for each would-be cluster a rudimentary linearization: at every point + // the highest-chunk-feerate remaining transaction is picked among those with no unmet + // dependencies. "Dependency" here means either a to-be-added dependency (m_deps_to_add), or + // an implicit dependency added between any two consecutive transaction in their current + // cluster linearization. So it can be seen as a "merge sort" of the chunks of the clusters, + // but respecting the dependencies being added. + // + // This rudimentary linearization is computed lazily, by putting all eligible (no unmet + // dependencies) transactions in a heap, and popping the highest-feerate one from it. Along the + // way, the counts and sizes of the would-be clusters up to that point are tracked (by + // partitioning the involved transactions using a union-find structure). Any transaction whose + // addition would cause a violation is removed, along with all their descendants. 
+ // + // A next invocation of GroupClusters (after applying the removals) will compute the new + // resulting clusters, and none of them will violate the limits. + + /** All dependencies (both to be added ones, and implicit ones between consecutive transactions + * in existing cluster linearizations), sorted by parent. */ + std::vector> deps_by_parent; + /** Same, but sorted by child. */ + std::vector> deps_by_child; + /** Information about all transactions involved in a Cluster group to be trimmed, sorted by + * GraphIndex. It contains entries both for transactions that have already been included, + * and ones that have not yet been. */ + std::vector trim_data; + /** Iterators into trim_data, treated as a max heap according to cmp_fn below. Each entry is + * a transaction that has not been included yet, but all its ancestors have. */ + std::vector::iterator> trim_heap; + /** The list of representatives of the partitions a given transaction depends on. */ + std::vector current_deps; + + /** Function to define the ordering of trim_heap. */ + static constexpr auto cmp_fn = [](auto a, auto b) noexcept { + // Sort by increasing chunk feerate, and then by decreasing size. + // We do not need to sort by cluster or within clusters, because due to the implicit + // dependency between consecutive linearization elements, no two transactions from the + // same Cluster will ever simultaneously be in the heap. + return a->m_chunk_feerate < b->m_chunk_feerate; + }; + + /** Given a TrimTxData entry, find the representative of the partition it is in. */ + static constexpr auto find_fn = [](TrimTxData* arg) noexcept { + while (arg != arg->m_uf_parent) { + // Replace pointer to parent with pointer to grandparent (path splitting). + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Finding_set_representatives. 
+ auto par = arg->m_uf_parent; + arg->m_uf_parent = par->m_uf_parent; + arg = par; + } + return arg; + }; + + /** Given two TrimTxData entries, union the partitions they are in, and return the + * representative. */ + static constexpr auto union_fn = [](TrimTxData* arg1, TrimTxData* arg2) noexcept { + // Replace arg1 and arg2 by their representatives. + auto rep1 = find_fn(arg1); + auto rep2 = find_fn(arg2); + // Bail out if both representatives are the same, because that means arg1 and arg2 are in + // the same partition already. + if (rep1 == rep2) return rep1; + // Pick the lower-count root to become a child of the higher-count one. + // See https://en.wikipedia.org/wiki/Disjoint-set_data_structure#Union_by_size. + if (rep1->m_uf_count < rep2->m_uf_count) std::swap(rep1, rep2); + rep2->m_uf_parent = rep1; + // Add the statistics of arg2 (which is no longer a representative) to those of arg1 (which + // is now the representative for both). + rep1->m_uf_size += rep2->m_uf_size; + rep1->m_uf_count += rep2->m_uf_count; + return rep1; + }; + + /** Get iterator to TrimTxData entry for a given index. */ + auto locate_fn = [&](GraphIndex index) noexcept { + auto it = std::lower_bound(trim_data.begin(), trim_data.end(), index, [](TrimTxData& elem, GraphIndex idx) noexcept { + return elem.m_index < idx; + }); + Assume(it != trim_data.end() && it->m_index == index); + return it; + }; + + // For each group of to-be-merged Clusters. + for (const auto& group_data : clusterset.m_group_data->m_groups) { + trim_data.clear(); + trim_heap.clear(); + deps_by_child.clear(); + deps_by_parent.clear(); + + // Gather trim data and implicit dependency data from all involved Clusters. 
+ auto cluster_span = std::span{clusterset.m_group_data->m_group_clusters} + .subspan(group_data.m_cluster_offset, group_data.m_cluster_count); + uint64_t size{0}; + for (Cluster* cluster : cluster_span) { + size += cluster->AppendTrimData(trim_data, deps_by_child); + } + // If this group of Clusters does not violate any limits, continue to the next group. + if (trim_data.size() <= m_max_cluster_count && size <= m_max_cluster_size) continue; + // Sort the trim data by GraphIndex. In what follows, we will treat this sorted vector as + // a map from GraphIndex to TrimTxData via locate_fn, and its ordering will not change + // anymore. + std::sort(trim_data.begin(), trim_data.end(), [](auto& a, auto& b) noexcept { return a.m_index < b.m_index; }); + + // Add the explicitly added dependencies to deps_by_child. + deps_by_child.insert(deps_by_child.end(), + clusterset.m_deps_to_add.begin() + group_data.m_deps_offset, + clusterset.m_deps_to_add.begin() + group_data.m_deps_offset + group_data.m_deps_count); + + // Sort deps_by_child by child transaction GraphIndex. The order will not be changed + // anymore after this. + std::sort(deps_by_child.begin(), deps_by_child.end(), [](auto& a, auto& b) noexcept { return a.second < b.second; }); + // Fill m_parents_count and m_parents_offset in trim_data, as well as m_deps_left, and + // initially populate trim_heap. Because of the sort above, all dependencies involving the + // same child are grouped together, so a single linear scan suffices. 
+ auto deps_it = deps_by_child.begin(); + for (auto trim_it = trim_data.begin(); trim_it != trim_data.end(); ++trim_it) { + trim_it->m_parent_offset = deps_it - deps_by_child.begin(); + trim_it->m_deps_left = 0; + while (deps_it != deps_by_child.end() && deps_it->second == trim_it->m_index) { + ++trim_it->m_deps_left; + ++deps_it; + } + trim_it->m_parent_count = trim_it->m_deps_left; + // If this transaction has no unmet dependencies, and is not oversized, add it to the + // heap (just append for now, the heapification happens below). + if (trim_it->m_deps_left == 0 && trim_it->m_tx_size <= m_max_cluster_size) { + trim_heap.push_back(trim_it); + } + } + Assume(deps_it == deps_by_child.end()); + + // Construct deps_by_parent, sorted by parent transaction GraphIndex. The order will not be + // changed anymore after this. + deps_by_parent = deps_by_child; + std::sort(deps_by_parent.begin(), deps_by_parent.end(), [](auto& a, auto& b) noexcept { return a.first < b.first; }); + // Fill m_children_offset and m_children_count in trim_data. Because of the sort above, all + // dependencies involving the same parent are grouped together, so a single linear scan + // suffices. + deps_it = deps_by_parent.begin(); + for (auto& trim_entry : trim_data) { + trim_entry.m_children_count = 0; + trim_entry.m_children_offset = deps_it - deps_by_parent.begin(); + while (deps_it != deps_by_parent.end() && deps_it->first == trim_entry.m_index) { + ++trim_entry.m_children_count; + ++deps_it; + } + } + Assume(deps_it == deps_by_parent.end()); + + // Build a heap of all transactions with 0 unmet dependencies. + std::make_heap(trim_heap.begin(), trim_heap.end(), cmp_fn); + + // Iterate over to-be-included transactions, and convert them to included transactions, or + // skip them if adding them would violate resource limits of the would-be cluster. 
+ // + // It is possible that the heap empties without ever hitting either cluster limit, in case + // the implied graph (to be added dependencies plus implicit dependency between each + // original transaction and its predecessor in the linearization it came from) contains + // cycles. Such cycles will be removed entirely, because each of the transactions in the + // cycle permanently have unmet dependencies. However, this cannot occur in real scenarios + // where Trim() is called to deal with reorganizations that would violate cluster limits, + // as all added dependencies are in the same direction (from old mempool transactions to + // new from-block transactions); cycles require dependencies in both directions to be + // added. + while (!trim_heap.empty()) { + // Move the best remaining transaction to the end of trim_heap. + std::pop_heap(trim_heap.begin(), trim_heap.end(), cmp_fn); + // Pop it, and find its TrimTxData. + auto& entry = *trim_heap.back(); + trim_heap.pop_back(); + + // Initialize it as a singleton partition. + entry.m_uf_parent = &entry; + entry.m_uf_count = 1; + entry.m_uf_size = entry.m_tx_size; + + // Find the distinct transaction partitions this entry depends on. + current_deps.clear(); + for (auto& [par, chl] : std::span{deps_by_child}.subspan(entry.m_parent_offset, entry.m_parent_count)) { + Assume(chl == entry.m_index); + current_deps.push_back(find_fn(&*locate_fn(par))); + } + std::sort(current_deps.begin(), current_deps.end()); + current_deps.erase(std::unique(current_deps.begin(), current_deps.end()), current_deps.end()); + + // Compute resource counts. + uint32_t new_count = 1; + uint64_t new_size = entry.m_tx_size; + for (TrimTxData* ptr : current_deps) { + new_count += ptr->m_uf_count; + new_size += ptr->m_uf_size; + } + // Skip the entry if this would violate any limit. 
+ if (new_count > m_max_cluster_count || new_size > m_max_cluster_size) continue; + + // Union the partitions this transaction and all its dependencies are in together. + auto rep = &entry; + for (TrimTxData* ptr : current_deps) rep = union_fn(ptr, rep); + // Mark the entry as included (so the loop below will not remove the transaction). + entry.m_deps_left = uint32_t(-1); + // Mark each to-be-added dependency involving this transaction as parent satisfied. + for (auto& [par, chl] : std::span{deps_by_parent}.subspan(entry.m_children_offset, entry.m_children_count)) { + Assume(par == entry.m_index); + auto chl_it = locate_fn(chl); + // Reduce the number of unmet dependencies of chl_it, and if that brings the number + // to zero, add it to the heap of includable transactions. + Assume(chl_it->m_deps_left > 0); + if (--chl_it->m_deps_left == 0) { + trim_heap.push_back(chl_it); + std::push_heap(trim_heap.begin(), trim_heap.end(), cmp_fn); + } + } + } + + // Remove all the transactions that were not processed above. Because nothing gets + // processed until/unless all its dependencies are met, this automatically guarantees + // that if a transaction is removed, all its descendants, or would-be descendants, are + // removed as well. 
+ for (const auto& trim_entry : trim_data) { + if (trim_entry.m_deps_left != uint32_t(-1)) { + ret.push_back(m_entries[trim_entry.m_index].m_ref); + clusterset.m_to_remove.push_back(trim_entry.m_index); + } + } + } + clusterset.m_group_data.reset(); + clusterset.m_oversized = false; + Assume(!ret.empty()); + return ret; +} + } // namespace TxGraph::Ref::~Ref() @@ -2500,7 +2885,7 @@ TxGraph::Ref::Ref(Ref&& other) noexcept std::swap(m_index, other.m_index); } -std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept +std::unique_ptr MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size) noexcept { - return std::make_unique(max_cluster_count); + return std::make_unique(max_cluster_count, max_cluster_size); } diff --git a/src/txgraph.h b/src/txgraph.h index 5779397f82f..2c40cd28d2e 100644 --- a/src/txgraph.h +++ b/src/txgraph.h @@ -63,10 +63,10 @@ public: /** Virtual destructor, so inheriting is safe. */ virtual ~TxGraph() = default; /** Construct a new transaction with the specified feerate, and return a Ref to it. - * If a staging graph exists, the new transaction is only created there. In all - * further calls, only Refs created by AddTransaction() are allowed to be passed to this - * TxGraph object (or empty Ref objects). Ref objects may outlive the TxGraph they were - * created for. */ + * If a staging graph exists, the new transaction is only created there. feerate.size must be + * strictly positive. In all further calls, only Refs created by AddTransaction() are allowed + * to be passed to this TxGraph object (or empty Ref objects). Ref objects may outlive the + * TxGraph they were created for. */ [[nodiscard]] virtual Ref AddTransaction(const FeePerWeight& feerate) noexcept = 0; /** Remove the specified transaction. If a staging graph exists, the removal only happens * there. This is a no-op if the transaction was already removed. @@ -169,6 +169,11 @@ public: * that appear identically in both. 
Use FeeFrac rather than FeePerWeight so CompareChunks is * usable without type-conversion. */ virtual std::pair, std::vector> GetMainStagingDiagrams() noexcept = 0; + /** Remove transactions (including their own descendants) according to a fast but best-effort + * strategy such that the TxGraph's cluster and size limits are respected. Applies to staging + * if it exists, and to main otherwise. Returns the list of all removed transactions in + * unspecified order. This has no effect unless the relevant graph is oversized. */ + virtual std::vector Trim() noexcept = 0; /** Interface returned by GetBlockBuilder. */ class BlockBuilder @@ -240,8 +245,9 @@ public: }; }; -/** Construct a new TxGraph with the specified limit on transactions within a cluster. That - * number cannot exceed MAX_CLUSTER_COUNT_LIMIT. */ -std::unique_ptr MakeTxGraph(unsigned max_cluster_count) noexcept; +/** Construct a new TxGraph with the specified limit on the number of transactions within a cluster, + * and on the sum of transaction sizes within a cluster. max_cluster_count cannot exceed + * MAX_CLUSTER_COUNT_LIMIT. */ +std::unique_ptr MakeTxGraph(unsigned max_cluster_count, uint64_t max_cluster_size) noexcept; #endif // BITCOIN_TXGRAPH_H