mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-01-19 23:03:45 +01:00
clusterlin: replace cluster linearization with SFL (feature)
This replaces the existing LIMO linearization algorithm (which internally uses ancestor set finding and candidate set finding) with the much more performant spanning-forest linearization algorithm. This removes the old candidate-set search algorithm, and several of its tests, benchmarks, and needed utility code. The worst case time per cost is similar to the previous algorithm, so ACCEPTABLE_ITERS is unchanged.
This commit is contained in:
@@ -24,29 +24,22 @@
|
||||
* possibly by comparison with other implementations (at the end of the line ->).
|
||||
* <<---: The right side is implemented using the left side.
|
||||
*
|
||||
* +-----------------------+
|
||||
* | SearchCandidateFinder | <<---------------------\
|
||||
* +-----------------------+ |
|
||||
* | +-----------+ +---------------------+
|
||||
* | | Linearize | | SpanningForestState |
|
||||
* | +-----------+ +---------------------+
|
||||
* | +-------------------------+ | | |
|
||||
* | | AncestorCandidateFinder | <<--------/ | |
|
||||
* | +-------------------------+ | |
|
||||
* | | ^ | ^^ PRODUCTION CODE |
|
||||
* | | | | || |
|
||||
* +---------------------+ +-----------+
|
||||
* | SpanningForestState | <<-------------------- | Linearize |
|
||||
* +---------------------+ +-----------+
|
||||
* | |
|
||||
* | | ^^ PRODUCTION CODE
|
||||
* | | ||
|
||||
* ==============================================================================================
|
||||
* | | | | || |
|
||||
* | clusterlin_ancestor_finder* | | vv TEST CODE |
|
||||
* | | | |
|
||||
* |-clusterlin_search_finder* | |-clusterlin_linearize* |
|
||||
* | | | |
|
||||
* v | v clusterlin_sfl--|
|
||||
* +-----------------------+ | +-----------------+ |
|
||||
* | SimpleCandidateFinder | <<-------------------| SimpleLinearize |<----------------/
|
||||
* +-----------------------+ | +-----------------+
|
||||
* | | |
|
||||
* +-------------------/ |
|
||||
* | | ||
|
||||
* |-clusterlin_sfl* | vv TEST CODE
|
||||
* | |
|
||||
* \------------------------------------\ |-clusterlin_linearize*
|
||||
* | |
|
||||
* v v
|
||||
* +-----------------------+ +-----------------+
|
||||
* | SimpleCandidateFinder | <<-------------------| SimpleLinearize |
|
||||
* +-----------------------+ +-----------------+
|
||||
* | |
|
||||
* |-clusterlin_simple_finder* |-clusterlin_simple_linearize*
|
||||
* v v
|
||||
@@ -78,11 +71,8 @@ using namespace cluster_linearize;
|
||||
|
||||
namespace {
|
||||
|
||||
/** A simple finder class for candidate sets.
|
||||
*
|
||||
* This class matches SearchCandidateFinder in interface and behavior, though with fewer
|
||||
* optimizations.
|
||||
*/
|
||||
/** A simple finder class for candidate sets (topologically-valid subsets with high feerate), only
|
||||
* used by SimpleLinearize below. */
|
||||
template<typename SetType>
|
||||
class SimpleCandidateFinder
|
||||
{
|
||||
@@ -153,7 +143,8 @@ public:
|
||||
/** A very simple finder class for optimal candidate sets, which tries every subset.
|
||||
*
|
||||
* It is even simpler than SimpleCandidateFinder, and exists just to help test the correctness of
|
||||
* SimpleCandidateFinder, which is then used to test the correctness of SearchCandidateFinder.
|
||||
* SimpleCandidateFinder, so that it can be used in SimpleLinearize, which is then used to test the
|
||||
* correctness of Linearize.
|
||||
*/
|
||||
template<typename SetType>
|
||||
class ExhaustiveCandidateFinder
|
||||
@@ -204,8 +195,8 @@ public:
|
||||
/** A simple linearization algorithm.
|
||||
*
|
||||
* This matches Linearize() in interface and behavior, though with fewer optimizations, lacking
|
||||
* the ability to pass in an existing linearization, and using just SimpleCandidateFinder rather
|
||||
* than AncestorCandidateFinder and SearchCandidateFinder.
|
||||
* the ability to pass in an existing linearization, and linearizing by simply finding the
|
||||
* consecutive remaining highest-feerate topological subset using SimpleCandidateFinder.
|
||||
*/
|
||||
template<typename SetType>
|
||||
std::pair<std::vector<DepGraphIndex>, bool> SimpleLinearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations)
|
||||
@@ -766,68 +757,17 @@ FUZZ_TARGET(clusterlin_chunking)
|
||||
assert(todo.None());
|
||||
}
|
||||
|
||||
FUZZ_TARGET(clusterlin_ancestor_finder)
|
||||
{
|
||||
// Verify that AncestorCandidateFinder works as expected.
|
||||
|
||||
// Retrieve a depgraph from the fuzz input.
|
||||
SpanReader reader(buffer);
|
||||
DepGraph<TestBitSet> depgraph;
|
||||
try {
|
||||
reader >> Using<DepGraphFormatter>(depgraph);
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
|
||||
AncestorCandidateFinder anc_finder(depgraph);
|
||||
auto todo = depgraph.Positions();
|
||||
while (todo.Any()) {
|
||||
// Call the ancestor finder's FindCandidateSet for what remains of the graph.
|
||||
assert(!anc_finder.AllDone());
|
||||
assert(todo.Count() == anc_finder.NumRemaining());
|
||||
auto best_anc = anc_finder.FindCandidateSet();
|
||||
// Sanity check the result.
|
||||
assert(best_anc.transactions.Any());
|
||||
assert(best_anc.transactions.IsSubsetOf(todo));
|
||||
assert(depgraph.FeeRate(best_anc.transactions) == best_anc.feerate);
|
||||
assert(depgraph.IsConnected(best_anc.transactions));
|
||||
// Check that it is topologically valid.
|
||||
for (auto i : best_anc.transactions) {
|
||||
assert((depgraph.Ancestors(i) & todo).IsSubsetOf(best_anc.transactions));
|
||||
}
|
||||
|
||||
// Compute all remaining ancestor sets.
|
||||
std::optional<SetInfo<TestBitSet>> real_best_anc;
|
||||
for (auto i : todo) {
|
||||
SetInfo info(depgraph, todo & depgraph.Ancestors(i));
|
||||
if (!real_best_anc.has_value() || info.feerate > real_best_anc->feerate) {
|
||||
real_best_anc = info;
|
||||
}
|
||||
}
|
||||
// The set returned by anc_finder must equal the real best ancestor sets.
|
||||
assert(real_best_anc.has_value());
|
||||
assert(*real_best_anc == best_anc);
|
||||
|
||||
// Find a non-empty topologically valid subset of transactions to remove from the graph.
|
||||
// Using an empty set would mean the next iteration is identical to the current one, and
|
||||
// could cause an infinite loop.
|
||||
auto del_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
|
||||
todo -= del_set;
|
||||
anc_finder.MarkDone(del_set);
|
||||
}
|
||||
assert(anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == 0);
|
||||
}
|
||||
|
||||
static constexpr auto MAX_SIMPLE_ITERATIONS = 300000;
|
||||
|
||||
FUZZ_TARGET(clusterlin_simple_finder)
|
||||
{
|
||||
// Verify that SimpleCandidateFinder works as expected by sanity checking the results
|
||||
// and comparing them (if claimed to be optimal) against the sets found by
|
||||
// ExhaustiveCandidateFinder and AncestorCandidateFinder.
|
||||
// ExhaustiveCandidateFinder.
|
||||
//
|
||||
// Note that SimpleCandidateFinder is only used in tests; the purpose of this fuzz test is to
|
||||
// establish confidence in SimpleCandidateFinder, so that it can be used to test
|
||||
// SearchCandidateFinder below.
|
||||
// establish confidence in SimpleCandidateFinder, so that it can be used in SimpleLinearize,
|
||||
// which is then used to test Linearize below.
|
||||
|
||||
// Retrieve a depgraph from the fuzz input.
|
||||
SpanReader reader(buffer);
|
||||
@@ -836,18 +776,15 @@ FUZZ_TARGET(clusterlin_simple_finder)
|
||||
reader >> Using<DepGraphFormatter>(depgraph);
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
|
||||
// Instantiate the SimpleCandidateFinder to be tested, and the ExhaustiveCandidateFinder and
|
||||
// AncestorCandidateFinder it is being tested against.
|
||||
// Instantiate the SimpleCandidateFinder to be tested, and the ExhaustiveCandidateFinder it is
|
||||
// being tested against.
|
||||
SimpleCandidateFinder smp_finder(depgraph);
|
||||
ExhaustiveCandidateFinder exh_finder(depgraph);
|
||||
AncestorCandidateFinder anc_finder(depgraph);
|
||||
|
||||
auto todo = depgraph.Positions();
|
||||
while (todo.Any()) {
|
||||
assert(!smp_finder.AllDone());
|
||||
assert(!exh_finder.AllDone());
|
||||
assert(!anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == todo.Count());
|
||||
|
||||
// Call SimpleCandidateFinder.
|
||||
auto [found, iterations_done] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS);
|
||||
@@ -874,10 +811,6 @@ FUZZ_TARGET(clusterlin_simple_finder)
|
||||
|
||||
// Perform further quality checks only if SimpleCandidateFinder claims an optimal result.
|
||||
if (optimal) {
|
||||
// Compare with AncestorCandidateFinder.
|
||||
auto anc = anc_finder.FindCandidateSet();
|
||||
assert(anc.feerate <= found.feerate);
|
||||
|
||||
if (todo.Count() <= 12) {
|
||||
// Compare with ExhaustiveCandidateFinder. This quickly gets computationally
|
||||
// expensive for large clusters (O(2^n)), so only do it for sufficiently small ones.
|
||||
@@ -898,119 +831,10 @@ FUZZ_TARGET(clusterlin_simple_finder)
|
||||
todo -= del_set;
|
||||
smp_finder.MarkDone(del_set);
|
||||
exh_finder.MarkDone(del_set);
|
||||
anc_finder.MarkDone(del_set);
|
||||
}
|
||||
|
||||
assert(smp_finder.AllDone());
|
||||
assert(exh_finder.AllDone());
|
||||
assert(anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == 0);
|
||||
}
|
||||
|
||||
FUZZ_TARGET(clusterlin_search_finder)
|
||||
{
|
||||
// Verify that SearchCandidateFinder works as expected by sanity checking the results
|
||||
// and comparing with the results from SimpleCandidateFinder and AncestorCandidateFinder,
|
||||
// if the result is claimed to be optimal.
|
||||
|
||||
// Retrieve an RNG seed, a depgraph, and whether to make it connected, from the fuzz input.
|
||||
SpanReader reader(buffer);
|
||||
DepGraph<TestBitSet> depgraph;
|
||||
uint64_t rng_seed{0};
|
||||
uint8_t make_connected{1};
|
||||
try {
|
||||
reader >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
// The most complicated graphs are connected ones (other ones just split up). Optionally force
|
||||
// the graph to be connected.
|
||||
if (make_connected) MakeConnected(depgraph);
|
||||
|
||||
// Instantiate the candidate finders.
|
||||
SearchCandidateFinder src_finder(depgraph, rng_seed);
|
||||
SimpleCandidateFinder smp_finder(depgraph);
|
||||
AncestorCandidateFinder anc_finder(depgraph);
|
||||
|
||||
auto todo = depgraph.Positions();
|
||||
while (todo.Any()) {
|
||||
assert(!src_finder.AllDone());
|
||||
assert(!smp_finder.AllDone());
|
||||
assert(!anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == todo.Count());
|
||||
|
||||
// For each iteration, read an iteration count limit from the fuzz input.
|
||||
uint64_t max_iterations = 1;
|
||||
try {
|
||||
reader >> VARINT(max_iterations);
|
||||
} catch (const std::ios_base::failure&) {}
|
||||
max_iterations &= 0xfffff;
|
||||
|
||||
// Read an initial subset from the fuzz input (allowed to be empty).
|
||||
auto init_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/false);
|
||||
SetInfo init_best(depgraph, init_set);
|
||||
|
||||
// Call the search finder's FindCandidateSet for what remains of the graph.
|
||||
auto [found, iterations_done] = src_finder.FindCandidateSet(max_iterations, init_best);
|
||||
bool optimal = iterations_done < max_iterations;
|
||||
|
||||
// Sanity check the result.
|
||||
assert(iterations_done <= max_iterations);
|
||||
assert(found.transactions.Any());
|
||||
assert(found.transactions.IsSubsetOf(todo));
|
||||
assert(depgraph.FeeRate(found.transactions) == found.feerate);
|
||||
if (!init_best.feerate.IsEmpty()) assert(found.feerate >= init_best.feerate);
|
||||
// Check that it is topologically valid.
|
||||
for (auto i : found.transactions) {
|
||||
assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo));
|
||||
}
|
||||
|
||||
// At most 2^(N-1) iterations can be required: the maximum number of non-empty topological
|
||||
// subsets a (connected) cluster with N transactions can have. Even when the cluster is no
|
||||
// longer connected after removing certain transactions, this holds, because the connected
|
||||
// components are searched separately.
|
||||
assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1)));
|
||||
// Additionally, test that no more than sqrt(2^N)+1 iterations are required. This is just
|
||||
// an empirical bound that seems to hold, without proof. Still, add a test for it so we
|
||||
// can learn about counterexamples if they exist.
|
||||
if (iterations_done >= 1 && todo.Count() <= 63) {
|
||||
Assume((iterations_done - 1) * (iterations_done - 1) <= uint64_t{1} << todo.Count());
|
||||
}
|
||||
|
||||
// Perform quality checks only if SearchCandidateFinder claims an optimal result.
|
||||
if (optimal) {
|
||||
// Optimal sets are always connected.
|
||||
assert(depgraph.IsConnected(found.transactions));
|
||||
|
||||
// Compare with SimpleCandidateFinder.
|
||||
auto [simple, simple_iters] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS);
|
||||
assert(found.feerate >= simple.feerate);
|
||||
if (simple_iters < MAX_SIMPLE_ITERATIONS) {
|
||||
assert(found.feerate == simple.feerate);
|
||||
}
|
||||
|
||||
// Compare with AncestorCandidateFinder;
|
||||
auto anc = anc_finder.FindCandidateSet();
|
||||
assert(found.feerate >= anc.feerate);
|
||||
|
||||
// Compare with a non-empty topological set read from the fuzz input (comparing with an
|
||||
// empty set is not interesting).
|
||||
auto read_topo = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
|
||||
assert(found.feerate >= depgraph.FeeRate(read_topo));
|
||||
}
|
||||
|
||||
// Find a non-empty topologically valid subset of transactions to remove from the graph.
|
||||
// Using an empty set would mean the next iteration is identical to the current one, and
|
||||
// could cause an infinite loop.
|
||||
auto del_set = ReadTopologicalSet(depgraph, todo, reader, /*non_empty=*/true);
|
||||
todo -= del_set;
|
||||
src_finder.MarkDone(del_set);
|
||||
smp_finder.MarkDone(del_set);
|
||||
anc_finder.MarkDone(del_set);
|
||||
}
|
||||
|
||||
assert(src_finder.AllDone());
|
||||
assert(smp_finder.AllDone());
|
||||
assert(anc_finder.AllDone());
|
||||
assert(anc_finder.NumRemaining() == 0);
|
||||
}
|
||||
|
||||
FUZZ_TARGET(clusterlin_linearization_chunking)
|
||||
@@ -1250,6 +1074,10 @@ FUZZ_TARGET(clusterlin_sfl)
|
||||
}
|
||||
test_fn(/*is_optimal=*/true);
|
||||
|
||||
// Verify that optimality is reached within an expected amount of work. This protects against
|
||||
// hypothetical bugs that hugely increase the amount of work needed to reach optimality.
|
||||
assert(sfl.GetCost() <= MaxOptimalLinearizationIters(depgraph.TxCount()));
|
||||
|
||||
// The result must be as good as SimpleLinearize.
|
||||
auto [simple_linearization, simple_optimal] = SimpleLinearize(depgraph, MAX_SIMPLE_ITERATIONS / 10);
|
||||
auto simple_diagram = ChunkLinearization(depgraph, simple_linearization);
|
||||
@@ -1301,7 +1129,6 @@ FUZZ_TARGET(clusterlin_linearize)
|
||||
// Invoke Linearize().
|
||||
iter_count &= 0x7ffff;
|
||||
auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization);
|
||||
assert(cost <= iter_count);
|
||||
SanityCheck(depgraph, linearization);
|
||||
auto chunking = ChunkLinearization(depgraph, linearization);
|
||||
|
||||
@@ -1313,7 +1140,7 @@ FUZZ_TARGET(clusterlin_linearize)
|
||||
}
|
||||
|
||||
// If the iteration count is sufficiently high, an optimal linearization must be found.
|
||||
if (iter_count >= MaxOptimalLinearizationIters(depgraph.TxCount())) {
|
||||
if (iter_count > MaxOptimalLinearizationIters(depgraph.TxCount())) {
|
||||
assert(optimal);
|
||||
}
|
||||
|
||||
@@ -1328,9 +1155,13 @@ FUZZ_TARGET(clusterlin_linearize)
|
||||
// If SimpleLinearize finds the optimal result too, they must be equal (if not,
|
||||
// SimpleLinearize is broken).
|
||||
if (simple_optimal) assert(cmp == 0);
|
||||
// If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as
|
||||
// chunking is claimed to be optimal, which implies minimal chunks).
|
||||
if (cmp == 0) assert(chunking.size() >= simple_chunking.size());
|
||||
|
||||
// Temporarily disabled, as Linearize() currently does not guarantee minimal chunks, even
|
||||
// when it reports an optimal result. This will be re-introduced in a later commit.
|
||||
//
|
||||
// // If simple_chunking is diagram-optimal, it cannot have more chunks than chunking (as
|
||||
// // chunking is claimed to be optimal, which implies minimal chunks).
|
||||
// if (cmp == 0) assert(chunking.size() >= simple_chunking.size());
|
||||
|
||||
// Compare with a linearization read from the fuzz input.
|
||||
auto read = ReadLinearization(depgraph, reader);
|
||||
|
||||
Reference in New Issue
Block a user