diff --git a/src/bench/cluster_linearize.cpp b/src/bench/cluster_linearize.cpp index de857419090..cf071dda2d1 100644 --- a/src/bench/cluster_linearize.cpp +++ b/src/bench/cluster_linearize.cpp @@ -4,7 +4,9 @@ #include #include +#include #include +#include #include #include @@ -12,6 +14,7 @@ #include using namespace cluster_linearize; +using namespace util::hex_literals; namespace { @@ -45,8 +48,8 @@ DepGraph MakeWideGraph(ClusterIndex ntx) return depgraph; } -// Construct a difficult graph. These need at least sqrt(2^(n-1)) iterations in the best -// known algorithms (purely empirically determined). +// Construct a difficult graph. These need at least sqrt(2^(n-1)) iterations in the implemented +// algorithm (purely empirically determined). template DepGraph MakeHardGraph(ClusterIndex ntx) { @@ -111,17 +114,18 @@ DepGraph MakeHardGraph(ClusterIndex ntx) return depgraph; } -/** Benchmark that does search-based candidate finding with 10000 iterations. +/** Benchmark that does search-based candidate finding with a specified number of iterations. * - * Its goal is measuring how much time every additional search iteration in linearization costs. + * Its goal is measuring how much time every additional search iteration in linearization costs, + * by running with a low and a high count, subtracting the results, and divided by the number + * iterations difference. */ template -void BenchLinearizePerIterWorstCase(ClusterIndex ntx, benchmark::Bench& bench) +void BenchLinearizeWorstCase(ClusterIndex ntx, benchmark::Bench& bench, uint64_t iter_limit) { const auto depgraph = MakeHardGraph(ntx); - const auto iter_limit = std::min(10000, uint64_t{1} << (ntx / 2 - 1)); uint64_t rng_seed = 0; - bench.batch(iter_limit).unit("iters").run([&] { + bench.run([&] { SearchCandidateFinder finder(depgraph, rng_seed++); auto [candidate, iters_performed] = finder.FindCandidateSet(iter_limit, {}); assert(iters_performed == iter_limit); @@ -132,11 +136,12 @@ void BenchLinearizePerIterWorstCase(ClusterIndex ntx, benchmark::Bench& bench) * * Its goal is measuring how much time linearization may take without any search iterations. * - * If P is the resulting time of BenchLinearizePerIterWorstCase, and N is the resulting time of - * BenchLinearizeNoItersWorstCase*, then an invocation of Linearize with max_iterations=m should - * take no more than roughly N+m*P time. This may however be an overestimate, as the worst cases - * do not coincide (the ones that are worst for linearization without any search happen to be ones - * that do not need many search iterations). + * If P is the benchmarked per-iteration count (obtained by running BenchLinearizeWorstCase for a + * high and a low iteration count, subtracting them, and dividing by the difference in count), and + * N is the resulting time of BenchLinearizeNoItersWorstCase*, then an invocation of Linearize with + * max_iterations=m should take no more than roughly N+m*P time. This may however be an + * overestimate, as the worst cases do not coincide (the ones that are worst for linearization + * without any search happen to be ones that do not need many search iterations). * * This benchmark exercises a worst case for AncestorCandidateFinder, but for which improvement is * cheap. @@ -205,14 +210,57 @@ void BenchMergeLinearizationsWorstCase(ClusterIndex ntx, benchmark::Bench& bench }); } +template +void BenchLinearizeOptimally(benchmark::Bench& bench, const std::array& serialized) +{ + // Determine how many transactions the serialized cluster has. + ClusterIndex num_tx{0}; + { + SpanReader reader{serialized}; + DepGraph> depgraph; + reader >> Using(depgraph); + num_tx = depgraph.TxCount(); + assert(num_tx < 128); + } + + SpanReader reader{serialized}; + auto runner_fn = [&]() noexcept { + DepGraph depgraph; + reader >> Using(depgraph); + uint64_t rng_seed = 0; + bench.run([&] { + auto res = Linearize(depgraph, /*max_iterations=*/10000000, rng_seed++); + assert(res.second); + }); + }; + + if (num_tx <= 32) { + runner_fn.template operator()>(); + } else if (num_tx <= 64) { + runner_fn.template operator()>(); + } else if (num_tx <= 96) { + runner_fn.template operator()>(); + } else if (num_tx <= 128) { + runner_fn.template operator()>(); + } else { + assert(false); + } +} + } // namespace -static void LinearizePerIter16TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase>(16, bench); } -static void LinearizePerIter32TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase>(32, bench); } -static void LinearizePerIter48TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase>(48, bench); } -static void LinearizePerIter64TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase>(64, bench); } -static void LinearizePerIter75TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase>(75, bench); } -static void LinearizePerIter99TxWorstCase(benchmark::Bench& bench) { BenchLinearizePerIterWorstCase>(99, bench); } +static void Linearize16TxWorstCase20Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(16, bench, 20); } +static void Linearize16TxWorstCase120Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(16, bench, 120); } +static void Linearize32TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(32, bench, 5000); } +static void Linearize32TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(32, bench, 15000); } +static void Linearize48TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(48, bench, 5000); } +static void Linearize48TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(48, bench, 15000); } +static void Linearize64TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(64, bench, 5000); } +static void Linearize64TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(64, bench, 15000); } +static void Linearize75TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(75, bench, 5000); } +static void Linearize75TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(75, bench, 15000); } +static void Linearize99TxWorstCase5000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(99, bench, 5000); } +static void Linearize99TxWorstCase15000Iters(benchmark::Bench& bench) { BenchLinearizeWorstCase>(99, bench, 15000); } static void LinearizeNoIters16TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc>(16, bench); } static void LinearizeNoIters32TxWorstCaseAnc(benchmark::Bench& bench) { BenchLinearizeNoItersWorstCaseAnc>(32, bench); } @@ -242,12 +290,84 @@ static void MergeLinearizations64TxWorstCase(benchmark::Bench& bench) { BenchMer static void MergeLinearizations75TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(75, bench); } static void MergeLinearizations99TxWorstCase(benchmark::Bench& bench) { BenchMergeLinearizationsWorstCase>(99, bench); } -BENCHMARK(LinearizePerIter16TxWorstCase, benchmark::PriorityLevel::HIGH); -BENCHMARK(LinearizePerIter32TxWorstCase, benchmark::PriorityLevel::HIGH); -BENCHMARK(LinearizePerIter48TxWorstCase, benchmark::PriorityLevel::HIGH); -BENCHMARK(LinearizePerIter64TxWorstCase, benchmark::PriorityLevel::HIGH); -BENCHMARK(LinearizePerIter75TxWorstCase, benchmark::PriorityLevel::HIGH); -BENCHMARK(LinearizePerIter99TxWorstCase, benchmark::PriorityLevel::HIGH); +// The following example clusters were constructed by replaying historical mempool activity, and +// selecting for ones that take many iterations (after the introduction of some but not all +// linearization algorithm optimizations). + +/* 2023-05-05T23:12:21Z 71, 521780, 543141,*/ +static constexpr auto BENCH_EXAMPLE_00 = "801081a5360092239efc6201810982ab58029b6b98c86803800eed7804800ecb7e058f2f878778068030d43407853e81902a08962a81d176098010b6620a8010b2280b8010da3a0c9f069da9580d800db11e0e9d719ad37a0f967897ed5210990e99fc0e11812c81982012804685823e0f0a893982b6040a10804682c146110a6e80db5c120a8010819806130a8079858f0c140a8054829a120c12803483a1760c116f81843c0d11718189000e11800d81ac2c0f11800d81e50e10117181c77c1111822e87f2601012815983d17211127180f2121212811584a21e1312800e80d1781412813c83e81815126f80ef5016126f80ff6c16126f80f66017126e80fd541812800d81942a1912800e80dd781a12800d81f96c1b12805282e7581b127180fd721c1271a918230b805fc11a220d8118a15a2d036f80e5002011817684d8241e346f80e1181c37805082fc04260024800d81f8621734803382b354270b12805182ca2e162f800e80d52e0d32803dc360201b850e818c400b318c49808a5a290210805181d65823142a800d81a34e0850800e81fb3c0851886994fc0a280b00082c805482d208032e28805e83ba380059801081cd4a0159811884f770002e0015e17280e49024300a0000000000000031803dcb48014200"_hex_u8; +/* 2023-12-06T09:30:01Z 81, 141675, 647053,*/ +static constexpr auto BENCH_EXAMPLE_01 = "b348f1fc4000f365818a9e2c01b44cf7ca0002b004f0b02003b33ef8ae3004b334f9e87005800d81c85e06b368fae26007b05ef2e14208be1a8093a50409b15cf5ee500a802c80a1420b802dea440c802ce50a0d802cdc320e802cd7220f802dd72210805380f74a118174f370126e96b32812127182c4701312817389d26414128035848c221512800e82bf3816126f81e4341712801082b228181280518af57418128040859a0019127182d0401a12803e858b641b127182c4421c126f82b3481d12811486b6301e12821d89e7281f126e8a8b421f127182d6642012806284c12021126e81d34822126e86a76222126e86d8102212805187b6542312800d82fc002412803d848e0e2512801082d27a26126e8589642612800e83a9602712800e83bd0028126e81ef1a29116e858d7228126f82db5e2912801083843c2a127181c93c2b126e85d0162b127181c5622c126e84f8262c12800f8392202d12800e82b66c2e126e81d0082f12803282d50430126e84f9003012805f84be6c3112846e88df0e2b12804080d44c340a8b31898808350a800ed760350b801083a1182b517182817e2a51800e82b6582951803583cb52420030806284cb6c204f7181d300204f82688ce0303e001d800e82bb200f488010808a182822a3289cd63041000a6fcd100a408a7caaa7024800002f803584e0741e27288f3386dd783b001000802683f27e004b8c44bcd0763f0000000000000000000100000e00"_hex_u8; +/* 2023-04-04T00:26:50Z 90, 99930, 529375,*/ +static constexpr auto BENCH_EXAMPLE_02 = "815b80b61e00800da63001cd378da70e028010991a03800e9d3e0480109708058010991a068010973a07da738fa72408de7491831009b35b88f0080a9d4485de180b71974e0c71974e0d80108e500eb27988a75a0f719632108061a56c11801087761280108a1413807893441480538c1415a606828806168010893e1780548c40188e4b80bb2c196eab3e1718805ed60e18188051c97a19188010cf781a1871b11e1b1871c5281c1880508080581d186e80b13c1e188035cf421f18805fe0482018804caa661f198035a9001f156e80cb701d1871a2281e1871ad281f18817380a16020186f98642118805ee04821198010b6702219800ea12623196eb67024198035808b0025196fa65c26198054ba1c2719807680bf7c28198053cd782919803d80b80429198051db5a2a198040d3742b19976584bb1c28196efc1c281971b21a29198052bc762a1971a2502b196eb73c2c19976381ab0c2a18806290543409862081c3423b00336fbc70224d80109e7c1c52805ebd5c1942800eb57016468034ba423405158118da28350416927480f4743000159f6a81c9462e00188051ec5e380e00800e9e420775800d9e26007c906c82f754251d0025870480f12c14280023800d9e26027e9e1385ed08102900001a804fac7a018001719856028001800da87e0180039b1a868b60064102246e9f42018005800da87e028005850d81d600026d862381a2200e0008230015831480a5480342000524803eeb32006e873582a4700a0100351300"_hex_u8; +/* 2023-05-08T15:51:59Z 87, 76869, 505222,*/ +static constexpr auto BENCH_EXAMPLE_03 = "c040b9e15a00b10eac842601805f85931802c104bae17403ae50aaa336049d76a9bf7005c55bbeab6606ae2aa9c72c07805e81992e08af7dab817a096e80a7e4520909803e92bd780a097185c76c0b096e98e7380b09850bb9953c0c09803389f6260d096f859d620e09803f88d3000f0971829c6e1009837690f6481109806285931811097181f56814076ea09b74120980408eb73213096f87853214096f86e2701509803f8c860016098a6fe6c3721709814f92a204180980628a8a441909803285df681a0980348498661b096e8290781c096e978e081c097187da1a1d097186c05c1e097185893c1f09805f8ad9002009800d84e74e21097183a67a22097182e23423097184b53a23096ea393062309840faddd46240980618eb732250980548bee6a2609807986883c2709718298402809815388b6582909805384ec742a097181b9142b096e97b5262b096e85e14e2c0980518abb5c2d09805489e75a2e09803187e3382f097180eb1c34046f87c34a2f098309a5c54430097186911831098054899c083209801083bc1033097081e02a3409805f848f0c35096e80d4343a057180c37040006f80a22438097180a0503f03816f8381444003803f80ef003f05800580a4283f066ef72845016efb91663e09923d808d8216470041803584837c46012f9247dc86684501268267a09610450222862184db68440712803585ea40440113835d97887805800b8723c7a40a4b00022f81529ae2143c0c1f80548b8f381b311980408e955c055e802589dc10037e801083b54602658010848130006700"_hex_u8; +/* 2023-05-01T19:32:10Z 35, 55747, 504128,*/ +static constexpr auto BENCH_EXAMPLE_04 = "801af95c00801af72801801af95c02873e85f2180202873e85f2180202873e85f21802028018fb2802068018fb2803068018fb2804068018fb2805068018fb2806068018fb2807068018fb2808068018fb2809068018fb280a068018fb280a058018fb280b058018fb280c058018fb280d058018fb280e058018fb280f058018fb2810058018fb2811058018fb2812058018fb2813058018fb2814058018fb2815058018fb2815048018fb2816048018fb2817048018fb2818048018fb2819048018fb281a048018fb281b04810d80d9481f00000100"_hex_u8; +/* 2023-02-27T17:06:38Z 60, 55680, 502749,*/ +static constexpr auto BENCH_EXAMPLE_05 = "b5108ab56600b26d89f85601b07383b01602b22683c96003b34a83d82e04b12f83b53a05b20e83c75a066e80840a06068040be0007066fb10608066fb2120906800eba320a06842b80b05a0a066eff420b067199300b068124c3140c0680618085180d066faa1c0e068010b4440f068051af541006800da1781106857881946812066eee1613068052b31014068324808d361506806180885c150671b03216066ef11017068052b63218066ef3521806803f80865419066e93441a068035a13e1b0680628085181c06806ec4481d068117e72c1e06719c721f068077c42420068159808d1821066eef0c21058010b90022056f9908230571993024058010b00a25058010b00a260580608087402705803fc10027068032b42828068051b6322906800db11e212a8324808d361933803ff400192f826381a7141a2f8032ac08152a800db54c044e8323808d3630010002018158d84000042d821cea12002807853580d462002d01891181d022002e00"_hex_u8; +/* 2023-04-20T22:25:49Z 99, 49100, 578622,*/ +static constexpr auto BENCH_EXAMPLE_06 = "bf3c87c14c008010955a01b21d85e07002800d946c036e8e3404b77f86c26605b33c85f55e06bd06879852078010970a08bd4b87cf00098123a7720ab2158687680b8054d4440b0a8062fa4c0c0a71ac400d0a80628081540e0a8010a2580f0a8054b676100a8032b85c110a6e9a40120a6e809012130a817f80c31e140a8175808674150a719d46160a8172d86415098033c1481609800da4181709800ada2e1809803dc85219098034b4041a096ef5501b098052d67c1c098051d3281d09800ebc4a1e098175808c641f098061c55020098078c85021096e8081141f0b6faf1e200b8061da68210b8062f000220b800ebc20230b8035d058240b8053de32250b8050b610250b6fad32260b803dc276270b803d80a610280b6ef812290b8052b6322a0b800eb57e2b0b8052bd062c0b719e522d0b71a3762e0b8010bb1e2f0b80109a78310a80109962320a8051a60c330a6f9f3e320b6e808b24330b719e40340b8117cc50350b803d80971a360b8051b930370b6f9e0a380b719b10390b8052a6003a0b6e808c76390a7195603a0a6f935c3b0a8054a31a3c0a803ce30c3b0b803fa3003c0b800dbe2a3d0b8f3480a84244058005851a44069d1bf824400b83098f284507719c723d4f6f9c1c3449719c722f4f6eb23c304f8061c5502e528061da682b4e8118bb724e022a8054b35028476e941c1d51815be02c4f01148557808e3a4f070e8104af464e001180329d364e010d805f9f6a421b9c3387aa744c0d4d71ac400b800881748098444710338173809b780b80008054d444292c12821dc040550403078b4682b4664517003f00"_hex_u8; +/* 2023-06-05T19:56:12Z 52, 44896, 540514,*/ +static constexpr auto BENCH_EXAMPLE_07 = "b317998a4000b40098d53e01b45b99814802b7289b940003b3699a9d1204b6619a807a05814682cb78050571d854060571d8540705800e808d7a0805803480c06a09056e8189280a056ffd060b05800d80ea7a0c05803c80b80c0c03803e80d86e0d036ed2280e03811581804a0f036fd34e1003805380eb6811036e81f60e12038010ec101204805f80e83a13048033809534140471e00a15048010f95816046e81fa301704805180a74c1705800d808f1018056fd55c1905800e8091481a056e80a76e1b05805f80e2741c0571809b021c05826382c8401d0571df201e05800e809d2c1f05850083e87c1f05811580af68200571f20a21056ff9042205803e80df1e23056e81956c24056e9f542604805180e83829000e800e8080621325803380b0402a020d6ef8100e2c8c4889a96a2c000f803580ce4c2c000b6e9f54062a803480c96406260500"_hex_u8; +/* 2023-12-05T23:48:44Z 69, 44283, 586734,*/ +static constexpr auto BENCH_EXAMPLE_08 = "83728ce80000b90befca1001806083b24002b40de6da3203b545e9c35c04b34beede3005b068e8883006d41c80b1e14c07b337e7841208b26beadb2e096e83892e090980518487380a096e82815c0a096e81ce3c0b097181db200c097181d4020d09810084ed600e096e96b0100f0971819a0210086e93da2e0f09803583ee5e1009803583c66c1109800d82bb6e1209800d81d56a1309803c82e622140971819f521509803d84a55c15057181d6161605806283ac5217056e949c5a18056e89e8641806815889e23419067181de321a066e8af2641a076e82a70a1b07803583f2081c076f81e76e1d076e81d33e1e07800d83b8761e086e82a5541f087181de302008805f84ad0021086e81c74022086e81bd3e23086e9288182408806184b3102409803283816025096e91ed662609830a88e70827096e81d14a27097181ce6028096e8cf03829097181883832016f81835c3103806181e0103203804180b8103204863584fe183304800de66434046e9e4c34056e81d6742f429213c0eb602e3d6483b06c283a6e81d73c263d6e82f9581831805485ab360e37805080c62609398b3189880838010603916db1f3583a03000110873199f8623c000000011100"_hex_u8; +/* 2023-04-14T19:36:52Z 77, 20418, 501117,*/ +static constexpr auto BENCH_EXAMPLE_09 = "bf2989d00400815bca5c01af1e86f97602800d9d6c03800d8a3404b47988866e05b36287f92e0680109f68078010991a08805ecf1208076e80933e09078062d01c0a078054b6760b078053b6760c076f9c1c0d078054b6760e0771af260f0771b17e10078032f57011078035d56812078054e1581307886b83dc301407817480d13013068005a6001406803d80821a15066ef3201606800ea2181706800da628180671ab1219068054db0c1a06719b001b06815b80a11c1c068050b9301d066fac2a1e068033ab481f06719b1020068035ab721e07803dc2761f0771ae3c20078040f60e210771ce282207800ea4322307882a81a66024078035ad4625076efe7e26078162808e1827078118bb7228076eac7428088010bf58290871a04c2a0871bc722b086fa8382c08803d80a0142d088035d6282e088051c30c2f086efc623008800d9f6231086f986432088117bb7237028010a63034068010c84e2740800ea64c2237832c80933e1f3b830880c454390208813c80955c3905068032c73611348010a03c093c837a808a101b278050ac34093a8051ac34291b8f3b8187401d28881a82cb3a3a0a37977b86d20843000028996686a7083f030f8078d3761b27106e995a08499070839b5a1131000b00"_hex_u8; +/* 2023-11-07T17:59:35Z 48, 4792, 498995,*/ +static constexpr auto BENCH_EXAMPLE_10 = "875f89aa1000b51ec09d7201c55cc7a72e02a11aa1fb3203b233a7f95204800ef56205b33ea9d13006803e80b26e07d90ec9dd4008b45eabbe6c09806080ca000a815984e8680a0a6f80925e0a0a803f80e1660c09937c94b7420d086e82f5640a086e80997e0b086f808d320c08800580a5640d086f8089100e08804080c9060f088115819a1c10086e82961a0f0a805f81bc0a100a6ff826110a6ef53e120a807584c60c110a6e818f32120a803c81c246130a805481d508140a8159838410150a7180a55c160a6f80821c170a6fe6101c066fe6101d06805080f854190a6e81b27c1a0a8155819c701e06805180ae0c21046e8b9a222501805180f53422001680f26880f8a62a220116803580da582007058153838e6e21000c800d80a712033a807681ae1c23000308834a82d36023020205815981e03a051a08001700"_hex_u8; +/* 2023-11-16T10:47:08Z 77, 473962, 486863,*/ +static constexpr auto BENCH_EXAMPLE_11 = "801980c06000801980c06001801980c06002801980c06003801980c06004801980c06005801980c06006801980c06007801980c06008801980c06009801980c0600a801980c0600b801980c0600c801980c0600d801980c0600e801980c0600f801980c060108019d12c11800f80b1601111800f80b1601111801080b1601111800f80b160100e800f80b160100f801980c060110f800f80b160140d801180b1601111801180b160100d801180b160120c801180b1600f10801180b1600f11801980c0601011800f80b160140e800f80b160110f801980c060170a801180b1601210801980c060140f800f80b1601311801980c0602005801180b1601f07800f80b1601b0c800fca7c1611812081f9601638812081f9601637812081fb001636801080b160142f801980c0600e2a801080b1600f2a801180b1600d25801980c0600e25800f80b1600d27801980c0600e27801980c0600d27801180b1600e26812080b1500c27812081f960201025812081f960200f27812081fc201d101c812081fc201d101d812081fc201d0f1f812081fc201d0f20812081f9601b1016800f80b1600a35800f80b1600a36800f80b1600e32801080b160122f812081f960280040812081fc20121d1b812081f960112713812081f960160d37812081fc20140d2b812081f960130d2d812081fc20130c2c812081fb001b0157812081fb001a0245812081fc20140030812081fc20092747812081fb000b152500"_hex_u8; +/* 2023-10-06T20:44:09Z 40, 341438, 341438,*/ +static constexpr auto BENCH_EXAMPLE_12 = "80318f4c0080318f4c0180318f4c0280318f4c0380318f4c0480318f4c0580318f4c0680318f4c078033a57807078033a57807078033a57807078033a57807078033a57807078033a57807078033a57807078033a578070780318f4c0e0180318f4c0d0380318f4c0c0580318f4c0b078033a57803128033a57803128033a57803128033a578031280318f4c0412810b9c28140300810c9c281303028033a57802188033a57802188033a5780218810c9c280b01108033a578001c810c9c2807050f8033a578001b810c98040700158033a578001c810c98040301158033a5780019806ca1240101118033a578001300"_hex_u8; +/* 2023-11-15T21:40:46Z 96, 23608, 138286,*/ +static constexpr auto BENCH_EXAMPLE_13 = "8060829f4000b157bab07a01b27cc2b16802b22fbce54603826480a95804803da81a05bc7bcac93806800de55207800daf0608805bc71809805bc7180a800d9d4a0b805bbc700c8152d7180d805bb9380e850a8886260f800d80d33410bf38d3d55011b41dc4eb6012bd70d2ce2e138d3596af7812137180cd501313805e81f7281413718092001513803d81f90016136e8b916c1713801081861a17106e80cd2a18106f80cc3c19106e80cf161911800d80fe781b107180d87c1c106e80fb081d10803e8286701d11800d81c4781f10804082a6002010801081912e21107180ff0021116e81da4a2310850b8b864023116e89db3224116e84ff7e2610897c95993427106f80bb1a240b803581c272250b8032828c10260b6e80d42a270b804082b35a280b800d80fe3e290b805cc0282312821d8697022b0b6e8add562c0b805281c8063007811883f1082313800d80fe3e24137180c9142513800d8380102613803382c00e2713805eb32228136e8494542913800e8186742913806082b74c2a1380528285782b13800d818f7a2c136e84a5562d1380508286702e136f80a46e3e04803f8191364102805481ad4c3d076e809a5a3e077180fe4032136e838b7233138c4790cf384106853584ab624206805b80932a4801806280966c48028168ef04400b7181bd524903806282db5c375b9316acbf703a599c68c5a454385c6e81d63e364a6f80ff64334e817485a6784f023171819536234e800d81826e1e498053829a12420018834c87cb14291d2e840e8bc94c1d2825800d81b7220368811783fe0e271f1f811783e758380f001ecd55809edf6e56000000003a815984ba76008010d54d80aebb4e2c22000000000000002c807682f150007a00"_hex_u8; +/* 2023-12-06T09:18:20Z 93, 68130, 122830,*/ +static constexpr auto BENCH_EXAMPLE_14 = "b26beadb2e00800d80ca0a01d41c80b1e14c02b068e8883003800d81af1604b34beede30056e80b14006b151f5d46c07b93e8085b02608b30cf98b1009b14ef6b3040ab176f6ab480bb7078082b8640c800d81c6460d802c80a8080e802c80a8080f802c80a14210802ce50a11802cd722127181ce6012126e81d14a13126e9b8b00141282428dd42c15128051828408150e6e81bd3e150f805f84ad00160f7181de30170f6e81c740180f800d83b876190f6e82a5541a0f6e81d33e1a106e82a70a1b106f81e76e1c10803583f2081d106e82d9401e106e96e4441f107181de321e12815889e2341f127182d60c20126e979d4e21126e8282262410800d82972c25106f838a5822126f82842a23127182d24a2412803e84bc2a2512800d83c81a26126e84f8142712805085a22c27126e889e6a2812801083aa50281280348598102912801082d5522a126e85865c2b127182c7602b1282468c82042c126e84972c2d12805485d93a2d12801083c7322e12815386e1582f126e84fb0c30126f82eb6c3011813a85b47a3111803f869f5c3211805181ed30370d6e84bf0a3411804180e1383809815883aa183a08815a8392203e05807681f140380c6e9e4c4005805485ab363255805183856030406e82f9582c45805185c1001b4f82418df1001a4e803283c50e430026800d83a6201a4b836886be3044010b8b318988084c0101803183a6120776800d828a1e087682338ae050301c33873199f8624d010032813986bc663c1034800d83a5220a6f800d82be52048000805183e364084907800d83cc4a018005815987b41e1832000017884b9dce72035035803284c11e00800885769d9538192f0000000002001000"_hex_u8; +/* 2023-12-14T02:02:29Z 55, 247754, 247754,*/ +static constexpr auto BENCH_EXAMPLE_15 = "801980c06000801980c06001801980c06002801980c06003801980c06004801980c06005801980c06006801980c06007801980c06008801980c06009801980c0600a801980c0600b801980c0600c801980c0600d801980c0600e801180b1600e0e801180b1600e0e801180b1600e0e801180b1600e0e801180b1600e0e801180b1600e0e801180b1600d07801180b1600f06801180b1600c0a801180b1600f08801180b1600c0c801180b1600c0d801180b1600c0e801180b160100b801180b1601309812081fc200e2a812081fc200e29812081fc200e28812081fc200e0e18812081fc200e0e17801980c060042e812081fc200e0d07812081fc200e0d08812081fc200e0c0a812081fc200e0d0a801980c060081e812081fc200f0c0c812081fc200f0c0d812081fc200f0c0e801180b160083a801180b1600426801980c0600b20801980c0600a22812081fc200f0b30801180b160022b801180b160022b812081fc20062422812081fc2006220b812081fc200c0a1e812081fc2012041a00"_hex_u8; +/* 2023-12-14T15:17:20Z 76, 102600, 103935,*/ +static constexpr auto BENCH_EXAMPLE_16 = "801980c06000801980c06001801980c06002801980c06003801980c06004801180b1600404801180b1600404801180b1600404801980c0600504801980c0600802801980c0600803801180b1600704801980c0600804801280b1600804812081fc200810812081fc20080f812081fc20080e801180b160080c800f80b160080d801980c060090d801180b160090e801980c0600a0e812181fc200a0c801180b1600a0d812181fd400a0c801980c0600a1c801980c0600916801180b1600719801180b160061b801980c0600d15801980c0600717812081fc200718801980c0600716801180b160072d801180b1600722801180b1600525801980c060091b801980c060071e801080b160071f801280b160061d812081fc20063a812181f960160815801280b1600525801980c0600625801180b1600626801980c0600726801980c0600536801180b160032b801980c060042b801280b160032d801980c060033e801180b160043e812181fc20100c27801080b160042f801980c0600342801180b1600442812081fc20150d25800f80b1600245812081fd40120619812081fc20040243812081fc20120c2c812081fd40120a1d812181fb00100623812081fc20030347812081fc20072126801980c0600236812081fc20040d2b812081fc20120328801980c0600237801180b1600337812081fc20052230801180b1600239812081fc2008242c812081fd4005112d812081fb00070b32812081f96011034700"_hex_u8; +/* 2023-12-15T07:12:29Z 98, 112693, 112730,*/ +static constexpr auto BENCH_EXAMPLE_17 = "801980c06000801980c06001801980c06002801980c06003801980c06004801980c06005801980c06006801180b1600606801180b1600606801180b1600606801180b1600606801280b1600606801180b1600606801180b1600606801980c0600d00801980c0600b03801980c0600b04801980c0600f01812081fc200a16812081fc200a15812081fc200a14812081fc200a13812081fd400a12812181fc200a11812181fc200a0f801180b1600a10801180b1600a10801980c0600a10801180b1600b10801180b1600b10801980c0600621801980c0600915801980c060041b801180b160051b801980c0600f12801980c0600f13801980c0600d15801980c0600c17801980c060072e800f80b160082e812181fc200d150e801980c0600922801180b1600923801980c0600823801180b1600623801180b1600a20801180b1600e1c801180b1600b20801180b1600b21801980c0600a3e800f80b1600b3e801980c0600931801180b1600a31812181fc20140325801180b1600a30801180b160054c801180b160043b801980c0600336812181fc200253812081f960090944812081fc2007003c801980c0600339801180b1600433801980c0600453801980c0600340801980c060033d801080b160043d812081f960070854801980c060045a801180b160055a801180b1600545801980c0600643801980c0600641801280b1600739801180b1600562812081fc20121f27812181fc20210137812181fc2016112f801980c0600259801980c0600156812181fc20053a31801180b160025c801180b1600257801980c0600357812081fc200d2d1e812181fc20102444812181fc20035a801180b160035b801980c0600751812181fc2007392a812181fc20025f801980c060045e801180b1600350812081fc20070f6f801180b1600263812181fc201b1322812181fc2011283b812081fc2002442100"_hex_u8; +/* 2023-12-16T02:25:33Z 99, 112399, 112399,*/ +static constexpr auto BENCH_EXAMPLE_18 = "801980c06000801980c06001801980c06002801980c06003801980c06004801980c06005801980c06006801980c06007801180b16008801180b16009801180b1600a801180b1600a0a801180b1600a0a801180b1600a0a801180b1600a0a801980c0600d06801180b1600b09801980c0601005801180b1600c0a801980c0600d0a801980c0601106801180b1600e0a801980c0601207801980c0601207801180b160100a812081e668100a812081e668100a812081e668100a801980c0601407801980c0601606812081fc201226812081fc201225812081fc201224812081fc201223801180b1600e21801980c0600b1e801180b1600c1e801180b1601316801980c060091b801980c0601312801980c0600a1c801180b160190e801180b1601315801180b1600e1b801180b1601713801180b1600f1c801980c0600d34801980c0600d30801980c060102e801980c060122d801980c0600b2a801980c0600b2a801980c0600b2b801180b1601122801180b1600e26801180b1601025801180b1600f26812081fc20280032812081fc20270034812081fc20250034801180b1600d4b801980c0600d457a809a000d46801980c0601044801980c0600e46801180b1600f43801180b160123f801180b160123e801180b1601130801180b1601131801180b1601131812081fc20230a36801980c0600a5a801180b1600a5b801980c0600a5b801180b1600b5b801980c0600b5a801180b1600f57801180b1600d3f801980c0600669801980c0600568801980c0600466801180b1600945801180b1600649801180b1600945812081fc2018234b812081fc20142534812081fc20142532812081fc20142530801180b160074d801180b1600a4b801180b1600a4a812081fc20221662812081fc200c0472812081fc20072e42812081fc20062c23812081fc20100572812081fc200f036c812081fc2001345100"_hex_u8; +/* 2023-03-31T19:24:02Z 78, 90393, 152832,*/ +static constexpr auto BENCH_EXAMPLE_19 = "800dd042008028b13c018028b13c028028b13c038029b13c048029b13c058029b13c0680299948078029b13c088029b13c09802899480a802899480b8028b13c0c80299e700d802899480e802999480f8029b13c10802999481180299948128028b13c138029b13c1480289e701580289948168028b13c1780289948188028994819802899481a802999481b802999481c802899481d802999481e8028b13c1f8029b13c20802999482180299948228028b13c2380298c242480289948258029b13c2680288c242780298c242880299e70298f5a80ea762a824780aa00292a82038090402429813fcf00152a8203809040142a813ff700112982038090402d002d813ff70028002c8203809040270024824780aa00270025820380904025002882038090401e022a82038090401d042782038090401c01298203809040190029813ff700170028813ff700140128807b9258120128841280f6402c01002e82038090402b00062b820380904027000031813ff70011192d82038090401d000129851981a9403a0000003b82038090400c182e813ff7000b0f2982038090401314141b807b925805192b84568190001121000334807bdd400149824780aa00001f2a813ff700003d0b8203809040050d1915807bdd4001498728828f400b010004050501000a050c851981a9400104050b061a0400"_hex_u8; + +static void LinearizeOptimallyExample00(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_00); } +static void LinearizeOptimallyExample01(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_01); } +static void LinearizeOptimallyExample02(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_02); } +static void LinearizeOptimallyExample03(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_03); } +static void LinearizeOptimallyExample04(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_04); } +static void LinearizeOptimallyExample05(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_05); } +static void LinearizeOptimallyExample06(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_06); } +static void LinearizeOptimallyExample07(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_07); } +static void LinearizeOptimallyExample08(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_08); } +static void LinearizeOptimallyExample09(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_09); } +static void LinearizeOptimallyExample10(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_10); } +static void LinearizeOptimallyExample11(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_11); } +static void LinearizeOptimallyExample12(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_12); } +static void LinearizeOptimallyExample13(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_13); } +static void LinearizeOptimallyExample14(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_14); } +static void LinearizeOptimallyExample15(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_15); } +static void LinearizeOptimallyExample16(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_16); } +static void LinearizeOptimallyExample17(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_17); } +static void LinearizeOptimallyExample18(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_18); } +static void LinearizeOptimallyExample19(benchmark::Bench& bench) { BenchLinearizeOptimally(bench, BENCH_EXAMPLE_19); } + +BENCHMARK(Linearize16TxWorstCase20Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize16TxWorstCase120Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize32TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize32TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize48TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize48TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize64TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize64TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize75TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize75TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize99TxWorstCase5000Iters, benchmark::PriorityLevel::HIGH); +BENCHMARK(Linearize99TxWorstCase15000Iters, benchmark::PriorityLevel::HIGH); BENCHMARK(LinearizeNoIters16TxWorstCaseAnc, benchmark::PriorityLevel::HIGH); BENCHMARK(LinearizeNoIters32TxWorstCaseAnc, benchmark::PriorityLevel::HIGH); @@ -276,3 +396,24 @@ BENCHMARK(MergeLinearizations48TxWorstCase, benchmark::PriorityLevel::HIGH); BENCHMARK(MergeLinearizations64TxWorstCase, benchmark::PriorityLevel::HIGH); BENCHMARK(MergeLinearizations75TxWorstCase, benchmark::PriorityLevel::HIGH); BENCHMARK(MergeLinearizations99TxWorstCase, benchmark::PriorityLevel::HIGH); + +BENCHMARK(LinearizeOptimallyExample00, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample01, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample02, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample03, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample04, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample05, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample06, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample07, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample08, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample09, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample10, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample11, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample12, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample13, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample14, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample15, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample16, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample17, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample18, benchmark::PriorityLevel::HIGH); +BENCHMARK(LinearizeOptimallyExample19, benchmark::PriorityLevel::HIGH); diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 607ae681d25..e964849f228 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -118,6 +118,28 @@ public: } } + /** Construct a DepGraph object given another DepGraph and a mapping from old to new. + * + * Complexity: O(N^2) where N=depgraph.TxCount(). + */ + DepGraph(const DepGraph& depgraph, Span mapping) noexcept : entries(depgraph.TxCount()) + { + Assert(mapping.size() == depgraph.TxCount()); + // Fill in fee, size, ancestors. + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + const auto& input = depgraph.entries[i]; + auto& output = entries[mapping[i]]; + output.feerate = input.feerate; + for (auto j : input.ancestors) output.ancestors.Set(mapping[j]); + } + // Fill in descendant information. + for (ClusterIndex i = 0; i < entries.size(); ++i) { + for (auto j : entries[i].ancestors) { + entries[j].descendants.Set(i); + } + } + } + /** Get the number of transactions in the graph. Complexity: O(1). */ auto TxCount() const noexcept { return entries.size(); } /** Get the feerate of a given transaction i. Complexity: O(1). */ @@ -257,6 +279,14 @@ struct SetInfo explicit SetInfo(const DepGraph& depgraph, const SetType& txn) noexcept : transactions(txn), feerate(depgraph.FeeRate(txn)) {} + /** Add a transaction to this SetInfo (which must not yet be in it). */ + void Set(const DepGraph& depgraph, ClusterIndex pos) noexcept + { + Assume(!transactions[pos]); + transactions.Set(pos); + feerate += depgraph.FeeRate(pos); + } + /** Add the transactions of other to this SetInfo (no overlap allowed). */ SetInfo& operator|=(const SetInfo& other) noexcept { @@ -506,6 +536,12 @@ public: return m_todo.None(); } + /** Count the number of remaining unlinearized transactions. */ + ClusterIndex NumRemaining() const noexcept + { + return m_todo.Count(); + } + /** Find the best (highest-feerate, smallest among those in case of a tie) ancestor set * among the remaining transactions. Requires !AllDone(). * @@ -541,23 +577,60 @@ class SearchCandidateFinder { /** Internal RNG. */ InsecureRandomContext m_rng; - /** Internal dependency graph for the cluster. */ - const DepGraph& m_depgraph; - /** Which transactions are left to do (sorted indices). */ + /** m_sorted_to_original[i] is the original position that sorted transaction position i had. */ + std::vector m_sorted_to_original; + /** m_original_to_sorted[i] is the sorted position original transaction position i has. */ + std::vector m_original_to_sorted; + /** Internal dependency graph for the cluster (with transactions in decreasing individual + * feerate order). */ + DepGraph m_sorted_depgraph; + /** Which transactions are left to do (indices in m_sorted_depgraph's order). */ SetType m_todo; + /** Given a set of transactions with sorted indices, get their original indices. */ + SetType SortedToOriginal(const SetType& arg) const noexcept + { + SetType ret; + for (auto pos : arg) ret.Set(m_sorted_to_original[pos]); + return ret; + } + + /** Given a set of transactions with original indices, get their sorted indices. */ + SetType OriginalToSorted(const SetType& arg) const noexcept + { + SetType ret; + for (auto pos : arg) ret.Set(m_original_to_sorted[pos]); + return ret; + } + public: /** Construct a candidate finder for a graph. * * @param[in] depgraph Dependency graph for the to-be-linearized cluster. * @param[in] rng_seed A random seed to control the search order. * - * Complexity: O(1). + * Complexity: O(N^2) where N=depgraph.Count(). */ - SearchCandidateFinder(const DepGraph& depgraph LIFETIMEBOUND, uint64_t rng_seed) noexcept : + SearchCandidateFinder(const DepGraph& depgraph, uint64_t rng_seed) noexcept : m_rng(rng_seed), - m_depgraph(depgraph), - m_todo(SetType::Fill(depgraph.TxCount())) {} + m_sorted_to_original(depgraph.TxCount()), + m_original_to_sorted(depgraph.TxCount()), + m_todo(SetType::Fill(depgraph.TxCount())) + { + // Determine reordering mapping, by sorting by decreasing feerate. + std::iota(m_sorted_to_original.begin(), m_sorted_to_original.end(), ClusterIndex{0}); + std::sort(m_sorted_to_original.begin(), m_sorted_to_original.end(), [&](auto a, auto b) { + auto feerate_cmp = depgraph.FeeRate(a) <=> depgraph.FeeRate(b); + if (feerate_cmp == 0) return a < b; + return feerate_cmp > 0; + }); + // Compute reverse mapping. + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + m_original_to_sorted[m_sorted_to_original[i]] = i; + } + // Compute reordered dependency graph. + m_sorted_depgraph = DepGraph(depgraph, m_original_to_sorted); + } /** Check whether any unlinearized transactions remain. */ bool AllDone() const noexcept @@ -580,12 +653,15 @@ public: * be <= max_iterations. If strictly < max_iterations, the * returned subset is optimal. * - * Complexity: O(N * min(max_iterations, 2^N)) where N=depgraph.TxCount(). + * Complexity: possibly O(N * min(max_iterations, sqrt(2^N))) where N=depgraph.TxCount(). */ std::pair, uint64_t> FindCandidateSet(uint64_t max_iterations, SetInfo best) noexcept { Assume(!AllDone()); + // Convert the provided best to internal sorted indices. + best.transactions = OriginalToSorted(best.transactions); + /** Type for work queue items. */ struct WorkItem { @@ -596,16 +672,27 @@ public: /** Set of undecided transactions. This must be a subset of m_todo, and have no overlap * with inc. The set (inc | und) must be topologically valid. */ SetType und; + /** (Only when inc is not empty) The best feerate of any superset of inc that is also a + * subset of (inc | und), without requiring it to be topologically valid. It forms a + * conservative upper bound on how good a set this work item can give rise to. + * Transactions whose feerate is below best's are ignored when determining this value, + * which means it may technically be an underestimate, but if so, this work item + * cannot result in something that beats best anyway. */ + FeeFrac pot_feerate; /** Construct a new work item. */ - WorkItem(SetInfo&& i, SetType&& u) noexcept : - inc(std::move(i)), und(std::move(u)) {} + WorkItem(SetInfo&& i, SetType&& u, FeeFrac&& p_f) noexcept : + inc(std::move(i)), und(std::move(u)), pot_feerate(std::move(p_f)) + { + Assume(pot_feerate.IsEmpty() == inc.feerate.IsEmpty()); + } /** Swap two WorkItems. */ void Swap(WorkItem& other) noexcept { swap(inc, other.inc); swap(und, other.und); + swap(pot_feerate, other.pot_feerate); } }; @@ -613,39 +700,111 @@ public: VecDeque queue; queue.reserve(std::max(256, 2 * m_todo.Count())); - // Create an initial entry with m_todo as undecided. Also use it as best if not provided, - // so that during the work processing loop below, and during the add_fn/split_fn calls, we - // do not need to deal with the best=empty case. - if (best.feerate.IsEmpty()) best = SetInfo(m_depgraph, m_todo); - queue.emplace_back(SetInfo{}, SetType{m_todo}); + // Create initial entries per connected component of m_todo. While clusters themselves are + // generally connected, this is not necessarily true after some parts have already been + // removed from m_todo. Without this, effort can be wasted on searching "inc" sets that + // span multiple components. + auto to_cover = m_todo; + do { + auto component = m_sorted_depgraph.FindConnectedComponent(to_cover); + to_cover -= component; + // If best is not provided, set it to the first component, so that during the work + // processing loop below, and during the add_fn/split_fn calls, we do not need to deal + // with the best=empty case. + if (best.feerate.IsEmpty()) best = SetInfo(m_sorted_depgraph, component); + queue.emplace_back(/*inc=*/SetInfo{}, + /*und=*/std::move(component), + /*pot_feerate=*/FeeFrac{}); + } while (to_cover.Any()); /** Local copy of the iteration limit. */ uint64_t iterations_left = max_iterations; + /** The set of transactions in m_todo which have feerate > best's. */ + SetType imp = m_todo; + while (imp.Any()) { + ClusterIndex check = imp.Last(); + if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; + imp.Reset(check); + } + /** Internal function to add an item to the queue of elements to explore if there are any - * transactions left to split on, and to update best. + * transactions left to split on, possibly improving it before doing so, and to update + * best/imp. * * - inc: the "inc" value for the new work item (must be topological). * - und: the "und" value for the new work item ((inc | und) must be topological). */ auto add_fn = [&](SetInfo inc, SetType und) noexcept { + /** SetInfo object with the set whose feerate will become the new work item's + * pot_feerate. It starts off equal to inc. */ + auto pot = inc; if (!inc.feerate.IsEmpty()) { + // Add entries to pot. We iterate over all undecided transactions whose feerate is + // higher than best. While undecided transactions of lower feerate may improve pot, + // the resulting pot feerate cannot possibly exceed best's (and this item will be + // skipped in split_fn anyway). + for (auto pos : imp & und) { + // Determine if adding transaction pos to pot (ignoring topology) would improve + // it. If not, we're done updating pot. This relies on the fact that + // m_sorted_depgraph, and thus the transactions iterated over, are in decreasing + // individual feerate order. + if (!(m_sorted_depgraph.FeeRate(pos) >> pot.feerate)) break; + pot.Set(m_sorted_depgraph, pos); + } + + // The "jump ahead" optimization: whenever pot has a topologically-valid subset, + // that subset can be added to inc. Any subset of (pot - inc) has the property that + // its feerate exceeds that of any set compatible with this work item (superset of + // inc, subset of (inc | und)). Thus, if T is a topological subset of pot, and B is + // the best topologically-valid set compatible with this work item, and (T - B) is + // non-empty, then (T | B) is better than B and also topological. This is in + // contradiction with the assumption that B is best. Thus, (T - B) must be empty, + // or T must be a subset of B. + // + // See https://delvingbitcoin.org/t/how-to-linearize-your-cluster/303 section 2.4. + const auto init_inc = inc.transactions; + for (auto pos : pot.transactions - inc.transactions) { + // If the transaction's ancestors are a subset of pot, we can add it together + // with its ancestors to inc. Just update the transactions here; the feerate + // update happens below. + auto anc_todo = m_sorted_depgraph.Ancestors(pos) & m_todo; + if (anc_todo.IsSubsetOf(pot.transactions)) inc.transactions |= anc_todo; + } + // Finally update und and inc's feerate to account for the added transactions. + und -= inc.transactions; + inc.feerate += m_sorted_depgraph.FeeRate(inc.transactions - init_inc); + // If inc's feerate is better than best's, remember it as our new best. if (inc.feerate > best.feerate) { best = inc; + // See if we can remove any entries from imp now. + while (imp.Any()) { + ClusterIndex check = imp.Last(); + if (m_sorted_depgraph.FeeRate(check) >> best.feerate) break; + imp.Reset(check); + } } + + // If no potential transactions exist beyond the already included ones, no + // improvement is possible anymore. + if (pot.feerate.size == inc.feerate.size) return; + // At this point und must be non-empty. If it were empty then pot would equal inc. + Assume(und.Any()); } else { Assume(inc.transactions.None()); + // If inc is empty, we just make sure there are undecided transactions left to + // split on. + if (und.None()) return; } - // Make sure there are undecided transactions left to split on. - if (und.None()) return; - // Actually construct a new work item on the queue. Due to the switch to DFS when queue // space runs out (see below), we know that no reallocation of the queue should ever // occur. Assume(queue.size() < queue.capacity()); - queue.emplace_back(std::move(inc), std::move(und)); + queue.emplace_back(/*inc=*/std::move(inc), + /*und=*/std::move(und), + /*pot_feerate=*/std::move(pot.feerate)); }; /** Internal process function. It takes an existing work item, and splits it in two: one @@ -659,18 +818,66 @@ public: Assume(elem.inc.transactions.IsSubsetOf(m_todo) && elem.und.IsSubsetOf(m_todo)); // Included transactions cannot be undecided. Assume(!elem.inc.transactions.Overlaps(elem.und)); + // If pot is empty, then so is inc. + Assume(elem.inc.feerate.IsEmpty() == elem.pot_feerate.IsEmpty()); - // Pick the first undecided transaction as the one to split on. - const ClusterIndex split = elem.und.First(); + const ClusterIndex first = elem.und.First(); + if (!elem.inc.feerate.IsEmpty()) { + // If no undecided transactions remain with feerate higher than best, this entry + // cannot be improved beyond best. + if (!elem.und.Overlaps(imp)) return; + // We can ignore any queue item whose potential feerate isn't better than the best + // seen so far. + if (elem.pot_feerate <= best.feerate) return; + } else { + // In case inc is empty use a simpler alternative check. + if (m_sorted_depgraph.FeeRate(first) <= best.feerate) return; + } + + // Decide which transaction to split on. Splitting is how new work items are added, and + // how progress is made. One split transaction is chosen among the queue item's + // undecided ones, and: + // - A work item is (potentially) added with that transaction plus its remaining + // descendants excluded (removed from the und set). + // - A work item is (potentially) added with that transaction plus its remaining + // ancestors included (added to the inc set). + // + // To decide what to split on, consider the undecided ancestors of the highest + // individual feerate undecided transaction. Pick the one which reduces the search space + // most. Let I(t) be the size of the undecided set after including t, and E(t) the size + // of the undecided set after excluding t. Then choose the split transaction t such + // that 2^I(t) + 2^E(t) is minimal, tie-breaking by highest individual feerate for t. + ClusterIndex split = 0; + const auto select = elem.und & m_sorted_depgraph.Ancestors(first); + Assume(select.Any()); + std::optional> split_counts; + for (auto t : select) { + // Call max = max(I(t), E(t)) and min = min(I(t), E(t)). Let counts = {max,min}. + // Sorting by the tuple counts is equivalent to sorting by 2^I(t) + 2^E(t). This + // expression is equal to 2^max + 2^min = 2^max * (1 + 1/2^(max - min)). The second + // factor (1 + 1/2^(max - min)) there is in (1,2]. Thus increasing max will always + // increase it, even when min decreases. Because of this, we can first sort by max. + std::pair counts{ + (elem.und - m_sorted_depgraph.Ancestors(t)).Count(), + (elem.und - m_sorted_depgraph.Descendants(t)).Count()}; + if (counts.first < counts.second) std::swap(counts.first, counts.second); + // Remember the t with the lowest counts. + if (!split_counts.has_value() || counts < *split_counts) { + split = t; + split_counts = counts; + } + } + // Since there was at least one transaction in select, we must always find one. + Assume(split_counts.has_value()); // Add a work item corresponding to exclusion of the split transaction. - const auto& desc = m_depgraph.Descendants(split); + const auto& desc = m_sorted_depgraph.Descendants(split); add_fn(/*inc=*/elem.inc, /*und=*/elem.und - desc); // Add a work item corresponding to inclusion of the split transaction. - const auto anc = m_depgraph.Ancestors(split) & m_todo; - add_fn(/*inc=*/elem.inc.Add(m_depgraph, anc), + const auto anc = m_sorted_depgraph.Ancestors(split) & m_todo; + add_fn(/*inc=*/elem.inc.Add(m_sorted_depgraph, anc), /*und=*/elem.und - anc); // Account for the performed split. @@ -713,7 +920,9 @@ public: split_fn(std::move(elem)); } - // Return the found best set and the number of iterations performed. + // Return the found best set (converted to the original transaction indices), and the + // number of iterations performed. + best.transactions = SortedToOriginal(best.transactions); return {std::move(best), max_iterations - iterations_left}; } @@ -723,9 +932,10 @@ public: */ void MarkDone(const SetType& done) noexcept { - Assume(done.Any()); - Assume(done.IsSubsetOf(m_todo)); - m_todo -= done; + const auto done_sorted = OriginalToSorted(done); + Assume(done_sorted.Any()); + Assume(done_sorted.IsSubsetOf(m_todo)); + m_todo -= done_sorted; } }; @@ -744,7 +954,7 @@ public: * - A boolean indicating whether the result is guaranteed to be * optimal. * - * Complexity: O(N * min(max_iterations + N, 2^N)) where N=depgraph.TxCount(). + * Complexity: possibly O(N * min(max_iterations + N, sqrt(2^N))) where N=depgraph.TxCount(). */ template std::pair, bool> Linearize(const DepGraph& depgraph, uint64_t max_iterations, uint64_t rng_seed, Span old_linearization = {}) noexcept @@ -756,10 +966,20 @@ std::pair, bool> Linearize(const DepGraph& de std::vector linearization; AncestorCandidateFinder anc_finder(depgraph); - SearchCandidateFinder src_finder(depgraph, rng_seed); + std::optional> src_finder; linearization.reserve(depgraph.TxCount()); bool optimal = true; + // Treat the initialization of SearchCandidateFinder as taking N^2/64 (rounded up) iterations + // (largely due to the cost of constructing the internal sorted-by-feerate DepGraph inside + // SearchCandidateFinder), a rough approximation based on benchmark. If we don't have that + // many, don't start it. + uint64_t start_iterations = (uint64_t{depgraph.TxCount()} * depgraph.TxCount() + 63) / 64; + if (iterations_left > start_iterations) { + iterations_left -= start_iterations; + src_finder.emplace(depgraph, rng_seed); + } + /** Chunking of what remains of the old linearization. */ LinearizationChunking old_chunking(depgraph, old_linearization); @@ -772,12 +992,22 @@ std::pair, bool> Linearize(const DepGraph& de auto best = anc_finder.FindCandidateSet(); if (!best_prefix.feerate.IsEmpty() && best_prefix.feerate >= best.feerate) best = best_prefix; - // Invoke bounded search to update best, with up to half of our remaining iterations as - // limit. - uint64_t max_iterations_now = (iterations_left + 1) / 2; uint64_t iterations_done_now = 0; - std::tie(best, iterations_done_now) = src_finder.FindCandidateSet(max_iterations_now, best); - iterations_left -= iterations_done_now; + uint64_t max_iterations_now = 0; + if (src_finder) { + // Treat the invocation of SearchCandidateFinder::FindCandidateSet() as costing N/4 + // up-front (rounded up) iterations (largely due to the cost of connected-component + // splitting), a rough approximation based on benchmarks. + uint64_t base_iterations = (anc_finder.NumRemaining() + 3) / 4; + if (iterations_left > base_iterations) { + // Invoke bounded search to update best, with up to half of our remaining + // iterations as limit. + iterations_left -= base_iterations; + max_iterations_now = (iterations_left + 1) / 2; + std::tie(best, iterations_done_now) = src_finder->FindCandidateSet(max_iterations_now, best); + iterations_left -= iterations_done_now; + } + } if (iterations_done_now == max_iterations_now) { optimal = false; @@ -795,7 +1025,7 @@ std::pair, bool> Linearize(const DepGraph& de // Update state to reflect best is no longer to be linearized. anc_finder.MarkDone(best.transactions); if (anc_finder.AllDone()) break; - src_finder.MarkDone(best.transactions); + if (src_finder) src_finder->MarkDone(best.transactions); if (old_chunking.NumChunksLeft() > 0) { old_chunking.MarkDone(best.transactions); } diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 2dfdfbb41de..d91f85d867b 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -165,6 +165,23 @@ std::pair, bool> SimpleLinearize(const DepGraph +void MakeConnected(DepGraph& depgraph) +{ + auto todo = BS::Fill(depgraph.TxCount()); + auto comp = depgraph.FindConnectedComponent(todo); + Assume(depgraph.IsConnected(comp)); + todo -= comp; + while (todo.Any()) { + auto nextcomp = depgraph.FindConnectedComponent(todo); + Assume(depgraph.IsConnected(nextcomp)); + depgraph.AddDependency(comp.Last(), nextcomp.First()); + todo -= nextcomp; + comp = nextcomp; + } +} + /** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */ template SetType ReadTopologicalSet(const DepGraph& depgraph, const SetType& todo, SpanReader& reader) @@ -369,6 +386,20 @@ FUZZ_TARGET(clusterlin_components) assert(depgraph.FindConnectedComponent(todo).None()); } +FUZZ_TARGET(clusterlin_make_connected) +{ + // Verify that MakeConnected makes graphs connected. + + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + MakeConnected(depgraph); + SanityCheck(depgraph); + assert(depgraph.IsConnected()); +} + FUZZ_TARGET(clusterlin_chunking) { // Verify the correctness of the ChunkLinearization function. @@ -398,7 +429,7 @@ FUZZ_TARGET(clusterlin_chunking) SetInfo accumulator, best; for (ClusterIndex idx : linearization) { if (todo[idx]) { - accumulator |= SetInfo(depgraph, idx); + accumulator.Set(depgraph, idx); if (best.feerate.IsEmpty() || accumulator.feerate >> best.feerate) { best = accumulator; } @@ -427,6 +458,7 @@ FUZZ_TARGET(clusterlin_ancestor_finder) while (todo.Any()) { // Call the ancestor finder's FindCandidateSet for what remains of the graph. assert(!anc_finder.AllDone()); + assert(todo.Count() == anc_finder.NumRemaining()); auto best_anc = anc_finder.FindCandidateSet(); // Sanity check the result. assert(best_anc.transactions.Any()); @@ -458,6 +490,7 @@ FUZZ_TARGET(clusterlin_ancestor_finder) anc_finder.MarkDone(del_set); } assert(anc_finder.AllDone()); + assert(anc_finder.NumRemaining() == 0); } static constexpr auto MAX_SIMPLE_ITERATIONS = 300000; @@ -468,13 +501,17 @@ FUZZ_TARGET(clusterlin_search_finder) // and comparing with the results from SimpleCandidateFinder, ExhaustiveCandidateFinder, and // AncestorCandidateFinder. - // Retrieve an RNG seed and a depgraph from the fuzz input. + // Retrieve an RNG seed, a depgraph, and whether to make it connected, from the fuzz input. SpanReader reader(buffer); DepGraph depgraph; uint64_t rng_seed{0}; + uint8_t make_connected{1}; try { - reader >> Using(depgraph) >> rng_seed; + reader >> Using(depgraph) >> rng_seed >> make_connected; } catch (const std::ios_base::failure&) {} + // The most complicated graphs are connected ones (other ones just split up). Optionally force + // the graph to be connected. + if (make_connected) MakeConnected(depgraph); // Instantiate ALL the candidate finders. SearchCandidateFinder src_finder(depgraph, rng_seed); @@ -488,6 +525,7 @@ FUZZ_TARGET(clusterlin_search_finder) assert(!smp_finder.AllDone()); assert(!exh_finder.AllDone()); assert(!anc_finder.AllDone()); + assert(anc_finder.NumRemaining() == todo.Count()); // For each iteration, read an iteration count limit from the fuzz input. uint64_t max_iterations = 1; @@ -513,9 +551,17 @@ FUZZ_TARGET(clusterlin_search_finder) assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo)); } - // At most 2^N-1 iterations can be required: the number of non-empty subsets a graph with N - // transactions has. - assert(iterations_done <= ((uint64_t{1} << todo.Count()) - 1)); + // At most 2^(N-1) iterations can be required: the maximum number of non-empty topological + // subsets a (connected) cluster with N transactions can have. Even when the cluster is no + // longer connected after removing certain transactions, this holds, because the connected + // components are searched separately. + assert(iterations_done <= (uint64_t{1} << (todo.Count() - 1))); + // Additionally, test that no more than sqrt(2^N)+1 iterations are required. This is just + // an empirical bound that seems to hold, without proof. Still, add a test for it so we + // can learn about counterexamples if they exist. + if (iterations_done >= 1 && todo.Count() <= 63) { + Assume((iterations_done - 1) * (iterations_done - 1) <= uint64_t{1} << todo.Count()); + } // Perform quality checks only if SearchCandidateFinder claims an optimal result. if (iterations_done < max_iterations) { @@ -562,6 +608,7 @@ FUZZ_TARGET(clusterlin_search_finder) assert(smp_finder.AllDone()); assert(exh_finder.AllDone()); assert(anc_finder.AllDone()); + assert(anc_finder.NumRemaining() == 0); } FUZZ_TARGET(clusterlin_linearization_chunking) @@ -621,7 +668,7 @@ FUZZ_TARGET(clusterlin_linearization_chunking) SetInfo accumulator, best; for (auto j : linearization) { if (todo[j] && !combined[j]) { - accumulator |= SetInfo(depgraph, j); + accumulator.Set(depgraph, j); if (best.feerate.IsEmpty() || accumulator.feerate > best.feerate) { best = accumulator; } @@ -685,14 +732,19 @@ FUZZ_TARGET(clusterlin_linearize) { // Verify the behavior of Linearize(). - // Retrieve an RNG seed, an iteration count, and a depgraph from the fuzz input. + // Retrieve an RNG seed, an iteration count, a depgraph, and whether to make it connected from + // the fuzz input. SpanReader reader(buffer); DepGraph depgraph; uint64_t rng_seed{0}; uint64_t iter_count{0}; + uint8_t make_connected{1}; try { - reader >> VARINT(iter_count) >> Using(depgraph) >> rng_seed; + reader >> VARINT(iter_count) >> Using(depgraph) >> rng_seed >> make_connected; } catch (const std::ios_base::failure&) {} + // The most complicated graphs are connected ones (other ones just split up). Optionally force + // the graph to be connected. + if (make_connected) MakeConnected(depgraph); // Optionally construct an old linearization for it. std::vector old_linearization; @@ -721,12 +773,24 @@ FUZZ_TARGET(clusterlin_linearize) } // If the iteration count is sufficiently high, an optimal linearization must be found. - // Each linearization step can use up to 2^k iterations, with steps k=1..n. That sum is - // 2 * (2^n - 1) + // Each linearization step can use up to 2^(k-1) iterations, with steps k=1..n. That sum is + // 2^n - 1. const uint64_t n = depgraph.TxCount(); - if (n <= 18 && iter_count > 2U * ((uint64_t{1} << n) - 1U)) { + if (n <= 19 && iter_count > (uint64_t{1} << n)) { assert(optimal); } + // Additionally, if the assumption of sqrt(2^k)+1 iterations per step holds, plus ceil(k/4) + // start-up cost per step, plus ceil(n^2/64) start-up cost overall, we can compute the upper + // bound for a whole linearization (summing for k=1..n) using the Python expression + // [sum((k+3)//4 + int(math.sqrt(2**k)) + 1 for k in range(1, n + 1)) + (n**2 + 63) // 64 for n in range(0, 35)]: + static constexpr uint64_t MAX_OPTIMAL_ITERS[] = { + 0, 4, 8, 12, 18, 26, 37, 51, 70, 97, 133, 182, 251, 346, 480, 666, 927, 1296, 1815, 2545, + 3576, 5031, 7087, 9991, 14094, 19895, 28096, 39690, 56083, 79263, 112041, 158391, 223936, + 316629, 447712 + }; + if (n < std::size(MAX_OPTIMAL_ITERS) && iter_count >= MAX_OPTIMAL_ITERS[n]) { + Assume(optimal); + } // If Linearize claims optimal result, run quality tests. if (optimal) { diff --git a/src/test/util/cluster_linearize.h b/src/test/util/cluster_linearize.h index 9477d2ed41f..b86ebcd78b2 100644 --- a/src/test/util/cluster_linearize.h +++ b/src/test/util/cluster_linearize.h @@ -102,7 +102,7 @@ bool IsAcyclic(const DepGraph& depgraph) noexcept struct DepGraphFormatter { /** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */ - static uint64_t SignedToUnsigned(int64_t x) noexcept + [[maybe_unused]] static uint64_t SignedToUnsigned(int64_t x) noexcept { if (x < 0) { return 2 * uint64_t(-(x + 1)) + 1; @@ -112,7 +112,7 @@ struct DepGraphFormatter } /** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */ - static int64_t UnsignedToSigned(uint64_t x) noexcept + [[maybe_unused]] static int64_t UnsignedToSigned(uint64_t x) noexcept { if (x & 1) { return -int64_t(x / 2) - 1; @@ -186,7 +186,7 @@ struct DepGraphFormatter /** The dependency graph which we deserialize into first, with transactions in * topological serialization order, not original cluster order. */ DepGraph topo_depgraph; - /** Mapping from cluster order to serialization order, used later to reconstruct the + /** Mapping from serialization order to cluster order, used later to reconstruct the * cluster order. */ std::vector reordering; @@ -205,9 +205,9 @@ struct DepGraphFormatter coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC. static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000); auto fee = UnsignedToSigned(coded_fee); - // Extend topo_depgraph with the new transaction (at the end). + // Extend topo_depgraph with the new transaction (preliminarily at the end). auto topo_idx = topo_depgraph.AddTransaction({fee, size}); - reordering.push_back(topo_idx); + reordering.push_back(reordering.size()); // Read dependency information. uint64_t diff = 0; //!< How many potential parents we have to skip. s >> VARINT(diff); @@ -226,31 +226,23 @@ struct DepGraphFormatter --diff; } } - // If we reach this point, we can interpret the remaining skip value as how far from the - // end of reordering topo_idx should be placed (wrapping around), so move it to its - // correct location. The preliminary reordering.push_back(topo_idx) above was to make - // sure that if a deserialization exception occurs, topo_idx still appears somewhere. + // If we reach this point, we can interpret the remaining skip value as how far + // from the end of reordering the new transaction should be placed (wrapping + // around), so remove the preliminary position it was put in above (which was to + // make sure that if a deserialization exception occurs, the new transaction still + // has some entry in reordering). reordering.pop_back(); - reordering.insert(reordering.end() - (diff % (reordering.size() + 1)), topo_idx); + ClusterIndex insert_distance = diff % (reordering.size() + 1); + // And then update reordering to reflect this new transaction's insertion. + for (auto& pos : reordering) { + pos += (pos >= reordering.size() - insert_distance); + } + reordering.push_back(reordering.size() - insert_distance); } } catch (const std::ios_base::failure&) {} // Construct the original cluster order depgraph. - depgraph = {}; - // Add transactions to depgraph in the original cluster order. - for (auto topo_idx : reordering) { - depgraph.AddTransaction(topo_depgraph.FeeRate(topo_idx)); - } - // Translate dependencies from topological to cluster order. - for (ClusterIndex idx = 0; idx < reordering.size(); ++idx) { - ClusterIndex topo_idx = reordering[idx]; - for (ClusterIndex dep_idx = 0; dep_idx < reordering.size(); ++dep_idx) { - ClusterIndex dep_topo_idx = reordering[dep_idx]; - if (topo_depgraph.Ancestors(topo_idx)[dep_topo_idx]) { - depgraph.AddDependency(dep_idx, idx); - } - } - } + depgraph = DepGraph(topo_depgraph, reordering); } };