mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-06-04 02:02:42 +02:00
Merge bitcoin/bitcoin#34208: bench: add fluent API for untimed setup steps in nanobench
8825051e08 refactor: improve benchmark setup and execution for various tests (Lőrinc)
83b8528ddb bench: add fluent API for untimed setup steps in `nanobench` (Lőrinc)

Pull request description:

### Context
As described in https://github.com/martinus/nanobench/issues/130, we have a few benchmarks where we have to reset the state between runs; otherwise, the repetitions will do something different than the first iteration.

### Upstream
I have opened a PR to `nanobench` to introduce an untimed setup phase, see: https://github.com/martinus/nanobench/pull/136

### Tests
Tests were only added upstream. It would be a bit awkward to wire them into `nanobench.h` outside the benchmarking setup: 58350cfe59 (diff-88160f647ce57661afe7d755fa70a5fa342a2b79d72d3511596878e69ed5cdc3)

### Fix
I have moved the changes here as well and applied them to a few simple benchmarks as a demonstration. We can revert the ones that are controversial and add others in follow-ups. This PR is mostly meant to add the `setup` feature.

### Benchmarks
Most benchmarks show a modest "speedup"; others a "slowdown" - but it's only the effect of the setup that's not measured anymore - and a `run` phase that does the same operation in each epoch iteration (wallet benchmark changes were reverted for simplicity):
<img width="1496" height="882" alt="image" src="https://github.com/user-attachments/assets/34c14565-f3df-41e5-9a86-95b2ca21703a" />

ACKs for top commit:
achow101: ACK 8825051e08
janb84: re ACK 8825051e08
sedited: ACK 8825051e08

Tree-SHA512: b3e385abcfca013a21b3785b0b837c2b61e302d71a098dadcd8d2f0cb42f6bbf4a222299771443f095962d1b24e696d5684f2b8efdb6f63f2f939699961cdf0d
This commit is contained in:
@@ -161,18 +161,13 @@ static void AddrManAddThenGood(benchmark::Bench& bench)
|
||||
|
||||
CreateAddresses();
|
||||
|
||||
bench.run([&] {
|
||||
// To make the benchmark independent of the number of evaluations, we always prepare a new addrman.
|
||||
// This is necessary because AddrMan::Good() method modifies the object, affecting the timing of subsequent calls
|
||||
// to the same method and we want to do the same amount of work in every loop iteration.
|
||||
//
|
||||
// This has some overhead (exactly the result of AddrManAdd benchmark), but that overhead is constant so improvements in
|
||||
// AddrMan::Good() will still be noticeable.
|
||||
AddrMan addrman{EMPTY_NETGROUPMAN, /*deterministic=*/false, ADDRMAN_CONSISTENCY_CHECK_RATIO};
|
||||
AddAddressesToAddrMan(addrman);
|
||||
|
||||
markSomeAsGood(addrman);
|
||||
});
|
||||
std::optional<AddrMan> addrman;
|
||||
bench.epochIterations(1)
|
||||
.setup([&] {
|
||||
addrman.emplace(EMPTY_NETGROUPMAN, /*deterministic=*/false, ADDRMAN_CONSISTENCY_CHECK_RATIO);
|
||||
AddAddressesToAddrMan(*addrman);
|
||||
})
|
||||
.run([&] { markSomeAsGood(*addrman); });
|
||||
}
|
||||
|
||||
BENCHMARK(AddrManAdd);
|
||||
|
||||
@@ -27,38 +27,30 @@
|
||||
|
||||
static void DeserializeBlockTest(benchmark::Bench& bench)
|
||||
{
|
||||
DataStream stream(benchmark::data::block413567);
|
||||
std::byte a{0};
|
||||
stream.write({&a, 1}); // Prevent compaction
|
||||
|
||||
bench.unit("block").run([&] {
|
||||
CBlock block;
|
||||
stream >> TX_WITH_WITNESS(block);
|
||||
bool rewound = stream.Rewind(benchmark::data::block413567.size());
|
||||
assert(rewound);
|
||||
});
|
||||
DataStream stream;
|
||||
bench.unit("block").epochIterations(1)
|
||||
.setup([&] { stream = DataStream{benchmark::data::block413567}; })
|
||||
.run([&] { CBlock block; stream >> TX_WITH_WITNESS(block); });
|
||||
}
|
||||
|
||||
static void DeserializeAndCheckBlockTest(benchmark::Bench& bench)
|
||||
static void CheckBlockTest(benchmark::Bench& bench)
|
||||
{
|
||||
DataStream stream(benchmark::data::block413567);
|
||||
std::byte a{0};
|
||||
stream.write({&a, 1}); // Prevent compaction
|
||||
|
||||
ArgsManager bench_args;
|
||||
const auto chainParams = CreateChainParams(bench_args, ChainType::MAIN);
|
||||
|
||||
bench.unit("block").run([&] {
|
||||
CBlock block; // Note that CBlock caches its checked state, so we need to recreate it here
|
||||
stream >> TX_WITH_WITNESS(block);
|
||||
bool rewound = stream.Rewind(benchmark::data::block413567.size());
|
||||
assert(rewound);
|
||||
|
||||
BlockValidationState validationState;
|
||||
bool checked = CheckBlock(block, validationState, chainParams->GetConsensus());
|
||||
assert(checked);
|
||||
});
|
||||
CBlock block;
|
||||
bench.unit("block").epochIterations(1)
|
||||
.setup([&] {
|
||||
block = CBlock{};
|
||||
DataStream stream{benchmark::data::block413567};
|
||||
stream >> TX_WITH_WITNESS(block);
|
||||
})
|
||||
.run([&] {
|
||||
BlockValidationState validationState;
|
||||
bool checked = CheckBlock(block, validationState, chainParams->GetConsensus());
|
||||
assert(checked);
|
||||
});
|
||||
}
|
||||
|
||||
BENCHMARK(DeserializeBlockTest);
|
||||
BENCHMARK(DeserializeAndCheckBlockTest);
|
||||
BENCHMARK(CheckBlockTest);
|
||||
|
||||
@@ -124,17 +124,14 @@ static CAmount make_hard_case(int utxos, std::vector<OutputGroup>& utxo_pool)
|
||||
|
||||
static void BnBExhaustion(benchmark::Bench& bench)
|
||||
{
|
||||
// Setup
|
||||
std::vector<OutputGroup> utxo_pool;
|
||||
|
||||
bench.run([&] {
|
||||
// Benchmark
|
||||
CAmount target = make_hard_case(17, utxo_pool);
|
||||
(void)SelectCoinsBnB(utxo_pool, target, /*cost_of_change=*/0, MAX_STANDARD_TX_WEIGHT); // Should exhaust
|
||||
|
||||
// Cleanup
|
||||
utxo_pool.clear();
|
||||
});
|
||||
CAmount target;
|
||||
bench.epochIterations(1)
|
||||
.setup([&] { target = make_hard_case(17, utxo_pool); })
|
||||
.run([&] {
|
||||
auto res{SelectCoinsBnB(utxo_pool, target, /*cost_of_change=*/0, MAX_STANDARD_TX_WEIGHT)}; // Should exhaust
|
||||
ankerl::nanobench::doNotOptimizeAway(res);
|
||||
});
|
||||
}
|
||||
|
||||
BENCHMARK(CoinSelection);
|
||||
|
||||
@@ -62,12 +62,17 @@ static void LoadExternalBlockFile(benchmark::Bench& bench)
|
||||
|
||||
std::multimap<uint256, FlatFilePos> blocks_with_unknown_parent;
|
||||
FlatFilePos pos;
|
||||
bench.run([&] {
|
||||
// "rb" is "binary, O_RDONLY", positioned to the start of the file.
|
||||
// The file will be closed by LoadExternalBlockFile().
|
||||
AutoFile file{fsbridge::fopen(blkfile, "rb")};
|
||||
testing_setup->m_node.chainman->LoadExternalBlockFile(file, &pos, &blocks_with_unknown_parent);
|
||||
});
|
||||
bench.epochIterations(1)
|
||||
.setup([&] {
|
||||
blocks_with_unknown_parent.clear();
|
||||
pos = FlatFilePos{};
|
||||
})
|
||||
.run([&] {
|
||||
// "rb" is "binary, O_RDONLY", positioned to the start of the file.
|
||||
// The file will be closed by LoadExternalBlockFile().
|
||||
AutoFile file{fsbridge::fopen(blkfile, "rb")};
|
||||
testing_setup->m_node.chainman->LoadExternalBlockFile(file, &pos, &blocks_with_unknown_parent);
|
||||
});
|
||||
fs::remove(blkfile);
|
||||
}
|
||||
|
||||
|
||||
@@ -137,6 +137,11 @@ class Result;
|
||||
class Rng;
|
||||
class BigO;
|
||||
|
||||
namespace detail {
|
||||
template <typename SetupOp>
|
||||
class SetupRunner;
|
||||
} // namespace detail
|
||||
|
||||
/**
|
||||
* @brief Renders output from a mustache-like template and benchmark results.
|
||||
*
|
||||
@@ -819,7 +824,7 @@ public:
|
||||
/**
|
||||
* @brief Minimum time each epoch should take.
|
||||
*
|
||||
* Default is zero, so we are fully relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see
|
||||
* Default is 1ms, so we are mostly relying on clockResolutionMultiple(). In most cases this is exactly what you want. If you see
|
||||
* that the evaluation is unreliable with a high `err%`, you can increase either minEpochTime() or minEpochIterations().
|
||||
*
|
||||
* @see maxEpochTime, minEpochIterations
|
||||
@@ -1007,7 +1012,21 @@ public:
|
||||
Bench& config(Config const& benchmarkConfig);
|
||||
ANKERL_NANOBENCH(NODISCARD) Config const& config() const noexcept;
|
||||
|
||||
/**
|
||||
* @brief Configure an untimed setup step per epoch (fluent API).
|
||||
*
|
||||
* Example: `bench.setup(...).run(...);`
|
||||
*/
|
||||
template <typename SetupOp>
|
||||
detail::SetupRunner<SetupOp> setup(SetupOp setupOp);
|
||||
|
||||
private:
|
||||
template <typename SetupOp, typename Op>
|
||||
Bench& runImpl(SetupOp& setupOp, Op&& op);
|
||||
|
||||
template <typename SetupOp>
|
||||
friend class detail::SetupRunner;
|
||||
|
||||
Config mConfig{};
|
||||
std::vector<Result> mResults{};
|
||||
};
|
||||
@@ -1207,14 +1226,44 @@ constexpr uint64_t Rng::rotl(uint64_t x, unsigned k) noexcept {
|
||||
return (x << k) | (x >> (64U - k));
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
|
||||
/**
 * Intermediate object of the fluent `bench.setup(...).run(...)` API.
 *
 * Stores a setup callable together with a reference to the Bench it was
 * created for, so that a subsequent run() can hand both to Bench::runImpl().
 *
 * @tparam SetupOp Type of the setup callable; it is invoked with no arguments.
 */
template <typename SetupOp>
|
||||
class SetupRunner {
|
||||
public:
|
||||
// Takes the setup callable by value and moves it into the member; `bench` is
// only referenced, not copied.
explicit SetupRunner(SetupOp setupOp, Bench& bench)
|
||||
: mSetupOp(std::move(setupOp))
|
||||
, mBench(bench) {}
|
||||
|
||||
// Runs the benchmark body `op` on the referenced Bench, passing the stored
// setup callable along to Bench::runImpl(). Returns the Bench so that further
// calls can be chained.
template <typename Op>
|
||||
ANKERL_NANOBENCH_NO_SANITIZE("integer")
|
||||
Bench& run(Op&& op) {
|
||||
return mBench.runImpl(mSetupOp, std::forward<Op>(op));
|
||||
}
|
||||
|
||||
private:
|
||||
// Owned copy of the setup callable (passed to runImpl by lvalue reference).
SetupOp mSetupOp;
|
||||
// Non-owning reference: the Bench must outlive this SetupRunner.
Bench& mBench;
|
||||
};
|
||||
} // namespace detail
|
||||
|
||||
// Runs the benchmark body `op` without any setup step: delegates to runImpl()
// with an empty lambda standing in for the setup callable.
template <typename Op>
|
||||
ANKERL_NANOBENCH_NO_SANITIZE("integer")
|
||||
Bench& Bench::run(Op&& op) {
|
||||
// Named local rather than a temporary because runImpl() takes the setup
// callable as a non-const lvalue reference (SetupOp&).
auto setupOp = [] {};
|
||||
return runImpl(setupOp, std::forward<Op>(op));
|
||||
}
|
||||
|
||||
template <typename SetupOp, typename Op>
|
||||
ANKERL_NANOBENCH_NO_SANITIZE("integer")
|
||||
Bench& Bench::runImpl(SetupOp& setupOp, Op&& op) {
|
||||
// It is important that this method is kept short so the compiler can do better optimizations/ inlining of op()
|
||||
detail::IterationLogic iterationLogic(*this);
|
||||
auto& pc = detail::performanceCounters();
|
||||
|
||||
while (auto n = iterationLogic.numIters()) {
|
||||
setupOp();
|
||||
|
||||
pc.beginMeasure();
|
||||
Clock::time_point const before = Clock::now();
|
||||
while (n-- > 0) {
|
||||
@@ -1229,6 +1278,11 @@ Bench& Bench::run(Op&& op) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Entry point of the fluent setup API: wraps `setupOp` and a reference to this
// Bench in a detail::SetupRunner. Nothing is executed here; the benchmark only
// runs once run() is called on the returned object. The returned object refers
// to *this, so it must not outlive this Bench.
template <typename SetupOp>
|
||||
detail::SetupRunner<SetupOp> Bench::setup(SetupOp setupOp) {
|
||||
return detail::SetupRunner<SetupOp>(std::move(setupOp), *this);
|
||||
}
|
||||
|
||||
// Performs all evaluations.
|
||||
template <typename Op>
|
||||
Bench& Bench::run(char const* benchmarkName, Op&& op) {
|
||||
|
||||
@@ -22,10 +22,9 @@ static void FindByte(benchmark::Bench& bench)
|
||||
file.seek(0, SEEK_SET);
|
||||
BufferedFile bf{file, /*nBufSize=*/file_size + 1, /*nRewindIn=*/file_size};
|
||||
|
||||
bench.run([&] {
|
||||
bf.SetPos(0);
|
||||
bf.FindByte(std::byte(1));
|
||||
});
|
||||
bench.epochIterations(1)
|
||||
.setup([&] { bf.SetPos(0); })
|
||||
.run([&] { bf.FindByte(std::byte(1)); });
|
||||
|
||||
assert(file.fclose() == 0);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user