mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-01-21 15:50:07 +01:00
Merge bitcoin/bitcoin#27598: bench: Add SHA256 implementation specific benchmarks
ce6df7df9b bench: Add SHA256 implementation specific benchmarks (Hennadii Stepanov) 5f72417176 Add ability to specify SHA256 implementation for benchmark purposes (Hennadii Stepanov) Pull request description: On the master branch, only the best available `SHA256` implementation is being benchmarked. This PR makes `bench_bitcoin` benchmark all `SHA256` implementations that are available on the system. For example: - on Linux: ``` $ ./src/bench/bench_bitcoin -filter=SHA.* Using the 'x86_shani(1way,2way)' SHA256 implementation | ns/byte | byte/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1.00 | 1,002,545,462.93 | 0.4% | 0.01 | `SHA1` | 2.91 | 344,117,991.18 | 0.1% | 0.03 | `SHA256 using the 'standard' SHA256 implementation` | 2.21 | 453,081,794.40 | 0.1% | 0.02 | `SHA256 using the 'sse4(1way),sse41(4way)' SHA256 implementation` | 2.21 | 453,396,506.58 | 0.1% | 0.02 | `SHA256 using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation` | 0.53 | 1,870,520,687.49 | 0.1% | 0.01 | `SHA256 using the 'x86_shani(1way,2way)' SHA256 implementation` | 7.90 | 126,627,134.33 | 0.0% | 0.01 | `SHA256D64_1024 using the 'standard' SHA256 implementation` | 3.94 | 253,850,206.07 | 0.0% | 0.01 | `SHA256D64_1024 using the 'sse4(1way),sse41(4way)' SHA256 implementation` | 1.40 | 716,247,553.38 | 0.4% | 0.01 | `SHA256D64_1024 using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation` | 1.26 | 792,706,270.13 | 0.9% | 0.01 | `SHA256D64_1024 using the 'x86_shani(1way,2way)' SHA256 implementation` | 6.75 | 148,172,097.64 | 0.2% | 0.01 | `SHA256_32b using the 'standard' SHA256 implementation` | 4.90 | 204,156,289.96 | 0.1% | 0.01 | `SHA256_32b using the 'sse4(1way),sse41(4way)' SHA256 implementation` | 4.90 | 204,101,274.22 | 0.1% | 0.01 | `SHA256_32b using the 'sse4(1way),sse41(4way),avx2(8way)' SHA256 implementation` | 1.70 | 589,052,595.35 | 0.4% | 0.01 | `SHA256_32b using the 'x86_shani(1way,2way)' SHA256 
implementation` | 2.21 | 453,441,736.14 | 1.0% | 0.02 | `SHA3_256_1M` | 1.92 | 521,807,101.48 | 1.0% | 0.02 | `SHA512` ``` - on macOS (M1): ``` % ./src/bench/bench_bitcoin -filter=SHA.\* Using the 'arm_shani(1way,2way)' SHA256 implementation | ns/byte | byte/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1.36 | 737,644,274.00 | 0.6% | 0.02 | `SHA1` | 3.08 | 324,556,777.15 | 0.2% | 0.03 | `SHA256 using the 'standard' SHA256 implementation` | 0.45 | 2,198,104,135.18 | 0.3% | 0.01 | `SHA256 using the 'arm_shani(1way,2way)' SHA256 implementation` | 8.84 | 113,131,299.18 | 0.0% | 0.01 | `SHA256D64_1024 using the 'standard' SHA256 implementation` | 0.94 | 1,059,406,239.36 | 0.0% | 0.01 | `SHA256D64_1024 using the 'arm_shani(1way,2way)' SHA256 implementation` | 6.17 | 162,050,659.51 | 0.2% | 0.01 | `SHA256_32b using the 'standard' SHA256 implementation` | 1.15 | 866,637,155.98 | 0.0% | 0.01 | `SHA256_32b using the 'arm_shani(1way,2way)' SHA256 implementation` | 1.69 | 592,636,491.59 | 0.2% | 0.02 | `SHA3_256_1M` | 1.89 | 528,785,775.66 | 0.0% | 0.02 | `SHA512` ``` Found it useful, while working on https://github.com/bitcoin/bitcoin/pull/24773. ACKs for top commit: martinus: ACK ce6df7df9b. I would have created a helper function in the test to avoid the code duplication for each test, but that's just me nitpicking. Here are results from my Ryzen 7950X, with `./src/bench/bench_bitcoin -filter="SHA256.*" -min-time=1000`: MarcoFalke: review ACK ce6df7df9b 🏵 sipa: ACK ce6df7df9b Tree-SHA512: e3de50e11b9a3a0d1e05583786041d4dc9afa2022e2115d75d6d1f63b11f62f6336f093001e53a631431d558c4dae29c596755c9e2d6aa78c382270116cc1f7f
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
#include <crypto/siphash.h>
|
||||
#include <hash.h>
|
||||
#include <random.h>
|
||||
#include <tinyformat.h>
|
||||
#include <uint256.h>
|
||||
|
||||
/* Number of bytes to hash per iteration */
|
||||
@@ -36,13 +37,48 @@ static void SHA1(benchmark::Bench& bench)
|
||||
});
|
||||
}
|
||||
|
||||
static void SHA256(benchmark::Bench& bench)
|
||||
/**
 * Benchmark CSHA256 over a zero-filled BUFFER_SIZE-byte input while only the
 * STANDARD implementation is permitted.
 *
 * The benchmark name embeds the implementation name returned by
 * SHA256AutoDetect(), i.e. what was actually selected.
 */
static void SHA256_STANDARD(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::STANDARD)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> input(BUFFER_SIZE, 0);
    uint8_t digest[CSHA256::OUTPUT_SIZE];
    bench.batch(input.size()).unit("byte").run([&] {
        CSHA256().Write(input.data(), input.size()).Finalize(digest);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark CSHA256 over a zero-filled BUFFER_SIZE-byte input while detection
 * is limited to the USE_SSE4 flag.
 *
 * The benchmark name embeds the implementation name returned by
 * SHA256AutoDetect(), i.e. what was actually selected.
 */
static void SHA256_SSE4(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> input(BUFFER_SIZE, 0);
    uint8_t digest[CSHA256::OUTPUT_SIZE];
    bench.batch(input.size()).unit("byte").run([&] {
        CSHA256().Write(input.data(), input.size()).Finalize(digest);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark CSHA256 over a zero-filled BUFFER_SIZE-byte input while detection
 * is limited to the USE_SSE4_AND_AVX2 flags.
 *
 * The benchmark name embeds the implementation name returned by
 * SHA256AutoDetect(), i.e. what was actually selected.
 */
static void SHA256_AVX2(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> input(BUFFER_SIZE, 0);
    uint8_t digest[CSHA256::OUTPUT_SIZE];
    bench.batch(input.size()).unit("byte").run([&] {
        CSHA256().Write(input.data(), input.size()).Finalize(digest);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark CSHA256 over a zero-filled BUFFER_SIZE-byte input while detection
 * is limited to the USE_SSE4_AND_SHANI flags.
 *
 * The benchmark name embeds the implementation name returned by
 * SHA256AutoDetect(), i.e. what was actually selected.
 */
static void SHA256_SHANI(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> input(BUFFER_SIZE, 0);
    uint8_t digest[CSHA256::OUTPUT_SIZE];
    bench.batch(input.size()).unit("byte").run([&] {
        CSHA256().Write(input.data(), input.size()).Finalize(digest);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
static void SHA3_256_1M(benchmark::Bench& bench)
|
||||
@@ -54,22 +90,92 @@ static void SHA3_256_1M(benchmark::Bench& bench)
|
||||
});
|
||||
}
|
||||
|
||||
static void SHA256_32b(benchmark::Bench& bench)
|
||||
/**
 * Benchmark CSHA256 on a 32-byte message while only the STANDARD
 * implementation is permitted.
 *
 * The digest is written back into the input buffer, so every iteration hashes
 * the output of the previous one.
 */
static void SHA256_32b_STANDARD(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::STANDARD)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> buffer(32, 0);
    bench.batch(buffer.size()).unit("byte").run([&] {
        CSHA256().Write(buffer.data(), buffer.size()).Finalize(buffer.data());
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
static void SHA256D64_1024(benchmark::Bench& bench)
|
||||
/**
 * Benchmark CSHA256 on a 32-byte message while detection is limited to the
 * USE_SSE4 flag.
 *
 * The digest is written back into the input buffer, so every iteration hashes
 * the output of the previous one.
 */
static void SHA256_32b_SSE4(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> buffer(32, 0);
    bench.batch(buffer.size()).unit("byte").run([&] {
        CSHA256().Write(buffer.data(), buffer.size()).Finalize(buffer.data());
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark CSHA256 on a 32-byte message while detection is limited to the
 * USE_SSE4_AND_AVX2 flags.
 *
 * The digest is written back into the input buffer, so every iteration hashes
 * the output of the previous one.
 */
static void SHA256_32b_AVX2(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> buffer(32, 0);
    bench.batch(buffer.size()).unit("byte").run([&] {
        CSHA256().Write(buffer.data(), buffer.size()).Finalize(buffer.data());
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark CSHA256 on a 32-byte message while detection is limited to the
 * USE_SSE4_AND_SHANI flags.
 *
 * The digest is written back into the input buffer, so every iteration hashes
 * the output of the previous one.
 */
static void SHA256_32b_SHANI(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> buffer(32, 0);
    bench.batch(buffer.size()).unit("byte").run([&] {
        CSHA256().Write(buffer.data(), buffer.size()).Finalize(buffer.data());
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark SHA256D64 on 1024 blobs of 64 bytes each while only the STANDARD
 * implementation is permitted.
 *
 * The same zero-initialised buffer is passed as both output and input, so the
 * results overwrite the data being hashed.
 */
static void SHA256D64_1024_STANDARD(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::STANDARD)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> blobs(64 * 1024, 0);
    bench.batch(blobs.size()).unit("byte").run([&] {
        SHA256D64(blobs.data(), blobs.data(), 1024);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark SHA256D64 on 1024 blobs of 64 bytes each while detection is
 * limited to the USE_SSE4 flag.
 *
 * The same zero-initialised buffer is passed as both output and input, so the
 * results overwrite the data being hashed.
 */
static void SHA256D64_1024_SSE4(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> blobs(64 * 1024, 0);
    bench.batch(blobs.size()).unit("byte").run([&] {
        SHA256D64(blobs.data(), blobs.data(), 1024);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark SHA256D64 on 1024 blobs of 64 bytes each while detection is
 * limited to the USE_SSE4_AND_AVX2 flags.
 *
 * The same zero-initialised buffer is passed as both output and input, so the
 * results overwrite the data being hashed.
 */
static void SHA256D64_1024_AVX2(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_AVX2)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> blobs(64 * 1024, 0);
    bench.batch(blobs.size()).unit("byte").run([&] {
        SHA256D64(blobs.data(), blobs.data(), 1024);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
/**
 * Benchmark SHA256D64 on 1024 blobs of 64 bytes each while detection is
 * limited to the USE_SSE4_AND_SHANI flags.
 *
 * The same zero-initialised buffer is passed as both output and input, so the
 * results overwrite the data being hashed.
 */
static void SHA256D64_1024_SHANI(benchmark::Bench& bench)
{
    const std::string impl_name{SHA256AutoDetect(sha256_implementation::USE_SSE4_AND_SHANI)};
    bench.name(strprintf("%s using the '%s' SHA256 implementation", __func__, impl_name));

    std::vector<uint8_t> blobs(64 * 1024, 0);
    bench.batch(blobs.size()).unit("byte").run([&] {
        SHA256D64(blobs.data(), blobs.data(), 1024);
    });

    // Re-run detection with the default argument so the implementation
    // selection is no longer restricted once this benchmark is done.
    SHA256AutoDetect();
}
|
||||
|
||||
static void SHA512(benchmark::Bench& bench)
|
||||
@@ -152,13 +258,22 @@ static void MuHashPrecompute(benchmark::Bench& bench)
|
||||
|
||||
BENCHMARK(BenchRIPEMD160, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA1, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_STANDARD, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_SSE4, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_AVX2, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_SHANI, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA512, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA3_256_1M, benchmark::PriorityLevel::HIGH);
|
||||
|
||||
BENCHMARK(SHA256_32b, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_32b_STANDARD, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_32b_SSE4, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_32b_AVX2, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256_32b_SHANI, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SipHash_32b, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256D64_1024, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256D64_1024_STANDARD, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256D64_1024_SSE4, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256D64_1024_AVX2, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(SHA256D64_1024_SHANI, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(FastRandom_32bit, benchmark::PriorityLevel::HIGH);
|
||||
BENCHMARK(FastRandom_1bit, benchmark::PriorityLevel::HIGH);
|
||||
|
||||
|
||||
@@ -579,9 +579,15 @@ bool AVXEnabled()
|
||||
} // namespace
|
||||
|
||||
|
||||
std::string SHA256AutoDetect()
|
||||
std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation)
|
||||
{
|
||||
std::string ret = "standard";
|
||||
Transform = sha256::Transform;
|
||||
TransformD64 = sha256::TransformD64;
|
||||
TransformD64_2way = nullptr;
|
||||
TransformD64_4way = nullptr;
|
||||
TransformD64_8way = nullptr;
|
||||
|
||||
#if defined(USE_ASM) && defined(HAVE_GETCPUID)
|
||||
bool have_sse4 = false;
|
||||
bool have_xsave = false;
|
||||
@@ -592,7 +598,9 @@ std::string SHA256AutoDetect()
|
||||
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
GetCPUID(1, 0, eax, ebx, ecx, edx);
|
||||
have_sse4 = (ecx >> 19) & 1;
|
||||
if (use_implementation & sha256_implementation::USE_SSE4) {
|
||||
have_sse4 = (ecx >> 19) & 1;
|
||||
}
|
||||
have_xsave = (ecx >> 27) & 1;
|
||||
have_avx = (ecx >> 28) & 1;
|
||||
if (have_xsave && have_avx) {
|
||||
@@ -600,8 +608,12 @@ std::string SHA256AutoDetect()
|
||||
}
|
||||
if (have_sse4) {
|
||||
GetCPUID(7, 0, eax, ebx, ecx, edx);
|
||||
have_avx2 = (ebx >> 5) & 1;
|
||||
have_x86_shani = (ebx >> 29) & 1;
|
||||
if (use_implementation & sha256_implementation::USE_AVX2) {
|
||||
have_avx2 = (ebx >> 5) & 1;
|
||||
}
|
||||
if (use_implementation & sha256_implementation::USE_SHANI) {
|
||||
have_x86_shani = (ebx >> 29) & 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(ENABLE_X86_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
|
||||
@@ -637,27 +649,28 @@ std::string SHA256AutoDetect()
|
||||
|
||||
#if defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
|
||||
bool have_arm_shani = false;
|
||||
|
||||
if (use_implementation & sha256_implementation::USE_SHANI) {
|
||||
#if defined(__linux__)
|
||||
#if defined(__arm__) // 32-bit
|
||||
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
|
||||
have_arm_shani = true;
|
||||
}
|
||||
if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
|
||||
have_arm_shani = true;
|
||||
}
|
||||
#endif
|
||||
#if defined(__aarch64__) // 64-bit
|
||||
if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
|
||||
have_arm_shani = true;
|
||||
}
|
||||
if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
|
||||
have_arm_shani = true;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MAC_OSX)
|
||||
int val = 0;
|
||||
size_t len = sizeof(val);
|
||||
if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
|
||||
have_arm_shani = val != 0;
|
||||
}
|
||||
int val = 0;
|
||||
size_t len = sizeof(val);
|
||||
if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
|
||||
have_arm_shani = val != 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (have_arm_shani) {
|
||||
Transform = sha256_arm_shani::Transform;
|
||||
|
||||
@@ -26,10 +26,22 @@ public:
|
||||
CSHA256& Reset();
|
||||
};
|
||||
|
||||
/**
 * Bit flags accepted by SHA256AutoDetect() to choose which SHA256
 * implementations it may consider.
 *
 * The three single-bit flags can be OR-ed together; the named combinations
 * below are provided for convenience. STANDARD sets no flag at all.
 */
namespace sha256_implementation {
enum UseImplementation : uint8_t {
    // No optional implementation enabled.
    STANDARD = 0x00,

    // Individual feature bits.
    USE_SSE4 = 0x01,
    USE_AVX2 = 0x02,
    USE_SHANI = 0x04,

    // Pre-built combinations of the feature bits.
    USE_SSE4_AND_AVX2 = USE_SSE4 | USE_AVX2,
    USE_SSE4_AND_SHANI = USE_SSE4 | USE_SHANI,
    USE_ALL = USE_SSE4 | USE_AVX2 | USE_SHANI,
};
} // namespace sha256_implementation
|
||||
|
||||
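/** Autodetect the best available SHA256 implementation among those permitted
|
||||
* by the use_implementation bit mask (every implementation by default).
|
||||
* Returns the name of the implementation.
|
||||
*/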
|
||||
std::string SHA256AutoDetect();
|
||||
std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implementation = sha256_implementation::USE_ALL);
|
||||
|
||||
/** Compute multiple double-SHA256's of 64-byte blobs.
|
||||
* output: pointer to a blocks*32 byte output buffer
|
||||
|
||||
Reference in New Issue
Block a user