mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-07-04 12:42:05 +02:00
Merge #13191: Specialized double-SHA256 with 64 byte inputs with SSE4.1 and AVX2
4defdfab94
[MOVEONLY] Move unused Merkle branch code to tests (Pieter Wuille)4437d6e1f3
8-way AVX2 implementation for double SHA256 on 64-byte inputs (Pieter Wuille)230294bf5f
4-way SSE4.1 implementation for double SHA256 on 64-byte inputs (Pieter Wuille)1f0e7ca09c
Use SHA256D64 in Merkle root computation (Pieter Wuille)d0c9632883
Specialized double sha256 for 64 byte inputs (Pieter Wuille)57f34630fb
Refactor SHA256 code (Pieter Wuille)0df017889b
Benchmark Merkle root computation (Pieter Wuille) Pull request description: This introduces a framework for specialized double-SHA256 with 64 byte inputs. 4 different implementations are provided: * Generic C++ (reusing the normal SHA256 code) * Specialized C++ for 64-byte inputs, but no special instructions * 4-way using SSE4.1 intrinsics * 8-way using AVX2 intrinsics On my own system (AVX2 capable), I get these benchmarks for computing the Merkle root of 9001 leaves (supported lengths / special instructions / parallellism): * 7.2 ms with varsize/naive/1way (master, non-SSE4 hardware) * 5.8 ms with size64/naive/1way (this PR, non-SSE4 capable systems) * 4.8 ms with varsize/SSE4/1way (master, SSE4 hardware) * 2.9 ms with size64/SSE4/4way (this PR, SSE4 hardware) * 1.1 ms with size64/AVX2/8way (this PR, AVX2 hardware) Tree-SHA512: efa32d48b32820d9ce788ead4eb583949265be8c2e5f538c94bc914e92d131a57f8c1ee26c6f998e81fb0e30675d4e2eddc3360bcf632676249036018cff343e
This commit is contained in:
44
configure.ac
44
configure.ac
@ -312,6 +312,8 @@ fi
|
||||
# be compiled with them, rather that specific objects/libs may use them after checking for runtime
|
||||
# compatibility.
|
||||
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
|
||||
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
|
||||
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])
|
||||
|
||||
TEMP_CXXFLAGS="$CXXFLAGS"
|
||||
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
|
||||
@ -335,6 +337,44 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
)
|
||||
CXXFLAGS="$TEMP_CXXFLAGS"
|
||||
|
||||
TEMP_CXXFLAGS="$CXXFLAGS"
|
||||
CXXFLAGS="$CXXFLAGS $SSE41_CXXFLAGS"
|
||||
AC_MSG_CHECKING(for SSE4.1 intrinsics)
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdint.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <immintrin.h>
|
||||
#elif defined(__GNUC__)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
]],[[
|
||||
__m128i l = _mm_set1_epi32(0);
|
||||
return _mm_extract_epi32(l, 3);
|
||||
]])],
|
||||
[ AC_MSG_RESULT(yes); enable_sse41=yes; AC_DEFINE(ENABLE_SSE41, 1, [Define this symbol to build code that uses SSE4.1 intrinsics]) ],
|
||||
[ AC_MSG_RESULT(no)]
|
||||
)
|
||||
CXXFLAGS="$TEMP_CXXFLAGS"
|
||||
|
||||
TEMP_CXXFLAGS="$CXXFLAGS"
|
||||
CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS"
|
||||
AC_MSG_CHECKING(for AVX2 intrinsics)
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdint.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <immintrin.h>
|
||||
#elif defined(__GNUC__) && defined(__AVX2__)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
]],[[
|
||||
__m256i l = _mm256_set1_epi32(0);
|
||||
return _mm256_extract_epi32(l, 7);
|
||||
]])],
|
||||
[ AC_MSG_RESULT(yes); enable_avx2=yes; AC_DEFINE(ENABLE_AVX2, 1, [Define this symbol to build code that uses AVX2 intrinsics]) ],
|
||||
[ AC_MSG_RESULT(no)]
|
||||
)
|
||||
CXXFLAGS="$TEMP_CXXFLAGS"
|
||||
|
||||
CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"
|
||||
|
||||
AC_ARG_WITH([utils],
|
||||
@ -1253,6 +1293,8 @@ AM_CONDITIONAL([USE_LCOV],[test x$use_lcov = xyes])
|
||||
AM_CONDITIONAL([GLIBC_BACK_COMPAT],[test x$use_glibc_compat = xyes])
|
||||
AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
|
||||
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
|
||||
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
|
||||
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
|
||||
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])
|
||||
|
||||
AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
|
||||
@ -1295,6 +1337,8 @@ AC_SUBST(PIE_FLAGS)
|
||||
AC_SUBST(SANITIZER_CXXFLAGS)
|
||||
AC_SUBST(SANITIZER_LDFLAGS)
|
||||
AC_SUBST(SSE42_CXXFLAGS)
|
||||
AC_SUBST(SSE41_CXXFLAGS)
|
||||
AC_SUBST(AVX2_CXXFLAGS)
|
||||
AC_SUBST(LIBTOOL_APP_LDFLAGS)
|
||||
AC_SUBST(USE_UPNP)
|
||||
AC_SUBST(USE_QRCODE)
|
||||
|
Reference in New Issue
Block a user