mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-01-18 22:35:39 +01:00
Merge #13386: SHA256 implementations based on Intel SHA Extensions
Commits:
- 66b2cf1ccf Use immintrin.h everywhere for intrinsics (Pieter Wuille)
- 4c935e2eee Add SHA256 implementation using Intel SHA intrinsics (Pieter Wuille)
- 268400d318 [Refactor] CPU feature detection logic for SHA256 (Pieter Wuille)

Pull request description:

Based on #13191. This adds SHA256 implementations that use Intel's SHA Extension instructions (using intrinsics). This needs GCC 4.9 or Clang 3.4.

In addition to #13191, two extra implementations are provided:
* (a) A variable-length SHA256 implementation using SHA extensions.
* (b) A 2-way 64-byte input double-SHA256 implementation using SHA extensions.

Benchmarks for 9001-element Merkle tree root computation on an AMD Ryzen 1800X system:
* Using generic C++ code (pre-#10821): 6.1ms
* Using SSE4 (master, #10821): 4.6ms
* Using 4-way SSE4 specialized for 64-byte inputs (#13191): 2.8ms
* Using 8-way AVX2 specialized for 64-byte inputs (#13191): 2.1ms
* Using 2-way SHA-NI specialized for 64-byte inputs (this PR): 0.56ms

Benchmarks for 32-byte SHA256 on the same system:
* Using SSE4 (master, #10821): 190ns
* Using SHA-NI (this PR): 53ns

Benchmarks for 1000000-byte SHA256 on the same system:
* Using SSE4 (master, #10821): 2.5ms
* Using SHA-NI (this PR): 0.51ms

Tree-SHA512: 2b319e33b22579f815d91f9daf7994a5e1e799c4f73c13e15070dd54ba71f3f6438ccf77ae9cbd1ce76f972d9cbeb5f0edfea3d86f101bbc1055db70e42743b7
This commit is contained in:
28
configure.ac
28
configure.ac
@@ -320,6 +320,7 @@ fi
|
||||
AX_CHECK_COMPILE_FLAG([-msse4.2],[[SSE42_CXXFLAGS="-msse4.2"]],,[[$CXXFLAG_WERROR]])
|
||||
AX_CHECK_COMPILE_FLAG([-msse4.1],[[SSE41_CXXFLAGS="-msse4.1"]],,[[$CXXFLAG_WERROR]])
|
||||
AX_CHECK_COMPILE_FLAG([-mavx -mavx2],[[AVX2_CXXFLAGS="-mavx -mavx2"]],,[[$CXXFLAG_WERROR]])
|
||||
AX_CHECK_COMPILE_FLAG([-msse4 -msha],[[SHANI_CXXFLAGS="-msse4 -msha"]],,[[$CXXFLAG_WERROR]])
|
||||
|
||||
TEMP_CXXFLAGS="$CXXFLAGS"
|
||||
CXXFLAGS="$CXXFLAGS $SSE42_CXXFLAGS"
|
||||
@@ -348,11 +349,7 @@ CXXFLAGS="$CXXFLAGS $SSE41_CXXFLAGS"
|
||||
AC_MSG_CHECKING(for SSE4.1 intrinsics)
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdint.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <immintrin.h>
|
||||
#elif defined(__GNUC__)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
]],[[
|
||||
__m128i l = _mm_set1_epi32(0);
|
||||
return _mm_extract_epi32(l, 3);
|
||||
@@ -367,11 +364,7 @@ CXXFLAGS="$CXXFLAGS $AVX2_CXXFLAGS"
|
||||
AC_MSG_CHECKING(for AVX2 intrinsics)
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdint.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <immintrin.h>
|
||||
#elif defined(__GNUC__) && defined(__AVX2__)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
]],[[
|
||||
__m256i l = _mm256_set1_epi32(0);
|
||||
return _mm256_extract_epi32(l, 7);
|
||||
@@ -381,6 +374,23 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
)
|
||||
CXXFLAGS="$TEMP_CXXFLAGS"
|
||||
|
||||
TEMP_CXXFLAGS="$CXXFLAGS"
|
||||
CXXFLAGS="$CXXFLAGS $SHANI_CXXFLAGS"
|
||||
AC_MSG_CHECKING(for SHA-NI intrinsics)
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||
#include <stdint.h>
|
||||
#include <immintrin.h>
|
||||
]],[[
|
||||
__m128i i = _mm_set1_epi32(0);
|
||||
__m128i j = _mm_set1_epi32(1);
|
||||
__m128i k = _mm_set1_epi32(2);
|
||||
return _mm_extract_epi32(_mm_sha256rnds2_epu32(i, i, k), 0);
|
||||
]])],
|
||||
[ AC_MSG_RESULT(yes); enable_shani=yes; AC_DEFINE(ENABLE_SHANI, 1, [Define this symbol to build code that uses SHA-NI intrinsics]) ],
|
||||
[ AC_MSG_RESULT(no)]
|
||||
)
|
||||
CXXFLAGS="$TEMP_CXXFLAGS"
|
||||
|
||||
CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO -D__STDC_FORMAT_MACROS"
|
||||
|
||||
AC_ARG_WITH([utils],
|
||||
@@ -1309,6 +1319,7 @@ AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
|
||||
AM_CONDITIONAL([ENABLE_HWCRC32],[test x$enable_hwcrc32 = xyes])
|
||||
AM_CONDITIONAL([ENABLE_SSE41],[test x$enable_sse41 = xyes])
|
||||
AM_CONDITIONAL([ENABLE_AVX2],[test x$enable_avx2 = xyes])
|
||||
AM_CONDITIONAL([ENABLE_SHANI],[test x$enable_shani = xyes])
|
||||
AM_CONDITIONAL([USE_ASM],[test x$use_asm = xyes])
|
||||
|
||||
AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
|
||||
@@ -1353,6 +1364,7 @@ AC_SUBST(SANITIZER_LDFLAGS)
|
||||
AC_SUBST(SSE42_CXXFLAGS)
|
||||
AC_SUBST(SSE41_CXXFLAGS)
|
||||
AC_SUBST(AVX2_CXXFLAGS)
|
||||
AC_SUBST(SHANI_CXXFLAGS)
|
||||
AC_SUBST(LIBTOOL_APP_LDFLAGS)
|
||||
AC_SUBST(USE_UPNP)
|
||||
AC_SUBST(USE_QRCODE)
|
||||
|
||||
Reference in New Issue
Block a user