Merge bitcoin/bitcoin#24115: ARMv8 SHA2 Intrinsics

aaa1d03d3a Add optimized sha256d64_arm_shani::Transform_2way (Pieter Wuille)
fe0629852a Implement sha256_arm_shani::Transform (Pavol Rusnak)
48a72fa81f Add sha256_arm_shani to build system (Pavol Rusnak)
c2b7934250 Rename SHANI to X86_SHANI to allow future implementation of ARM_SHANI (Pavol Rusnak)

Pull request description:

  This PR adds support for ARMv8 SHA2 Intrinsics.

  Fixes https://github.com/bitcoin/bitcoin/issues/13401 and https://github.com/bitcoin/bitcoin/issues/17414

  * Integration part was done by me.
  * The original SHA2 NI code comes from https://github.com/noloader/SHA-Intrinsics/blob/master/sha256-arm.c
  * Minor optimizations from https://github.com/rollmeister/bitcoin-armv8/blob/master/src/crypto/sha256.cpp are applied too.
  * The 2-way transform added by @sipa

ACKs for top commit:
  laanwj:
    Code review and lightly tested ACK aaa1d03d3a

Tree-SHA512: 9689d6390c004269cb1ee79ed05430d7d35a6efef2554a2b6732f7258a11e7e959b3306c04b4e8637a9623fb4c12d1c1b3592da0ff0dc6d737932db302509669
This commit is contained in:
laanwj
2022-02-14 20:47:24 +01:00
5 changed files with 1012 additions and 30 deletions

View File

@@ -469,7 +469,7 @@ AX_CHECK_COMPILE_FLAG([-fno-extended-identifiers], [CXXFLAGS="$CXXFLAGS -fno-ext
enable_sse42=no
enable_sse41=no
enable_avx2=no
enable_shani=no
enable_x86_shani=no
if test "$use_asm" = "yes"; then
@@ -481,7 +481,7 @@ dnl x86
AX_CHECK_COMPILE_FLAG([-msse4.2], [SSE42_CXXFLAGS="-msse4.2"], [], [$CXXFLAG_WERROR])
AX_CHECK_COMPILE_FLAG([-msse4.1], [SSE41_CXXFLAGS="-msse4.1"], [], [$CXXFLAG_WERROR])
AX_CHECK_COMPILE_FLAG([-mavx -mavx2], [AVX2_CXXFLAGS="-mavx -mavx2"], [], [$CXXFLAG_WERROR])
AX_CHECK_COMPILE_FLAG([-msse4 -msha], [SHANI_CXXFLAGS="-msse4 -msha"], [], [$CXXFLAG_WERROR])
AX_CHECK_COMPILE_FLAG([-msse4 -msha], [X86_SHANI_CXXFLAGS="-msse4 -msha"], [], [$CXXFLAG_WERROR])
enable_clmul=
AX_CHECK_COMPILE_FLAG([-mpclmul], [enable_clmul=yes], [], [$CXXFLAG_WERROR], [AC_LANG_PROGRAM([
@@ -554,8 +554,8 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
CXXFLAGS="$TEMP_CXXFLAGS"
TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $SHANI_CXXFLAGS"
AC_MSG_CHECKING([for SHA-NI intrinsics])
CXXFLAGS="$CXXFLAGS $X86_SHANI_CXXFLAGS"
AC_MSG_CHECKING([for x86 SHA-NI intrinsics])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>
#include <immintrin.h>
@@ -565,17 +565,18 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
__m128i k = _mm_set1_epi32(2);
return _mm_extract_epi32(_mm_sha256rnds2_epu32(i, i, k), 0);
]])],
[ AC_MSG_RESULT([yes]); enable_shani=yes; AC_DEFINE([ENABLE_SHANI], [1], [Define this symbol to build code that uses SHA-NI intrinsics]) ],
[ AC_MSG_RESULT([yes]); enable_x86_shani=yes; AC_DEFINE([ENABLE_X86_SHANI], [1], [Define this symbol to build code that uses x86 SHA-NI intrinsics]) ],
[ AC_MSG_RESULT([no])]
)
CXXFLAGS="$TEMP_CXXFLAGS"
# ARM
AX_CHECK_COMPILE_FLAG([-march=armv8-a+crc+crypto], [ARM_CRC_CXXFLAGS="-march=armv8-a+crc+crypto"], [], [$CXXFLAG_WERROR])
AX_CHECK_COMPILE_FLAG([-march=armv8-a+crc+crypto], [ARM_SHANI_CXXFLAGS="-march=armv8-a+crc+crypto"], [], [$CXXFLAG_WERROR])
TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $ARM_CRC_CXXFLAGS"
AC_MSG_CHECKING([for AArch64 CRC32 intrinsics])
AC_MSG_CHECKING([for ARMv8 CRC32 intrinsics])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <arm_acle.h>
#include <arm_neon.h>
@@ -592,6 +593,24 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
)
CXXFLAGS="$TEMP_CXXFLAGS"
TEMP_CXXFLAGS="$CXXFLAGS"
CXXFLAGS="$CXXFLAGS $ARM_SHANI_CXXFLAGS"
AC_MSG_CHECKING([for ARMv8 SHA-NI intrinsics])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <arm_acle.h>
#include <arm_neon.h>
]],[[
uint32x4_t a, b, c;
vsha256h2q_u32(a, b, c);
vsha256hq_u32(a, b, c);
vsha256su0q_u32(a, b);
vsha256su1q_u32(a, b, c);
]])],
[ AC_MSG_RESULT([yes]); enable_arm_shani=yes; AC_DEFINE([ENABLE_ARM_SHANI], [1], [Define this symbol to build code that uses ARMv8 SHA-NI intrinsics]) ],
[ AC_MSG_RESULT([no])]
)
CXXFLAGS="$TEMP_CXXFLAGS"
fi
CPPFLAGS="$CPPFLAGS -DHAVE_BUILD_INFO"
@@ -1774,8 +1793,9 @@ AM_CONDITIONAL([HARDEN], [test "$use_hardening" = "yes"])
AM_CONDITIONAL([ENABLE_SSE42], [test "$enable_sse42" = "yes"])
AM_CONDITIONAL([ENABLE_SSE41], [test "$enable_sse41" = "yes"])
AM_CONDITIONAL([ENABLE_AVX2], [test "$enable_avx2" = "yes"])
AM_CONDITIONAL([ENABLE_SHANI], [test "$enable_shani" = "yes"])
AM_CONDITIONAL([ENABLE_X86_SHANI], [test "$enable_x86_shani" = "yes"])
AM_CONDITIONAL([ENABLE_ARM_CRC], [test "$enable_arm_crc" = "yes"])
AM_CONDITIONAL([ENABLE_ARM_SHANI], [test "$enable_arm_shani" = "yes"])
AM_CONDITIONAL([USE_ASM], [test "$use_asm" = "yes"])
AM_CONDITIONAL([WORDS_BIGENDIAN], [test "$ac_cv_c_bigendian" = "yes"])
AM_CONDITIONAL([USE_NATPMP], [test "$use_natpmp" = "yes"])
@@ -1832,8 +1852,9 @@ AC_SUBST(SSE42_CXXFLAGS)
AC_SUBST(SSE41_CXXFLAGS)
AC_SUBST(CLMUL_CXXFLAGS)
AC_SUBST(AVX2_CXXFLAGS)
AC_SUBST(SHANI_CXXFLAGS)
AC_SUBST(X86_SHANI_CXXFLAGS)
AC_SUBST(ARM_CRC_CXXFLAGS)
AC_SUBST(ARM_SHANI_CXXFLAGS)
AC_SUBST(LIBTOOL_APP_LDFLAGS)
AC_SUBST(USE_SQLITE)
AC_SUBST(USE_BDB)