From 6ec1ca7c85a4009b77e149a798a331592b96ea42 Mon Sep 17 00:00:00 2001 From: Hennadii Stepanov <32963518+hebasto@users.noreply.github.com> Date: Mon, 12 Feb 2024 14:26:00 +0000 Subject: [PATCH 1/2] build: Fix test for SSE4.1 intrinsics This change uses the `_mm_blend_epi16` SSE4.1 function used in our code and fixes false-positive cases, for example, when CXXFLAGS="-mno-sse4.1" is provided. --- configure.ac | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 4f715158731..2c73e0920c9 100644 --- a/configure.ac +++ b/configure.ac @@ -504,11 +504,12 @@ TEMP_CXXFLAGS="$CXXFLAGS" CXXFLAGS="$SSE41_CXXFLAGS $CXXFLAGS" AC_MSG_CHECKING([for SSE4.1 intrinsics]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ - #include #include ]],[[ - __m128i l = _mm_set1_epi32(0); - return _mm_extract_epi32(l, 3); + __m128i a = _mm_set1_epi32(0); + __m128i b = _mm_set1_epi32(1); + __m128i r = _mm_blend_epi16(a, b, 0xFF); + return _mm_extract_epi32(r, 3); ]])], [ AC_MSG_RESULT([yes]); enable_sse41=yes; AC_DEFINE([ENABLE_SSE41], [1], [Define this symbol to build code that uses SSE4.1 intrinsics]) ], [ AC_MSG_RESULT([no])] From d440f13db02c82c842000abe4fe4d0c721a4ad3b Mon Sep 17 00:00:00 2001 From: Hennadii Stepanov <32963518+hebasto@users.noreply.github.com> Date: Mon, 12 Feb 2024 14:26:16 +0000 Subject: [PATCH 2/2] crypto: Guard code with `ENABLE_SSE41` macro The code in `sha256_x86_shani.cpp` uses the `_mm_blend_epi16` function from the SSE4.1 instruction set. However, it is possible that SHA-NI is enabled even when SSE4.1 is disabled. This change avoids compilation errors in such a condition. 
--- src/Makefile.am | 10 +++++----- src/crypto/sha256.cpp | 2 +- src/crypto/sha256_x86_shani.cpp | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index b5d5c4652ab..9c1d890f5e5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,15 +53,15 @@ LIBBITCOIN_CRYPTO = $(LIBBITCOIN_CRYPTO_BASE) if ENABLE_SSE41 LIBBITCOIN_CRYPTO_SSE41 = crypto/libbitcoin_crypto_sse41.la LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_SSE41) +if ENABLE_X86_SHANI +LIBBITCOIN_CRYPTO_X86_SHANI = crypto/libbitcoin_crypto_x86_shani.la +LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_X86_SHANI) +endif endif if ENABLE_AVX2 LIBBITCOIN_CRYPTO_AVX2 = crypto/libbitcoin_crypto_avx2.la LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_AVX2) endif -if ENABLE_X86_SHANI -LIBBITCOIN_CRYPTO_X86_SHANI = crypto/libbitcoin_crypto_x86_shani.la -LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_X86_SHANI) -endif if ENABLE_ARM_SHANI LIBBITCOIN_CRYPTO_ARM_SHANI = crypto/libbitcoin_crypto_arm_shani.la LIBBITCOIN_CRYPTO += $(LIBBITCOIN_CRYPTO_ARM_SHANI) @@ -612,7 +612,7 @@ crypto_libbitcoin_crypto_x86_shani_la_LDFLAGS = $(AM_LDFLAGS) -static crypto_libbitcoin_crypto_x86_shani_la_CXXFLAGS = $(AM_CXXFLAGS) $(PIE_FLAGS) -static crypto_libbitcoin_crypto_x86_shani_la_CPPFLAGS = $(AM_CPPFLAGS) crypto_libbitcoin_crypto_x86_shani_la_CXXFLAGS += $(X86_SHANI_CXXFLAGS) -crypto_libbitcoin_crypto_x86_shani_la_CPPFLAGS += -DENABLE_X86_SHANI +crypto_libbitcoin_crypto_x86_shani_la_CPPFLAGS += -DENABLE_SSE41 -DENABLE_X86_SHANI crypto_libbitcoin_crypto_x86_shani_la_SOURCES = crypto/sha256_x86_shani.cpp # See explanation for -static in crypto_libbitcoin_crypto_base_la's LDFLAGS and diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp index 4c7bb6f20ff..bb98cd09d2d 100644 --- a/src/crypto/sha256.cpp +++ b/src/crypto/sha256.cpp @@ -623,7 +623,7 @@ std::string SHA256AutoDetect(sha256_implementation::UseImplementation use_implem } } -#if defined(ENABLE_X86_SHANI) +#if defined(ENABLE_SSE41) && 
defined(ENABLE_X86_SHANI) if (have_x86_shani) { Transform = sha256_x86_shani::Transform; TransformD64 = TransformD64Wrapper; diff --git a/src/crypto/sha256_x86_shani.cpp b/src/crypto/sha256_x86_shani.cpp index 79871bfcc11..74718281939 100644 --- a/src/crypto/sha256_x86_shani.cpp +++ b/src/crypto/sha256_x86_shani.cpp @@ -6,7 +6,7 @@ // Written and placed in public domain by Jeffrey Walton. // Based on code from Intel, and by Sean Gulley for the miTLS project. -#ifdef ENABLE_X86_SHANI +#if defined(ENABLE_SSE41) && defined(ENABLE_X86_SHANI) #include #include