Squashed 'src/crc32c/' changes from 224988680f..b5ef9be675

b5ef9be675 Merge #1: Merge changes from upstream
9e7f512430 Merge remote-tracking branch 'origin/master' into bitcoin-fork
1f85030246 Add support for ARM64 darwin (#43)
3bb959c982 Remove unnecessary reinterpret_cast (#42)
2e97ab26b1 Fix (unused) ReadUint64LE for BE machines (#41)
47b40d2209 Bump dependencies. (#40)
ba74185625 Move CI to Visual Studio 2019.
efa301a7e5 Allow different C/C++ standards when this is used as a subproject.
cc6d71465e CMake: Use configure_package_config_file()

git-subtree-dir: src/crc32c
git-subtree-split: b5ef9be6755a2e61e2988bb238f13d1c0ee1fa0a
This commit is contained in:
MarcoFalke
2020-12-08 17:08:07 +01:00
parent 2e1819311a
commit 90c0f267bd
10 changed files with 87 additions and 60 deletions

View File

@@ -8,7 +8,7 @@
#include <cstdint>
#include "./crc32c_arm64.h"
#include "./crc32c_arm64_linux_check.h"
#include "./crc32c_arm64_check.h"
#include "./crc32c_internal.h"
#include "./crc32c_sse42.h"
#include "./crc32c_sse42_check.h"
@@ -20,8 +20,8 @@ uint32_t Extend(uint32_t crc, const uint8_t* data, size_t count) {
static bool can_use_sse42 = CanUseSse42();
if (can_use_sse42) return ExtendSse42(crc, data, count);
#elif HAVE_ARM64_CRC32C
static bool can_use_arm_linux = CanUseArm64Linux();
if (can_use_arm_linux) return ExtendArm64(crc, data, count);
static bool can_use_arm64_crc32 = CanUseArm64Crc32();
if (can_use_arm64_crc32) return ExtendArm64(crc, data, count);
#endif // HAVE_SSE42 && (defined(_M_X64) || defined(__x86_64__))
return ExtendPortable(crc, data, count);

View File

@@ -64,7 +64,7 @@
namespace crc32c {
uint32_t ExtendArm64(uint32_t crc, const uint8_t *buf, size_t size) {
uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
int64_t length = size;
uint32_t crc0, crc1, crc2, crc3;
uint64_t t0, t1, t2;
@@ -74,7 +74,6 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *buf, size_t size) {
const poly64_t k0 = 0x8d96551c, k1 = 0xbd6f81f8, k2 = 0xdcb17aa4;
crc = crc ^ kCRC32Xor;
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
while (length >= KBYTES) {
crc0 = crc;
@@ -83,14 +82,14 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *buf, size_t size) {
crc3 = 0;
// Process 1024 bytes in parallel.
CRC32C1024BYTES(p);
CRC32C1024BYTES(data);
// Merge the 4 partial CRC32C values.
t2 = (uint64_t)vmull_p64(crc2, k2);
t1 = (uint64_t)vmull_p64(crc1, k1);
t0 = (uint64_t)vmull_p64(crc0, k0);
crc = __crc32cd(crc3, *(uint64_t *)p);
p += sizeof(uint64_t);
crc = __crc32cd(crc3, *(uint64_t *)data);
data += sizeof(uint64_t);
crc ^= __crc32cd(0, t2);
crc ^= __crc32cd(0, t1);
crc ^= __crc32cd(0, t0);
@@ -99,23 +98,23 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *buf, size_t size) {
}
while (length >= 8) {
crc = __crc32cd(crc, *(uint64_t *)p);
p += 8;
crc = __crc32cd(crc, *(uint64_t *)data);
data += 8;
length -= 8;
}
if (length & 4) {
crc = __crc32cw(crc, *(uint32_t *)p);
p += 4;
crc = __crc32cw(crc, *(uint32_t *)data);
data += 4;
}
if (length & 2) {
crc = __crc32ch(crc, *(uint16_t *)p);
p += 2;
crc = __crc32ch(crc, *(uint16_t *)data);
data += 2;
}
if (length & 1) {
crc = __crc32cb(crc, *p);
crc = __crc32cb(crc, *data);
}
return crc ^ kCRC32Xor;

View File

@@ -2,10 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Linux-specific code checking the availability for ARM CRC32C instructions.
// ARM-specific code
#ifndef CRC32C_CRC32C_ARM_LINUX_H_
#define CRC32C_CRC32C_ARM_LINUX_H_
#ifndef CRC32C_CRC32C_ARM_H_
#define CRC32C_CRC32C_ARM_H_
#include <cstddef>
#include <cstdint>
@@ -24,4 +24,4 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t* data, size_t count);
#endif // HAVE_ARM64_CRC32C
#endif // CRC32C_CRC32C_ARM_LINUX_H_
#endif // CRC32C_CRC32C_ARM_H_

View File

@@ -2,12 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// ARM Linux-specific code checking for the availability of CRC32C instructions.
// ARM-specific code checking for the availability of CRC32C instructions.
#ifndef CRC32C_CRC32C_ARM_LINUX_CHECK_H_
#define CRC32C_CRC32C_ARM_LINUX_CHECK_H_
// X86-specific code checking for the availability of SSE4.2 instructions.
#ifndef CRC32C_CRC32C_ARM_CHECK_H_
#define CRC32C_CRC32C_ARM_CHECK_H_
#include <cstddef>
#include <cstdint>
@@ -18,6 +16,7 @@
#if HAVE_ARM64_CRC32C
#ifdef __linux__
#if HAVE_STRONG_GETAUXVAL
#include <sys/auxv.h>
#elif HAVE_WEAK_GETAUXVAL
@@ -27,17 +26,28 @@ extern "C" unsigned long getauxval(unsigned long type) __attribute__((weak));
#define AT_HWCAP 16
#endif // HAVE_STRONG_GETAUXVAL || HAVE_WEAK_GETAUXVAL
#endif // defined (__linux__)
#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#endif // defined (__APPLE__)
namespace crc32c {
inline bool CanUseArm64Linux() {
#if HAVE_STRONG_GETAUXVAL || HAVE_WEAK_GETAUXVAL
inline bool CanUseArm64Crc32() {
#if defined (__linux__) && (HAVE_STRONG_GETAUXVAL || HAVE_WEAK_GETAUXVAL)
// From 'arch/arm64/include/uapi/asm/hwcap.h' in Linux kernel source code.
constexpr unsigned long kHWCAP_PMULL = 1 << 4;
constexpr unsigned long kHWCAP_CRC32 = 1 << 7;
unsigned long hwcap = (&getauxval != nullptr) ? getauxval(AT_HWCAP) : 0;
return (hwcap & (kHWCAP_PMULL | kHWCAP_CRC32)) ==
(kHWCAP_PMULL | kHWCAP_CRC32);
#elif defined(__APPLE__)
int val = 0;
size_t len = sizeof(val);
return sysctlbyname("hw.optional.armv8_crc32", &val, &len, nullptr, 0) == 0
&& val != 0;
#else
return false;
#endif // defined (__linux__) && (HAVE_STRONG_GETAUXVAL || HAVE_WEAK_GETAUXVAL)
@@ -47,4 +57,4 @@ inline bool CanUseArm64Linux() {
#endif // HAVE_ARM64_CRC32C
#endif // CRC32C_CRC32C_ARM_LINUX_CHECK_H_
#endif // CRC32C_CRC32C_ARM_CHECK_H_

View File

@@ -16,7 +16,7 @@
#endif // CRC32C_TESTS_BUILT_WITH_GLOG
#include "./crc32c_arm64.h"
#include "./crc32c_arm64_linux_check.h"
#include "./crc32c_arm64_check.h"
#include "./crc32c_internal.h"
#include "./crc32c_sse42.h"
#include "./crc32c_sse42_check.h"
@@ -58,8 +58,8 @@ BENCHMARK_REGISTER_F(CRC32CBenchmark, Portable)
#if HAVE_ARM64_CRC32C
BENCHMARK_DEFINE_F(CRC32CBenchmark, ArmLinux)(benchmark::State& state) {
if (!crc32c::CanUseArm64Linux()) {
BENCHMARK_DEFINE_F(CRC32CBenchmark, ArmCRC32C)(benchmark::State& state) {
if (!crc32c::CanUseArm64Crc32()) {
state.SkipWithError("ARM CRC32C instructions not available or not enabled");
return;
}
@@ -69,7 +69,7 @@ BENCHMARK_DEFINE_F(CRC32CBenchmark, ArmLinux)(benchmark::State& state) {
crc = crc32c::ExtendArm64(crc, block_buffer_, block_size_);
state.SetBytesProcessed(state.iterations() * block_size_);
}
BENCHMARK_REGISTER_F(CRC32CBenchmark, ArmLinux)
BENCHMARK_REGISTER_F(CRC32CBenchmark, ArmCRC32C)
->RangeMultiplier(16)
->Range(256, 16777216); // Block size.

View File

@@ -32,14 +32,14 @@ inline uint32_t ReadUint32LE(const uint8_t* buffer) {
// Reads a little-endian 64-bit integer from a 64-bit-aligned buffer.
inline uint64_t ReadUint64LE(const uint8_t* buffer) {
#if BYTE_ORDER_BIG_ENDIAN
return ((static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[1])) << 8) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[2])) << 16) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[3])) << 24) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[4])) << 32) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[5])) << 40) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[6])) << 48) |
(static_cast<uint32_t>(static_cast<uint8_t>(buffer[7])) << 56));
return ((static_cast<uint64_t>(static_cast<uint8_t>(buffer[0]))) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[1])) << 8) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[2])) << 16) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[3])) << 24) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[4])) << 32) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[5])) << 40) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[6])) << 48) |
(static_cast<uint64_t>(static_cast<uint8_t>(buffer[7])) << 56));
#else // !BYTE_ORDER_BIG_ENDIAN
uint64_t result;
// This should be optimized to a single instruction.