Merge #19055: Add MuHash3072 implementation

9815332d51 test: Change MuHash Python implementation to match cpp version again (Fabian Jahr)
01297fb3ca fuzz: Add MuHash consistency fuzz test (Fabian Jahr)
b111410914 test: Add MuHash3072 fuzz test (Fabian Jahr)
c122527385 bench: Add Muhash benchmarks (Fabian Jahr)
7b1242229d test: Add MuHash3072 unit tests (Fabian Jahr)
adc708c98d crypto: Add MuHash3072 implementation (Fabian Jahr)
0b4d290bf5 crypto: Add Num3072 implementation (Fabian Jahr)
589f958662 build: Check for 128 bit integer support (Fabian Jahr)

Pull request description:

  This is the first split of #18000 which implements the Muhash algorithm and uses it to calculate the UTXO set hash in `gettxoutsetinfo`.

ACKs for top commit:
  laanwj:
    Code review ACK 9815332d51

Tree-SHA512: 4bc090738f0e3d80b74bdd8122e24a8ce80121120fd37c7e4335a73e7ba4fcd7643f2a2d559e2eebf54b8e3a3bd5f12cfb27ba61ded135fda210a07a233eae45
This commit is contained in:
Wladimir J. van der Laan
2021-01-07 17:16:47 +01:00
10 changed files with 694 additions and 7 deletions

338
src/crypto/muhash.cpp Normal file
View File

@@ -0,0 +1,338 @@
// Copyright (c) 2017-2020 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <crypto/muhash.h>
#include <crypto/chacha20.h>
#include <crypto/common.h>
#include <hash.h>
#include <cassert>
#include <cstdio>
#include <limits>
namespace {
using limb_t = Num3072::limb_t;
using double_limb_t = Num3072::double_limb_t;
constexpr int LIMB_SIZE = Num3072::LIMB_SIZE;
constexpr int LIMBS = Num3072::LIMBS;
/** 2^3072 - 1103717, the largest 3072-bit safe prime number, is used as the modulus. */
constexpr limb_t MAX_PRIME_DIFF = 1103717;
/** Extract the lowest limb of [c0,c1,c2] into n, and left shift the number by 1 limb. */
inline void extract3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& n)
{
n = c0;
c0 = c1;
c1 = c2;
c2 = 0;
}
/** [c0,c1] = a * b */
inline void mul(limb_t& c0, limb_t& c1, const limb_t& a, const limb_t& b)
{
double_limb_t t = (double_limb_t)a * b;
c1 = t >> LIMB_SIZE;
c0 = t;
}
/* [c0,c1,c2] += n * [d0,d1,d2]. c2 is 0 initially */
inline void mulnadd3(limb_t& c0, limb_t& c1, limb_t& c2, limb_t& d0, limb_t& d1, limb_t& d2, const limb_t& n)
{
double_limb_t t = (double_limb_t)d0 * n + c0;
c0 = t;
t >>= LIMB_SIZE;
t += (double_limb_t)d1 * n + c1;
c1 = t;
t >>= LIMB_SIZE;
c2 = t + d2 * n;
}
/* [c0,c1] *= n */
inline void muln2(limb_t& c0, limb_t& c1, const limb_t& n)
{
double_limb_t t = (double_limb_t)c0 * n;
c0 = t;
t >>= LIMB_SIZE;
t += (double_limb_t)c1 * n;
c1 = t;
}
/** [c0,c1,c2] += a * b */
inline void muladd3(limb_t& c0, limb_t& c1, limb_t& c2, const limb_t& a, const limb_t& b)
{
double_limb_t t = (double_limb_t)a * b;
limb_t th = t >> LIMB_SIZE;
limb_t tl = t;
c0 += tl;
th += (c0 < tl) ? 1 : 0;
c1 += th;
c2 += (c1 < th) ? 1 : 0;
}
/** [c0,c1,c2] += 2 * a * b */
inline void muldbladd3(limb_t& c0, limb_t& c1, limb_t& c2, const limb_t& a, const limb_t& b)
{
double_limb_t t = (double_limb_t)a * b;
limb_t th = t >> LIMB_SIZE;
limb_t tl = t;
c0 += tl;
limb_t tt = th + ((c0 < tl) ? 1 : 0);
c1 += tt;
c2 += (c1 < tt) ? 1 : 0;
c0 += tl;
th += (c0 < tl) ? 1 : 0;
c1 += th;
c2 += (c1 < th) ? 1 : 0;
}
/**
* Add limb a to [c0,c1]: [c0,c1] += a. Then extract the lowest
* limb of [c0,c1] into n, and left shift the number by 1 limb.
* */
inline void addnextract2(limb_t& c0, limb_t& c1, const limb_t& a, limb_t& n)
{
limb_t c2 = 0;
// add
c0 += a;
if (c0 < a) {
c1 += 1;
// Handle case when c1 has overflown
if (c1 == 0)
c2 = 1;
}
// extract
n = c0;
c0 = c1;
c1 = c2;
}
/** in_out = in_out^(2^sq) * mul */
inline void square_n_mul(Num3072& in_out, const int sq, const Num3072& mul)
{
for (int j = 0; j < sq; ++j) in_out.Square();
in_out.Multiply(mul);
}
} // namespace
/** Indicates wether d is larger than the modulus. */
bool Num3072::IsOverflow() const
{
if (this->limbs[0] <= std::numeric_limits<limb_t>::max() - MAX_PRIME_DIFF) return false;
for (int i = 1; i < LIMBS; ++i) {
if (this->limbs[i] != std::numeric_limits<limb_t>::max()) return false;
}
return true;
}
void Num3072::FullReduce()
{
limb_t c0 = MAX_PRIME_DIFF;
limb_t c1 = 0;
for (int i = 0; i < LIMBS; ++i) {
addnextract2(c0, c1, this->limbs[i], this->limbs[i]);
}
}
Num3072 Num3072::GetInverse() const
{
// For fast exponentiation a sliding window exponentiation with repunit
// precomputation is utilized. See "Fast Point Decompression for Standard
// Elliptic Curves" (Brumley, Järvinen, 2008).
Num3072 p[12]; // p[i] = a^(2^(2^i)-1)
Num3072 out;
p[0] = *this;
for (int i = 0; i < 11; ++i) {
p[i + 1] = p[i];
for (int j = 0; j < (1 << i); ++j) p[i + 1].Square();
p[i + 1].Multiply(p[i]);
}
out = p[11];
square_n_mul(out, 512, p[9]);
square_n_mul(out, 256, p[8]);
square_n_mul(out, 128, p[7]);
square_n_mul(out, 64, p[6]);
square_n_mul(out, 32, p[5]);
square_n_mul(out, 8, p[3]);
square_n_mul(out, 2, p[1]);
square_n_mul(out, 1, p[0]);
square_n_mul(out, 5, p[2]);
square_n_mul(out, 3, p[0]);
square_n_mul(out, 2, p[0]);
square_n_mul(out, 4, p[0]);
square_n_mul(out, 4, p[1]);
square_n_mul(out, 3, p[0]);
return out;
}
void Num3072::Multiply(const Num3072& a)
{
limb_t c0 = 0, c1 = 0, c2 = 0;
Num3072 tmp;
/* Compute limbs 0..N-2 of this*a into tmp, including one reduction. */
for (int j = 0; j < LIMBS - 1; ++j) {
limb_t d0 = 0, d1 = 0, d2 = 0;
mul(d0, d1, this->limbs[1 + j], a.limbs[LIMBS + j - (1 + j)]);
for (int i = 2 + j; i < LIMBS; ++i) muladd3(d0, d1, d2, this->limbs[i], a.limbs[LIMBS + j - i]);
mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
for (int i = 0; i < j + 1; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[j - i]);
extract3(c0, c1, c2, tmp.limbs[j]);
}
/* Compute limb N-1 of a*b into tmp. */
assert(c2 == 0);
for (int i = 0; i < LIMBS; ++i) muladd3(c0, c1, c2, this->limbs[i], a.limbs[LIMBS - 1 - i]);
extract3(c0, c1, c2, tmp.limbs[LIMBS - 1]);
/* Perform a second reduction. */
muln2(c0, c1, MAX_PRIME_DIFF);
for (int j = 0; j < LIMBS; ++j) {
addnextract2(c0, c1, tmp.limbs[j], this->limbs[j]);
}
assert(c1 == 0);
assert(c0 == 0 || c0 == 1);
/* Perform up to two more reductions if the internal state has already
* overflown the MAX of Num3072 or if it is larger than the modulus or
* if both are the case.
* */
if (this->IsOverflow()) this->FullReduce();
if (c0) this->FullReduce();
}
void Num3072::Square()
{
limb_t c0 = 0, c1 = 0, c2 = 0;
Num3072 tmp;
/* Compute limbs 0..N-2 of this*this into tmp, including one reduction. */
for (int j = 0; j < LIMBS - 1; ++j) {
limb_t d0 = 0, d1 = 0, d2 = 0;
for (int i = 0; i < (LIMBS - 1 - j) / 2; ++i) muldbladd3(d0, d1, d2, this->limbs[i + j + 1], this->limbs[LIMBS - 1 - i]);
if ((j + 1) & 1) muladd3(d0, d1, d2, this->limbs[(LIMBS - 1 - j) / 2 + j + 1], this->limbs[LIMBS - 1 - (LIMBS - 1 - j) / 2]);
mulnadd3(c0, c1, c2, d0, d1, d2, MAX_PRIME_DIFF);
for (int i = 0; i < (j + 1) / 2; ++i) muldbladd3(c0, c1, c2, this->limbs[i], this->limbs[j - i]);
if ((j + 1) & 1) muladd3(c0, c1, c2, this->limbs[(j + 1) / 2], this->limbs[j - (j + 1) / 2]);
extract3(c0, c1, c2, tmp.limbs[j]);
}
assert(c2 == 0);
for (int i = 0; i < LIMBS / 2; ++i) muldbladd3(c0, c1, c2, this->limbs[i], this->limbs[LIMBS - 1 - i]);
extract3(c0, c1, c2, tmp.limbs[LIMBS - 1]);
/* Perform a second reduction. */
muln2(c0, c1, MAX_PRIME_DIFF);
for (int j = 0; j < LIMBS; ++j) {
addnextract2(c0, c1, tmp.limbs[j], this->limbs[j]);
}
assert(c1 == 0);
assert(c0 == 0 || c0 == 1);
/* Perform up to two more reductions if the internal state has already
* overflown the MAX of Num3072 or if it is larger than the modulus or
* if both are the case.
* */
if (this->IsOverflow()) this->FullReduce();
if (c0) this->FullReduce();
}
void Num3072::SetToOne()
{
this->limbs[0] = 1;
for (int i = 1; i < LIMBS; ++i) this->limbs[i] = 0;
}
void Num3072::Divide(const Num3072& a)
{
if (this->IsOverflow()) this->FullReduce();
Num3072 inv{};
if (a.IsOverflow()) {
Num3072 b = a;
b.FullReduce();
inv = b.GetInverse();
} else {
inv = a.GetInverse();
}
this->Multiply(inv);
if (this->IsOverflow()) this->FullReduce();
}
Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
Num3072 out{};
uint256 hashed_in = (CHashWriter(SER_DISK, 0) << in).GetSHA256();
unsigned char tmp[BYTE_SIZE];
ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, BYTE_SIZE);
for (int i = 0; i < LIMBS; ++i) {
if (sizeof(limb_t) == 4) {
out.limbs[i] = ReadLE32(tmp + 4 * i);
} else if (sizeof(limb_t) == 8) {
out.limbs[i] = ReadLE64(tmp + 8 * i);
}
}
return out;
}
MuHash3072::MuHash3072(Span<const unsigned char> in) noexcept
{
m_numerator = ToNum3072(in);
}
void MuHash3072::Finalize(uint256& out) noexcept
{
m_numerator.Divide(m_denominator);
m_denominator.SetToOne(); // Needed to keep the MuHash object valid
unsigned char data[384];
for (int i = 0; i < LIMBS; ++i) {
if (sizeof(limb_t) == 4) {
WriteLE32(data + i * 4, m_numerator.limbs[i]);
} else if (sizeof(limb_t) == 8) {
WriteLE64(data + i * 8, m_numerator.limbs[i]);
}
}
out = (CHashWriter(SER_DISK, 0) << data).GetSHA256();
}
MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept
{
m_numerator.Multiply(mul.m_numerator);
m_denominator.Multiply(mul.m_denominator);
return *this;
}
MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept
{
m_numerator.Multiply(div.m_denominator);
m_denominator.Multiply(div.m_numerator);
return *this;
}
MuHash3072& MuHash3072::Insert(Span<const unsigned char> in) noexcept {
m_numerator.Multiply(ToNum3072(in));
return *this;
}
MuHash3072& MuHash3072::Remove(Span<const unsigned char> in) noexcept {
m_numerator.Divide(ToNum3072(in));
return *this;
}

130
src/crypto/muhash.h Normal file
View File

@@ -0,0 +1,130 @@
// Copyright (c) 2017-2020 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_CRYPTO_MUHASH_H
#define BITCOIN_CRYPTO_MUHASH_H
#if defined(HAVE_CONFIG_H)
#include <config/bitcoin-config.h>
#endif
#include <serialize.h>
#include <uint256.h>
#include <stdint.h>
class Num3072
{
private:
void FullReduce();
bool IsOverflow() const;
Num3072 GetInverse() const;
public:
#ifdef HAVE___INT128
typedef unsigned __int128 double_limb_t;
typedef uint64_t limb_t;
static constexpr int LIMBS = 48;
static constexpr int LIMB_SIZE = 64;
#else
typedef uint64_t double_limb_t;
typedef uint32_t limb_t;
static constexpr int LIMBS = 96;
static constexpr int LIMB_SIZE = 32;
#endif
limb_t limbs[LIMBS];
// Sanity check for Num3072 constants
static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits");
static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t");
static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect");
// Hard coded values in MuHash3072 constructor and Finalize
static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t");
void Multiply(const Num3072& a);
void Divide(const Num3072& a);
void SetToOne();
void Square();
Num3072() { this->SetToOne(); };
SERIALIZE_METHODS(Num3072, obj)
{
for (auto& limb : obj.limbs) {
READWRITE(limb);
}
}
};
/** A class representing MuHash sets
*
* MuHash is a hashing algorithm that supports adding set elements in any
* order but also deleting in any order. As a result, it can maintain a
* running sum for a set of data as a whole, and add/remove when data
* is added to or removed from it. A downside of MuHash is that computing
* an inverse is relatively expensive. This is solved by representing
* the running value as a fraction, and multiplying added elements into
* the numerator and removed elements into the denominator. Only when the
* final hash is desired, a single modular inverse and multiplication is
* needed to combine the two. The combination is also run on serialization
* to allow for space-efficient storage on disk.
*
* As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
* in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
* all of this is perfectly parallellizable: each thread can process an
* arbitrary subset of the update operations, allowing them to be
* efficiently combined later.
*
* Muhash does not support checking if an element is already part of the
* set. That is why this class does not enforce the use of a set as the
* data it represents because there is no efficient way to do so.
* It is possible to add elements more than once and also to remove
* elements that have not been added before. However, this implementation
* is intended to represent a set of elements.
*
* See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
* https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
*/
class MuHash3072
{
private:
static constexpr size_t BYTE_SIZE = 384;
Num3072 m_numerator;
Num3072 m_denominator;
Num3072 ToNum3072(Span<const unsigned char> in);
public:
/* The empty set. */
MuHash3072() noexcept {};
/* A singleton with variable sized data in it. */
explicit MuHash3072(Span<const unsigned char> in) noexcept;
/* Insert a single piece of data into the set. */
MuHash3072& Insert(Span<const unsigned char> in) noexcept;
/* Remove a single piece of data from the set. */
MuHash3072& Remove(Span<const unsigned char> in) noexcept;
/* Multiply (resulting in a hash for the union of the sets) */
MuHash3072& operator*=(const MuHash3072& mul) noexcept;
/* Divide (resulting in a hash for the difference of the sets) */
MuHash3072& operator/=(const MuHash3072& div) noexcept;
/* Finalize into a 32-byte hash. Does not change this object's value. */
void Finalize(uint256& out) noexcept;
SERIALIZE_METHODS(MuHash3072, obj)
{
READWRITE(obj.m_numerator);
READWRITE(obj.m_denominator);
}
};
#endif // BITCOIN_CRYPTO_MUHASH_H