Merge bitcoin/bitcoin#26153: Reduce wasted pseudorandom bytes in ChaCha20 + various improvements

511aa4f1c7 Add unit test for ChaCha20's new caching (Pieter Wuille)
fb243d25f7 Improve test vectors for ChaCha20 (Pieter Wuille)
93aee8bbda Inline ChaCha20 32-byte specific constants (Pieter Wuille)
62ec713961 Only support 32-byte keys in ChaCha20{,Aligned} (Pieter Wuille)
f21994a02e Use ChaCha20Aligned in MuHash3072 code (Pieter Wuille)
5d16f75763 Use ChaCha20 caching in FastRandomContext (Pieter Wuille)
38eaece67b Add fuzz test for testing that ChaCha20 works as a stream (Pieter Wuille)
5f05b27841 Add xoroshiro128++ PRNG (Martin Leitner-Ankerl)
12ff72476a Make unrestricted ChaCha20 cipher not waste keystream bytes (Pieter Wuille)
6babf40213 Rename ChaCha20::Seek -> Seek64 to clarify multiple of 64 (Pieter Wuille)
e37bcaa0a6 Split ChaCha20 into aligned/unaligned variants (Pieter Wuille)

Pull request description:

  This is an alternative to #25354 (by my benchmarking, somewhat faster), subsumes #25712, and adds additional test vectors.

  It separates the multiple-of-64-bytes-only "core" logic (which becomes simpler) from a layer around it that performs caching/slicing to support arbitrary byte amounts. Both have their uses (in particular, the MuHash3072 code can benefit from multiple-of-64-bytes assumptions), plus the separation results in more readable code. Also, since FastRandomContext effectively had its own (more naive) caching on top of ChaCha20, that can be dropped in favor of ChaCha20's new built-in caching.

  I thought about rebasing #25712 on top of this, but the changes before are fairly extensive, so redid it instead.

ACKs for top commit:
  ajtowns:
    ut reACK 511aa4f1c7
  dhruv:
    tACK crACK 511aa4f1c7

Tree-SHA512: 3aa80971322a93e780c75a8d35bd39da3a9ea570fbae4491eaf0c45242f5f670a24a592c50ad870d5fd09b9f88ec06e274e8aa3cefd9561d623c63f7198cf2c7
This commit is contained in:
fanquake
2023-02-15 14:51:38 +00:00
15 changed files with 578 additions and 221 deletions

View File

@@ -8,6 +8,7 @@
#include <crypto/common.h>
#include <crypto/chacha20.h>
#include <algorithm>
#include <string.h>
/** Rotate the 32-bit value v left by c bit positions.
 *
 * Both shift counts are reduced modulo 32. Without the masks, c == 0 would
 * evaluate v >> 32, which is undefined behaviour for a 32-bit operand (and is
 * rejected outright when the call is a constant expression). With the masks a
 * rotation by 0 or by 32 returns v unchanged, and for c in 1..31 the result is
 * identical to the unmasked form.
 */
constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << (c & 31)) | (v >> ((32 - c) & 31)); }
@@ -20,95 +21,69 @@ constexpr static inline uint32_t rotl32(uint32_t v, int c) { return (v << c) | (
#define REPEAT10(a) do { {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; {a}; } while(0)
static const unsigned char sigma[] = "expand 32-byte k";
static const unsigned char tau[] = "expand 16-byte k";
void ChaCha20::SetKey(const unsigned char* k, size_t keylen)
void ChaCha20Aligned::SetKey32(const unsigned char* k)
{
const unsigned char *constants;
input[4] = ReadLE32(k + 0);
input[5] = ReadLE32(k + 4);
input[6] = ReadLE32(k + 8);
input[7] = ReadLE32(k + 12);
if (keylen == 32) { /* recommended */
k += 16;
constants = sigma;
} else { /* keylen == 16 */
constants = tau;
}
input[8] = ReadLE32(k + 0);
input[9] = ReadLE32(k + 4);
input[10] = ReadLE32(k + 8);
input[11] = ReadLE32(k + 12);
input[0] = ReadLE32(constants + 0);
input[1] = ReadLE32(constants + 4);
input[2] = ReadLE32(constants + 8);
input[3] = ReadLE32(constants + 12);
input[12] = 0;
input[13] = 0;
input[14] = 0;
input[15] = 0;
input[0] = ReadLE32(k + 0);
input[1] = ReadLE32(k + 4);
input[2] = ReadLE32(k + 8);
input[3] = ReadLE32(k + 12);
input[4] = ReadLE32(k + 16);
input[5] = ReadLE32(k + 20);
input[6] = ReadLE32(k + 24);
input[7] = ReadLE32(k + 28);
input[8] = 0;
input[9] = 0;
input[10] = 0;
input[11] = 0;
}
ChaCha20::ChaCha20()
ChaCha20Aligned::ChaCha20Aligned()
{
memset(input, 0, sizeof(input));
}
ChaCha20::ChaCha20(const unsigned char* k, size_t keylen)
ChaCha20Aligned::ChaCha20Aligned(const unsigned char* key32)
{
SetKey(k, keylen);
SetKey32(key32);
}
void ChaCha20::SetIV(uint64_t iv)
void ChaCha20Aligned::SetIV(uint64_t iv)
{
input[14] = iv;
input[15] = iv >> 32;
input[10] = iv;
input[11] = iv >> 32;
}
void ChaCha20::Seek(uint64_t pos)
void ChaCha20Aligned::Seek64(uint64_t pos)
{
input[12] = pos;
input[13] = pos >> 32;
input[8] = pos;
input[9] = pos >> 32;
}
void ChaCha20::Keystream(unsigned char* c, size_t bytes)
inline void ChaCha20Aligned::Keystream64(unsigned char* c, size_t blocks)
{
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
unsigned char *ctarget = nullptr;
unsigned char tmp[64];
unsigned int i;
uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
if (!bytes) return;
if (!blocks) return;
j0 = input[0];
j1 = input[1];
j2 = input[2];
j3 = input[3];
j4 = input[4];
j5 = input[5];
j6 = input[6];
j7 = input[7];
j8 = input[8];
j9 = input[9];
j10 = input[10];
j11 = input[11];
j12 = input[12];
j13 = input[13];
j14 = input[14];
j15 = input[15];
j4 = input[0];
j5 = input[1];
j6 = input[2];
j7 = input[3];
j8 = input[4];
j9 = input[5];
j10 = input[6];
j11 = input[7];
j12 = input[8];
j13 = input[9];
j14 = input[10];
j15 = input[11];
for (;;) {
if (bytes < 64) {
ctarget = c;
c = tmp;
}
x0 = j0;
x1 = j1;
x2 = j2;
x3 = j3;
x0 = 0x61707865;
x1 = 0x3320646e;
x2 = 0x79622d32;
x3 = 0x6b206574;
x4 = j4;
x5 = j5;
x6 = j6;
@@ -134,10 +109,10 @@ void ChaCha20::Keystream(unsigned char* c, size_t bytes)
QUARTERROUND( x3, x4, x9,x14);
);
x0 += j0;
x1 += j1;
x2 += j2;
x3 += j3;
x0 += 0x61707865;
x1 += 0x3320646e;
x2 += 0x79622d32;
x3 += 0x6b206574;
x4 += j4;
x5 += j5;
x6 += j6;
@@ -171,59 +146,41 @@ void ChaCha20::Keystream(unsigned char* c, size_t bytes)
WriteLE32(c + 56, x14);
WriteLE32(c + 60, x15);
if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) ctarget[i] = c[i];
}
input[12] = j12;
input[13] = j13;
if (blocks == 1) {
input[8] = j12;
input[9] = j13;
return;
}
bytes -= 64;
blocks -= 1;
c += 64;
}
}
void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
inline void ChaCha20Aligned::Crypt64(const unsigned char* m, unsigned char* c, size_t blocks)
{
uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
unsigned char *ctarget = nullptr;
unsigned char tmp[64];
unsigned int i;
uint32_t j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
if (!bytes) return;
if (!blocks) return;
j0 = input[0];
j1 = input[1];
j2 = input[2];
j3 = input[3];
j4 = input[4];
j5 = input[5];
j6 = input[6];
j7 = input[7];
j8 = input[8];
j9 = input[9];
j10 = input[10];
j11 = input[11];
j12 = input[12];
j13 = input[13];
j14 = input[14];
j15 = input[15];
j4 = input[0];
j5 = input[1];
j6 = input[2];
j7 = input[3];
j8 = input[4];
j9 = input[5];
j10 = input[6];
j11 = input[7];
j12 = input[8];
j13 = input[9];
j14 = input[10];
j15 = input[11];
for (;;) {
if (bytes < 64) {
// if m has fewer than 64 bytes available, copy m to tmp and
// read from tmp instead
for (i = 0;i < bytes;++i) tmp[i] = m[i];
m = tmp;
ctarget = c;
c = tmp;
}
x0 = j0;
x1 = j1;
x2 = j2;
x3 = j3;
x0 = 0x61707865;
x1 = 0x3320646e;
x2 = 0x79622d32;
x3 = 0x6b206574;
x4 = j4;
x5 = j5;
x6 = j6;
@@ -249,10 +206,10 @@ void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
QUARTERROUND( x3, x4, x9,x14);
);
x0 += j0;
x1 += j1;
x2 += j2;
x3 += j3;
x0 += 0x61707865;
x1 += 0x3320646e;
x2 += 0x79622d32;
x3 += 0x6b206574;
x4 += j4;
x5 += j5;
x6 += j6;
@@ -303,16 +260,65 @@ void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
WriteLE32(c + 56, x14);
WriteLE32(c + 60, x15);
if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) ctarget[i] = c[i];
}
input[12] = j12;
input[13] = j13;
if (blocks == 1) {
input[8] = j12;
input[9] = j13;
return;
}
bytes -= 64;
blocks -= 1;
c += 64;
m += 64;
}
}
/** Write the next <bytes> keystream bytes into <c>.
 *
 * Any byte count is accepted. Keystream bytes that were generated but not yet
 * handed out are kept in the last m_bufleft bytes of m_buffer and are consumed
 * first by the next call.
 */
void ChaCha20::Keystream(unsigned char* c, size_t bytes)
{
    // Nothing requested: leave the cipher state and the cache untouched.
    if (bytes == 0) return;

    // Step 1: hand out bytes left over from a previously generated block.
    if (m_bufleft > 0) {
        const unsigned from_cache = std::min<size_t>(m_bufleft, bytes);
        const unsigned char* const cached = m_buffer + 64 - m_bufleft;
        memcpy(c, cached, from_cache);
        c += from_cache;
        bytes -= from_cache;
        m_bufleft -= from_cache;
    }

    // Step 2: generate every complete 64-byte block straight into the output.
    const size_t whole_blocks = bytes / 64;
    if (whole_blocks > 0) {
        m_aligned.Keystream64(c, whole_blocks);
        c += whole_blocks * 64;
        bytes %= 64;
    }

    // Step 3: a trailing partial block is generated into the cache; the caller
    // receives its first <bytes> bytes and the remainder is kept for later.
    if (bytes > 0) {
        m_aligned.Keystream64(m_buffer, 1);
        memcpy(c, m_buffer, bytes);
        m_bufleft = 64 - bytes;
    }
}
/** XOR <bytes> bytes of <m> with the next keystream bytes and store the result in <c>.
 *
 * Any byte count is accepted. Keystream bytes that were generated but not yet
 * used are kept in the last m_bufleft bytes of m_buffer and are consumed first
 * by the next call. Each input byte is read before the matching output byte is
 * written.
 */
void ChaCha20::Crypt(const unsigned char* m, unsigned char* c, size_t bytes)
{
    // Nothing to process: leave the cipher state and the cache untouched.
    if (bytes == 0) return;

    // Step 1: use up keystream bytes left over from a previously generated block.
    if (m_bufleft > 0) {
        const unsigned from_cache = std::min<size_t>(m_bufleft, bytes);
        const unsigned char* const cached = m_buffer + 64 - m_bufleft;
        for (unsigned pos = 0; pos != from_cache; ++pos) {
            c[pos] = m[pos] ^ cached[pos];
        }
        m += from_cache;
        c += from_cache;
        bytes -= from_cache;
        m_bufleft -= from_cache;
    }

    // Step 2: process every complete 64-byte block through the aligned cipher.
    const size_t whole_blocks = bytes / 64;
    if (whole_blocks > 0) {
        m_aligned.Crypt64(m, c, whole_blocks);
        m += whole_blocks * 64;
        c += whole_blocks * 64;
        bytes %= 64;
    }

    // Step 3: for a trailing partial block, generate one keystream block into
    // the cache, XOR its first <bytes> bytes, and keep the remainder for later.
    if (bytes > 0) {
        m_aligned.Keystream64(m_buffer, 1);
        for (size_t pos = 0; pos != bytes; ++pos) {
            c[pos] = m[pos] ^ m_buffer[pos];
        }
        m_bufleft = 64 - bytes;
    }
}

View File

@@ -8,19 +8,69 @@
#include <cstdlib>
#include <stdint.h>
/** A class for ChaCha20 256-bit stream cipher developed by Daniel J. Bernstein
https://cr.yp.to/chacha/chacha-20080128.pdf */
// Classes for the ChaCha20 256-bit stream cipher developed by Daniel J. Bernstein
// https://cr.yp.to/chacha/chacha-20080128.pdf
/** ChaCha20 cipher that only operates on multiples of 64 bytes.
 *
 * All output and input lengths are expressed as a number of 64-byte blocks.
 * Use the unrestricted ChaCha20 class when arbitrary byte counts are needed.
 */
class ChaCha20Aligned
{
private:
// Cipher state without the four constant words (those are inlined in the
// implementation): words 0-7 hold the key, words 8-9 the 64-bit block
// counter, and words 10-11 the 64-bit nonce.
uint32_t input[12];
public:
/** Construct a cipher with an all-zero state. */
ChaCha20Aligned();
/** Initialize a cipher with specified 32-byte key. */
ChaCha20Aligned(const unsigned char* key32);
/** Set the 32-byte key, read from <key32> as eight little-endian 32-bit words.
 * This also resets the block counter and the nonce to zero.
 */
void SetKey32(const unsigned char* key32);
/** Set the 64-bit nonce. */
void SetIV(uint64_t iv);
/** Set the 64-bit block counter (pos seeks to byte position 64*pos). */
void Seek64(uint64_t pos);
/** Output the keystream of size <64*blocks> into <c>.
 * Does nothing when <blocks> is zero. The block counter in the state is
 * written back before returning (presumably advanced by <blocks>, so that a
 * later call continues the stream -- the increment is not visible here).
 */
void Keystream64(unsigned char* c, size_t blocks);
/** Encipher the message <input> of length <64*blocks> and write the enciphered representation into <output>.
* Used for encryption and decryption (XOR).
* Does nothing when <blocks> is zero.
*/
void Crypt64(const unsigned char* input, unsigned char* output, size_t blocks);
};
/** Unrestricted ChaCha20 cipher. */
class ChaCha20
{
private:
uint32_t input[16];
ChaCha20Aligned m_aligned;
unsigned char m_buffer[64] = {0};
unsigned m_bufleft{0};
public:
ChaCha20();
ChaCha20(const unsigned char* key, size_t keylen);
void SetKey(const unsigned char* key, size_t keylen); //!< set key with flexible keylength; 256bit recommended */
void SetIV(uint64_t iv); // set the 64bit nonce
void Seek(uint64_t pos); // set the 64bit block counter
ChaCha20() = default;
/** Initialize a cipher with specified 32-byte key. */
ChaCha20(const unsigned char* key32) : m_aligned(key32) {}
/** set 32-byte key. */
void SetKey32(const unsigned char* key32)
{
m_aligned.SetKey32(key32);
m_bufleft = 0;
}
/** set the 64-bit nonce. */
void SetIV(uint64_t iv) { m_aligned.SetIV(iv); }
/** set the 64bit block counter (pos seeks to byte position 64*pos). */
void Seek64(uint64_t pos)
{
m_aligned.Seek64(pos);
m_bufleft = 0;
}
/** outputs the keystream of size <bytes> into <c> */
void Keystream(unsigned char* c, size_t bytes);

View File

@@ -36,8 +36,9 @@ ChaCha20Poly1305AEAD::ChaCha20Poly1305AEAD(const unsigned char* K_1, size_t K_1_
assert(K_1_len == CHACHA20_POLY1305_AEAD_KEY_LEN);
assert(K_2_len == CHACHA20_POLY1305_AEAD_KEY_LEN);
m_chacha_header.SetKey(K_1, CHACHA20_POLY1305_AEAD_KEY_LEN);
m_chacha_main.SetKey(K_2, CHACHA20_POLY1305_AEAD_KEY_LEN);
static_assert(CHACHA20_POLY1305_AEAD_KEY_LEN == 32);
m_chacha_header.SetKey32(K_1);
m_chacha_main.SetKey32(K_2);
// set the cached sequence number to uint64 max which hints for an unset cache.
// we can't hit uint64 max since the rekey rule (which resets the sequence number) is 1GB
@@ -62,7 +63,7 @@ bool ChaCha20Poly1305AEAD::Crypt(uint64_t seqnr_payload, uint64_t seqnr_aad, int
// block counter 0 for the poly1305 key
// use lower 32bytes for the poly1305 key
// (throws away 32 unused bytes (upper 32) from this ChaCha20 round)
m_chacha_main.Seek(0);
m_chacha_main.Seek64(0);
m_chacha_main.Crypt(poly_key, poly_key, sizeof(poly_key));
// if decrypting, verify the tag prior to decryption
@@ -85,7 +86,7 @@ bool ChaCha20Poly1305AEAD::Crypt(uint64_t seqnr_payload, uint64_t seqnr_aad, int
if (m_cached_aad_seqnr != seqnr_aad) {
m_cached_aad_seqnr = seqnr_aad;
m_chacha_header.SetIV(seqnr_aad);
m_chacha_header.Seek(0);
m_chacha_header.Seek64(0);
m_chacha_header.Keystream(m_aad_keystream_buffer, CHACHA20_ROUND_OUTPUT);
}
// crypt the AAD (3 bytes message length) with given position in AAD cipher instance keystream
@@ -94,7 +95,7 @@ bool ChaCha20Poly1305AEAD::Crypt(uint64_t seqnr_payload, uint64_t seqnr_aad, int
dest[2] = src[2] ^ m_aad_keystream_buffer[aad_pos + 2];
// Set the payload ChaCha instance block counter to 1 and crypt the payload
m_chacha_main.Seek(1);
m_chacha_main.Seek64(1);
m_chacha_main.Crypt(src + CHACHA20_POLY1305_AEAD_AAD_LEN, dest + CHACHA20_POLY1305_AEAD_AAD_LEN, src_len - CHACHA20_POLY1305_AEAD_AAD_LEN);
// If encrypting, calculate and append tag
@@ -117,7 +118,7 @@ bool ChaCha20Poly1305AEAD::GetLength(uint32_t* len24_out, uint64_t seqnr_aad, in
// we need to calculate the 64 keystream bytes since we reached a new aad sequence number
m_cached_aad_seqnr = seqnr_aad;
m_chacha_header.SetIV(seqnr_aad); // use LE for the nonce
m_chacha_header.Seek(0); // block counter 0
m_chacha_header.Seek64(0); // block counter 0
m_chacha_header.Keystream(m_aad_keystream_buffer, CHACHA20_ROUND_OUTPUT); // write keystream to the cache
}

View File

@@ -299,7 +299,7 @@ Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
unsigned char tmp[Num3072::BYTE_SIZE];
uint256 hashed_in{(HashWriter{} << in).GetSHA256()};
ChaCha20(hashed_in.data(), hashed_in.size()).Keystream(tmp, Num3072::BYTE_SIZE);
ChaCha20Aligned(hashed_in.data()).Keystream64(tmp, Num3072::BYTE_SIZE / 64);
Num3072 out{tmp};
return out;