Files
bitcoin/src/test/fuzz/integer.cpp
Ava Chow 8c07800b19 Merge bitcoin/bitcoin#32497: merkle: pre‑reserve leaves to prevent reallocs with odd vtx count
3dd815f048 validation: pre-reserve leaves to prevent reallocs with odd vtx count (Lőrinc)
7fd47e0e56 bench: make `MerkleRoot` benchmark more representative (Lőrinc)
f0a2183108 test: adjust `ComputeMerkleRoot` tests (Lőrinc)

Pull request description:

  #### Summary

  `ComputeMerkleRoot` [duplicates the last hash](https://github.com/bitcoin/bitcoin/blob/39b6c139bd/src/consensus/merkle.cpp#L54-L56) when the input size is odd. If the caller provides a `std::vector` whose capacity equals its size, that extra `push_back` forces a reallocation, doubling its capacity (causing peak memory usage of 3x the necessary size).

  This affects roughly half of the created blocks (those with odd transaction counts), causing unnecessary memory fragmentation during every block validation.

  #### Fix

  * Pre-reserves vector capacity to account for the odd-count duplication using `(size + 1) & ~1ULL`.
      * This syntax produces [optimal assembly](https://github.com/bitcoin/bitcoin/pull/32497#discussion_r2553107836) across x86/ARM and 32/64-bit platforms for GCC & Clang.
  * Eliminates default construction of `uint256` objects that are immediately overwritten by switching from `resize` to `reserve` + `push_back`.

  #### Memory Impact

  [Memory profiling](https://github.com/bitcoin/bitcoin/pull/32497#issuecomment-3563724551) shows **50% reduction in peak allocation** (576KB → 288KB) and elimination of reallocation overhead.

  #### Validation

  The benchmark was updated to use an odd leaf count to demonstrate the real-world scenario where the reallocation occurs.

  A full `-reindex-chainstate` up to block **896 408** ran without triggering the asserts.

  <details>
  <summary>Validation asserts</summary>

  Temporary asserts (not included in this PR) confirm that `push_back` never reallocates and that the coinbase witness hash remains null:
  ```cpp
  if (hashes.size() & 1) {
      assert(hashes.size() < hashes.capacity()); // TODO remove
      hashes.push_back(hashes.back());
  }

  leaves.reserve((block.vtx.size() + 1) & ~1ULL); // capacity rounded up to even
  leaves.emplace_back();
  assert(leaves.back().IsNull()); // TODO remove
  ```

  </details>

  #### Benchmark Performance

  While the main purpose is to improve predictability, the reduced memory operations also improve hashing throughput slightly.

ACKs for top commit:
  achow101:
    ACK 3dd815f048
  optout21:
    reACK 3dd815f048
  hodlinator:
    re-ACK 3dd815f048
  vasild:
    ACK 3dd815f048
  w0xlt:
    ACK 3dd815f048 with minor nits.
  danielabrozzoni:
    Code review ACK 3dd815f048

Tree-SHA512: e7b578f9deadc0de7d61c062c7f65c5e1d347548ead4a4bb74b056396ad7df3f1c564327edc219670e6e2b2cb51f4e1ccfd4f58dd414aeadf2008d427065c11f
2026-01-20 15:47:17 -08:00

255 lines
8.6 KiB
C++

// Copyright (c) 2019-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <arith_uint256.h>
#include <common/args.h>
#include <common/system.h>
#include <compressor.h>
#include <consensus/amount.h>
#include <consensus/merkle.h>
#include <core_io.h>
#include <crypto/common.h>
#include <crypto/siphash.h>
#include <key_io.h>
#include <memusage.h>
#include <netbase.h>
#include <policy/policy.h>
#include <policy/settings.h>
#include <pow.h>
#include <protocol.h>
#include <pubkey.h>
#include <script/script.h>
#include <serialize.h>
#include <streams.h>
#include <test/fuzz/FuzzedDataProvider.h>
#include <test/fuzz/fuzz.h>
#include <test/fuzz/util.h>
#include <uint256.h>
#include <univalue.h>
#include <util/chaintype.h>
#include <util/check.h>
#include <util/moneystr.h>
#include <util/overflow.h>
#include <util/strencodings.h>
#include <util/string.h>
#include <cassert>
#include <chrono>
#include <limits>
#include <set>
#include <vector>
using util::ToString;
/**
 * Set-up hook for the `integer` fuzz target.
 *
 * Selects the regtest chain parameters; the fuzz target reads them back via
 * Params().GetConsensus().
 */
void initialize_integer()
{
    SelectParams(ChainType::REGTEST);
}
/**
 * Fuzz target `integer`.
 *
 * Derives one value of each fixed-width integer type (plus a uint256, a
 * uint160, a char and a bool) from the fuzz input and passes them to a broad
 * set of integer-consuming helpers. Most calls discard their result and only
 * exist to surface crashes; operations that have an inverse additionally
 * assert that a round trip reproduces the original value.
 */
FUZZ_TARGET(integer, .init = initialize_integer)
{
    // Bail out unless the input can supply the uint256 and uint160 consumed first.
    if (buffer.size() < sizeof(uint256) + sizeof(uint160)) {
        return;
    }

    // The order of the Consume*() calls determines how input bytes map to
    // values; keep it stable when editing.
    FuzzedDataProvider fuzzed_data_provider(buffer.data(), buffer.size());
    const uint256 u256(fuzzed_data_provider.ConsumeBytes<unsigned char>(sizeof(uint256)));
    const uint160 u160(fuzzed_data_provider.ConsumeBytes<unsigned char>(sizeof(uint160)));
    const uint64_t u64 = fuzzed_data_provider.ConsumeIntegral<uint64_t>();
    const int64_t i64 = fuzzed_data_provider.ConsumeIntegral<int64_t>();
    const uint32_t u32 = fuzzed_data_provider.ConsumeIntegral<uint32_t>();
    const int32_t i32 = fuzzed_data_provider.ConsumeIntegral<int32_t>();
    const uint16_t u16 = fuzzed_data_provider.ConsumeIntegral<uint16_t>();
    const int16_t i16 = fuzzed_data_provider.ConsumeIntegral<int16_t>();
    const uint8_t u8 = fuzzed_data_provider.ConsumeIntegral<uint8_t>();
    const int8_t i8 = fuzzed_data_provider.ConsumeIntegral<int8_t>();
    // We cannot assume a specific value of std::is_signed_v<char>:
    // ConsumeIntegral<char>() instead of casting from {u,}int8_t.
    const char ch = fuzzed_data_provider.ConsumeIntegral<char>();
    const bool b = fuzzed_data_provider.ConsumeBool();

    const Consensus::Params& consensus_params = Params().GetConsensus();
    (void)CheckProofOfWorkImpl(u256, u32, consensus_params);

    // Amount compression: for values up to MAX_MONEY the round trip must be
    // lossless and the compressed form must not exceed that of MAX_MONEY - 1.
    if (u64 <= MAX_MONEY) {
        const uint64_t compressed_money_amount = CompressAmount(u64);
        assert(u64 == DecompressAmount(compressed_money_amount));
        static const uint64_t compressed_money_amount_max = CompressAmount(MAX_MONEY - 1);
        assert(compressed_money_amount <= compressed_money_amount_max);
    } else {
        (void)CompressAmount(u64);
    }

    // Merkle root over three leaves (an odd count): the fuzzed hash plus the
    // all-zero and all-one extremes.
    constexpr uint256 u256_min{"0000000000000000000000000000000000000000000000000000000000000000"};
    constexpr uint256 u256_max{"ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"};
    std::vector v256{u256, u256_min, u256_max};
    (void)ComputeMerkleRoot(std::move(v256));

    (void)DecompressAmount(u64);
    {
        // Whenever the formatted amount parses, it must parse back to i64.
        if (std::optional<CAmount> parsed = ParseMoney(FormatMoney(i64))) {
            assert(parsed.value() == i64);
        }
    }
    (void)GetSizeOfCompactSize(u64);
    (void)GetSpecialScriptSize(u32);
    // Skip inputs for which i64 * u32, or adding 4 to either operand, would
    // overflow int64_t (presumably the arithmetic the callee performs --
    // confirm against its implementation).
    if (!MultiplicationOverflow(i64, static_cast<int64_t>(u32)) && !AdditionOverflow(i64, static_cast<int64_t>(4)) && !AdditionOverflow(i64 * u32, static_cast<int64_t>(4))) {
        (void)GetVirtualTransactionSize(i64, i64, u32);
    }
    (void)HexDigit(ch);
    (void)MoneyRange(i64);
    (void)ToString(i64);
    (void)IsDigit(ch);
    (void)IsSpace(ch);
    (void)IsSwitchChar(ch);

    // memusage::DynamicUsage() overloads for every scalar type.
    (void)memusage::DynamicUsage(ch);
    (void)memusage::DynamicUsage(i16);
    (void)memusage::DynamicUsage(i32);
    (void)memusage::DynamicUsage(i64);
    (void)memusage::DynamicUsage(i8);
    (void)memusage::DynamicUsage(u16);
    (void)memusage::DynamicUsage(u32);
    (void)memusage::DynamicUsage(u64);
    (void)memusage::DynamicUsage(u8);
    const unsigned char uch = static_cast<unsigned char>(u8);
    (void)memusage::DynamicUsage(uch);
    {
        // For a std::set, total usage must equal the per-element increment
        // multiplied by the number of elements (one or two here).
        const std::set<int64_t> i64s{i64, static_cast<int64_t>(u64)};
        const size_t dynamic_usage = memusage::DynamicUsage(i64s);
        const size_t incremental_dynamic_usage = memusage::IncrementalDynamicUsage(i64s);
        assert(dynamic_usage == incremental_dynamic_usage * i64s.size());
    }

    (void)MillisToTimeval(i64);
    (void)SighashToStr(uch);
    (void)PresaltedSipHasher(u64, u64)(u256);
    (void)PresaltedSipHasher(u64, u64)(u256, u32);
    (void)ToLower(ch);
    (void)ToUpper(ch);
    {
        // Same parse-back check as above, going through the UniValue
        // representation of the amount.
        if (std::optional<CAmount> parsed = ParseMoney(ValueFromAmount(i64).getValStr())) {
            assert(parsed.value() == i64);
        }
    }
    // EncodeOP_N / DecodeOP_N must be inverses on the range 0..16.
    if (i32 >= 0 && i32 <= 16) {
        assert(i32 == CScript::DecodeOP_N(CScript::EncodeOP_N(i32)));
    }

    const std::chrono::seconds seconds{i64};
    assert(count_seconds(seconds) == i64);

    const CScriptNum script_num{i64};
    (void)script_num.getint();
    (void)script_num.getvch();

    // uint256 <-> arith_uint256 conversions (direct and via hex) must be lossless.
    const arith_uint256 au256 = UintToArith256(u256);
    assert(ArithToUint256(au256) == u256);
    assert(uint256::FromHex(au256.GetHex()).value() == u256);
    (void)au256.bits();
    (void)au256.GetCompact(/* fNegative= */ false);
    (void)au256.GetCompact(/* fNegative= */ true);
    (void)au256.getdouble();
    (void)au256.GetHex();
    (void)au256.GetLow64();
    (void)au256.size();
    (void)au256.ToString();

    // Constructed only to exercise the constructors taking a uint160.
    const CKeyID key_id{u160};
    const CScriptID script_id{u160};

    {
        // Generic serialization round trip through a single shared stream:
        // write the value, read it back into a fresh object of the same type,
        // and require equality with nothing left over in the stream.
        DataStream stream{};
        const auto assert_roundtrip = [&stream]<typename T>(const T& value) {
            T deserialized;
            stream << value;
            stream >> deserialized;
            assert(value == deserialized && stream.empty());
        };
        assert_roundtrip(u256);
        assert_roundtrip(u160);
        assert_roundtrip(u64);
        assert_roundtrip(i64);
        assert_roundtrip(u32);
        assert_roundtrip(i32);
        assert_roundtrip(u16);
        assert_roundtrip(i16);
        assert_roundtrip(u8);
        assert_roundtrip(i8);
        assert_roundtrip(b);
    }

    {
        // Any 64-bit pattern is accepted as a flag set here; a named cast
        // keeps the conversion explicit and greppable.
        const ServiceFlags service_flags = static_cast<ServiceFlags>(u64);
        (void)MayHaveUsefulAddressDB(service_flags);
    }

    {
        // Low-level ser_writedata*/ser_readdata* helpers must round-trip too.
        DataStream stream{};

        ser_writedata64(stream, u64);
        const uint64_t deserialized_u64 = ser_readdata64(stream);
        assert(u64 == deserialized_u64 && stream.empty());

        ser_writedata32(stream, u32);
        const uint32_t deserialized_u32 = ser_readdata32(stream);
        assert(u32 == deserialized_u32 && stream.empty());

        ser_writedata32be(stream, u32);
        const uint32_t deserialized_u32be = ser_readdata32be(stream);
        assert(u32 == deserialized_u32be && stream.empty());

        ser_writedata16(stream, u16);
        const uint16_t deserialized_u16 = ser_readdata16(stream);
        assert(u16 == deserialized_u16 && stream.empty());

        ser_writedata8(stream, u8);
        const uint8_t deserialized_u8 = ser_readdata8(stream);
        assert(u8 == deserialized_u8 && stream.empty());
    }

    {
        // CompactSize: writing always succeeds, but reading may reject the
        // value by throwing std::ios_base::failure; that outcome is tolerated.
        // When the read succeeds it must return the value that was written.
        DataStream stream{};
        WriteCompactSize(stream, u64);
        try {
            const uint64_t deserialized_u64 = ReadCompactSize(stream);
            assert(u64 == deserialized_u64 && stream.empty());
        } catch (const std::ios_base::failure&) {
        }
    }
}