Merge bitcoin/bitcoin#35156: dbwrapper: reuse scratch DataStream buffers

032223f403 dbwrapper: reuse iterator scratch stream (Lőrinc)
7403c0f907 dbwrapper: guard `CDBBatch` scratch streams (Lőrinc)
cb1ab0a716 test: cover repeated dbwrapper stream use (Lőrinc)
31ce729b28 streams: add `ScopedDataStreamUsage` (Lőrinc)

Pull request description:

  ### Problem
  `CDBIterator::GetValue()` cannot use `SpanReader` the same way as `::GetKey()` because values are deobfuscated in place before deserialization, so it still needs an owning mutable buffer.
  However, the current path allocates a fresh `DataStream` for every value read.

  The same local-stream pattern also exists in `CDBIterator::Seek()`, while `CDBBatch` already owns reusable key/value buffers but still manually reserves and clears them on every `Write()` and `Erase()` call.

  ### Fix
  Add `ScopedDataStreamUsage`, a small RAII helper for caller-owned scratch streams.
  It asserts that the stream is empty on entry (making accidental re-entry or concurrent use of the same scratch stream fail fast), and clears it on scope exit.

  Use it to guard the reusable scratch streams in `CDBBatch::Write()`, `::Erase()` and `CDBIterator::Seek()`, `::GetValue()`.
  The const read-side `CDBWrapper` helpers stay unchanged, since they can be called concurrently on the same wrapper and should keep using local streams.

  The production changes are preceded by tests covering repeated reuse on the same owning objects, including a failed iterator value decode followed by a successful read from the same iterator entry.

  ### Context
  Follow-up to #35128, #34483 and #35025.

  ### Reproducer
  `gettxoutsetinfo` gets an additional ~6% speedup on top of the previous iterator-key optimization:

  <details><summary>2026-04-24 | gettxoutsetinfo | i9-ssd | x86_64 | Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz | 16 cores | 62Gi RAM | xfs | SSD</summary>

  ```bash
  COMMITS="2d5ab09f0dca4bfec0b365f5f431def2c0c9d70f 9e5fd595ae8ebeec74678c31a547fbc14f87bf89"; \
  BASE_DIR="/mnt/my_storage"; DATA_DIR="$BASE_DIR/BitcoinData"; LOG_DIR="$BASE_DIR/logs"; \
  mkdir -p "$LOG_DIR" && \
  (echo ""; for c in $COMMITS; do git cat-file -e "$c^{commit}" 2>/dev/null || git fetch -q origin "$c" || exit 1; git log -1 --pretty='%h %s' "$c" || exit 1; done) && \
  (echo "" && echo "$(date -I) | gettxoutsetinfo | $(hostname) | $(uname -m) | $(lscpu | grep 'Model name' | head -1 | cut -d: -f2 | xargs) | $(nproc) cores | $(free -h | awk '/^Mem:/{print $2}') RAM | $(df -T $BASE_DIR | awk 'NR==2{print $2}') | $(lsblk -no ROTA $(df --output=source $BASE_DIR | tail -1) | grep -q 1 && echo HDD || echo SSD)"; echo "") && \
  hyperfine \
    --sort command \
    --runs 3 \
    --export-json "$BASE_DIR/gettxoutsetinfo-$(sed -E 's/([a-f0-9]{8})[a-f0-9]* ?/\1-/g;s/-$//'<<<"$COMMITS")-$(date +%s).json" \
    --parameter-list COMMIT ${COMMITS// /,} \
    --prepare "killall -9 bitcoind 2>/dev/null || true; rm -f $DATA_DIR/debug.log; git clean -fxd && git reset --hard {COMMIT} && \
      cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release && ninja -C build bitcoind bitcoin-cli -j$(nproc) && \
      ./build/bin/bitcoind -datadir=$DATA_DIR -connect=0 -listen=0 -dnsseed=0 -coinstatsindex=0 -txindex=0 -blockfilterindex=0 -daemon -printtoconsole=0; \
      ./build/bin/bitcoin-cli -datadir=$DATA_DIR -rpcwait getblockcount >/dev/null" \
    --conclude "./build/bin/bitcoin-cli -datadir=$DATA_DIR stop 2>/dev/null || true; killall bitcoind 2>/dev/null || true; sleep 10; \
      grep -q 'Done loading' $DATA_DIR/debug.log && grep 'Bitcoin Core version' $DATA_DIR/debug.log | grep -q \"\$(git rev-parse --short=12 {COMMIT})\"; \
      cp $DATA_DIR/debug.log $LOG_DIR/gettxoutsetinfo-{COMMIT}-$(date +%s).log" \
    "./build/bin/bitcoin-cli -datadir=$DATA_DIR -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null"

  2d5ab09f0d Merge bitcoin/bitcoin#35124: bench: fix benchmark fixtures and setup checks
  cb63e158d9 walletdb: reuse batch scratch streams

  2026-04-24 | gettxoutsetinfo | i9-ssd | x86_64 | Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz | 16 cores | 62Gi RAM | xfs | SSD

  Benchmark 1: ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 2d5ab09f0d)
    Time (mean ± σ):     60.063 s ±  1.623 s    [User: 0.001 s, System: 0.002 s]
    Range (min … max):   59.020 s … 61.933 s    3 runs

  Benchmark 2: ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = cb63e158d9ae8276dbe54bba0b0cf8f35378ec71)
    Time (mean ± σ):     56.853 s ±  0.179 s    [User: 0.002 s, System: 0.001 s]
    Range (min … max):   56.675 s … 57.033 s    3 runs

  Relative speed comparison
          1.06 ±  0.03  ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 2d5ab09f0d)
          1.00          ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = cb63e158d9ae8276dbe54bba0b0cf8f35378ec71)
  ```

  </details>

ACKs for top commit:
  andrewtoth:
    re-ACK 032223f403
  CruzMolina:
    tACK 032223f403
  achow101:
    ACK 032223f403
  optout21:
    ACK 032223f403
  sedited:
    ACK 032223f403

Tree-SHA512: 6ed51d1a492ca216108b10c01668b01f986260641714951da1d282f1dacf87f0df2b312108f24c06151d3b81eaa4ca6eb4e9ab4e2d829346b0e8f07d0c569a1e
This commit is contained in:
Ava Chow
2026-05-13 14:27:11 -07:00
5 changed files with 89 additions and 28 deletions

View File

@@ -163,6 +163,8 @@ CDBBatch::CDBBatch(const CDBWrapper& _parent)
: parent{_parent},
m_impl_batch{std::make_unique<CDBBatch::WriteBatchImpl>()}
{
m_key_scratch.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
m_value_scratch.reserve(DBWRAPPER_PREALLOC_VALUE_SIZE);
Clear();
};
@@ -171,13 +173,15 @@ CDBBatch::~CDBBatch() = default;
void CDBBatch::Clear()
{
m_impl_batch->batch.Clear();
assert(m_key_scratch.empty());
assert(m_value_scratch.empty());
}
void CDBBatch::WriteImpl(std::span<const std::byte> key, DataStream& ssValue)
void CDBBatch::WriteImpl(std::span<const std::byte> key, DataStream& value)
{
leveldb::Slice slKey(CharCast(key.data()), key.size());
dbwrapper_private::GetObfuscation(parent)(ssValue);
leveldb::Slice slValue(CharCast(ssValue.data()), ssValue.size());
dbwrapper_private::GetObfuscation(parent)(value);
leveldb::Slice slValue(CharCast(value.data()), value.size());
m_impl_batch->batch.Put(slKey, slValue);
}
@@ -356,7 +360,10 @@ struct CDBIterator::IteratorImpl {
};
CDBIterator::CDBIterator(const CDBWrapper& _parent, std::unique_ptr<IteratorImpl> _piter) : parent(_parent),
m_impl_iter(std::move(_piter)) {}
m_impl_iter(std::move(_piter))
{
m_scratch.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
}
CDBIterator* CDBWrapper::NewIterator()
{

View File

@@ -79,10 +79,10 @@ private:
struct WriteBatchImpl;
const std::unique_ptr<WriteBatchImpl> m_impl_batch;
DataStream ssKey{};
DataStream ssValue{};
DataStream m_key_scratch{};
DataStream m_value_scratch{};
void WriteImpl(std::span<const std::byte> key, DataStream& ssValue);
void WriteImpl(std::span<const std::byte> key, DataStream& value);
void EraseImpl(std::span<const std::byte> key);
public:
@@ -96,22 +96,18 @@ public:
template <typename K, typename V>
void Write(const K& key, const V& value)
{
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssValue.reserve(DBWRAPPER_PREALLOC_VALUE_SIZE);
ssKey << key;
ssValue << value;
WriteImpl(ssKey, ssValue);
ssKey.clear();
ssValue.clear();
ScopedDataStreamUsage scoped_key{m_key_scratch}, scoped_value{m_value_scratch};
m_key_scratch << key;
m_value_scratch << value;
WriteImpl(m_key_scratch, m_value_scratch);
}
template <typename K>
void Erase(const K& key)
{
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssKey << key;
EraseImpl(ssKey);
ssKey.clear();
ScopedDataStreamUsage scoped_key{m_key_scratch};
m_key_scratch << key;
EraseImpl(m_key_scratch);
}
size_t ApproximateSize() const;
@@ -125,6 +121,7 @@ public:
private:
const CDBWrapper &parent;
const std::unique_ptr<IteratorImpl> m_impl_iter;
DataStream m_scratch{};
void SeekImpl(std::span<const std::byte> key);
std::span<const std::byte> GetKeyImpl() const;
@@ -144,10 +141,9 @@ public:
void SeekToFirst();
template<typename K> void Seek(const K& key) {
DataStream ssKey{};
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssKey << key;
SeekImpl(ssKey);
ScopedDataStreamUsage scoped_scratch{m_scratch};
m_scratch << key;
SeekImpl(m_scratch);
}
void Next();
@@ -164,9 +160,10 @@ public:
template<typename V> bool GetValue(V& value) {
try {
DataStream ssValue{GetValueImpl()};
dbwrapper_private::GetObfuscation(parent)(ssValue);
ssValue >> value;
ScopedDataStreamUsage scoped_scratch{m_scratch};
m_scratch.write(GetValueImpl());
dbwrapper_private::GetObfuscation(parent)(m_scratch);
m_scratch >> value;
} catch (const std::exception&) {
return false;
}

View File

@@ -265,6 +265,20 @@ public:
size_t GetMemoryUsage() const noexcept;
};
// Require empty scratch streams on entry and reset them on exit.
class ScopedDataStreamUsage
{
DataStream& m_stream;
public:
explicit ScopedDataStreamUsage(DataStream& stream) : m_stream{stream} { assert(m_stream.empty()); }
ScopedDataStreamUsage(const ScopedDataStreamUsage&) = delete;
ScopedDataStreamUsage& operator=(const ScopedDataStreamUsage&) = delete;
~ScopedDataStreamUsage() { m_stream.clear(); }
};
template <typename IStream>
class BitStreamReader
{

View File

@@ -184,6 +184,13 @@ BOOST_AUTO_TEST_CASE(dbwrapper_batch)
// key3 should've never been written
BOOST_CHECK(dbw.Read(key3, res) == false);
batch.Clear();
batch.Write(key3, in3);
dbw.WriteBatch(batch);
BOOST_CHECK(dbw.Read(key3, res));
BOOST_CHECK_EQUAL(res.ToString(), in3.ToString());
}
}
@@ -212,18 +219,36 @@ BOOST_AUTO_TEST_CASE(dbwrapper_iterator)
BOOST_CHECK(!it->GetKey(key_too_large));
uint8_t key_res;
uint256 val_res;
BOOST_REQUIRE(it->GetKey(key_res));
BOOST_REQUIRE(it->GetValue(val_res));
BOOST_CHECK_EQUAL(key_res, key);
// A failed value decode must not leave the iterator's scratch stream dirty.
std::pair<uint256, uint8_t> value_too_large;
BOOST_CHECK(!it->GetValue(value_too_large));
uint256 val_res;
BOOST_REQUIRE(it->GetValue(val_res));
BOOST_CHECK_EQUAL(val_res.ToString(), in.ToString());
it->Seek(key2);
BOOST_REQUIRE(it->GetKey(key_res));
BOOST_CHECK_EQUAL(key_res, key2);
BOOST_REQUIRE(it->GetValue(val_res));
BOOST_CHECK_EQUAL(val_res.ToString(), in2.ToString());
it->Seek(key);
BOOST_REQUIRE(it->GetKey(key_res));
BOOST_CHECK_EQUAL(key_res, key);
BOOST_REQUIRE(it->GetValue(val_res));
BOOST_CHECK_EQUAL(val_res.ToString(), in.ToString());
it->Next();
BOOST_REQUIRE(it->GetKey(key_res));
BOOST_REQUIRE(it->GetValue(val_res));
BOOST_CHECK_EQUAL(key_res, key2);
BOOST_REQUIRE(it->GetValue(val_res));
BOOST_CHECK_EQUAL(val_res.ToString(), in2.ToString());
it->Next();

View File

@@ -88,6 +88,24 @@ BOOST_AUTO_TEST_CASE(obfuscation_empty)
BOOST_CHECK(non_null_obf);
}
BOOST_AUTO_TEST_CASE(streams_scoped_data_stream_usage)
{
DataStream stream{};
{
ScopedDataStreamUsage usage{stream};
stream << uint8_t{42};
BOOST_CHECK_GT(stream.size(), 0U);
}
BOOST_CHECK(stream.empty());
{
ScopedDataStreamUsage usage{stream};
stream << uint16_t{42};
BOOST_CHECK_GT(stream.size(), 0U);
}
BOOST_CHECK(stream.empty());
}
BOOST_AUTO_TEST_CASE(xor_file)
{
fs::path xor_path{m_args.GetDataDirBase() / "test_xor.bin"};