Files
bitcoin/src/dbwrapper.h
Ava Chow 290e48fbf0 Merge bitcoin/bitcoin#35128: dbwrapper: avoid copying CDBIterator keys in GetKey()
5de2f97a05 dbwrapper: use `SpanReader` for iterator keys (Lőrinc)
f0e498af5c test: cover failed `CDBIterator::GetKey()` deserialization (Lőrinc)

Pull request description:

  ### Problem
  `CDBIterator::GetKey()` deserializes the current LevelDB key only once, and `GetKeyImpl()` already exposes that key as a contiguous borrowed byte span. `GetKey()` creates a fresh local reader and only performs immediate forward reads before returning, so copying the key bytes into a temporary `DataStream` first is unnecessary.

  The copied `DataStream` currently insulates the iterator entry from a failed decode, so switching to a borrowed reader is only safe if a deserialization failure still returns false and leaves the same key/value readable afterward.

  > [!NOTE]
  > The same simplification does not apply to `GetValue()`, because that path deobfuscates the value bytes in place first and still needs an owning mutable buffer.

  ### Fix
  Add a preparatory test that performs an invalid read and checks that the failed decode [does not consume](eb85cacd29/src/leveldb/include/leveldb/iterator.h (L60-L62)) the current iterator entry.
  Then switch `GetKey()` to `SpanReader` so the key bytes are read in place instead of being copied into a temporary `DataStream`.

  This keeps the same exception swallowing and `bool` return semantics while avoiding the extra allocation and copy.

  ### Context
  Related to https://github.com/bitcoin/bitcoin/pull/34483 and https://github.com/bitcoin/bitcoin/pull/35025

  ### Reproducer
  `gettxoutsetinfo` is ~10-12% faster for up-to-date blocks (run on SSD), see:

  <details><summary>2026-04-20 | gettxoutsetinfo | rpi5-8 | aarch64 | Cortex-A76 | 4 cores | 7.7Gi RAM | ext4 | SSD</summary>

  ```
  COMMITS="64a88c8c1edc7ee5cef623d9aa8179a239e27ce9 57dc0202ddb7b4cbdd521fb237a25fc4d7f28ddf"; \
  BASE_DIR="/mnt/my_storage"; DATA_DIR="$BASE_DIR/BitcoinData"; LOG_DIR="$BASE_DIR/logs"; \
  mkdir -p "$LOG_DIR" && \
  (echo ""; for c in $COMMITS; do git cat-file -e "$c^{commit}" 2>/dev/null || git fetch -q origin "$c" || exit 1; git log -1 --pretty='%h %s' "$c" || exit 1; done) && \
  (echo "" && echo "$(date -I) | gettxoutsetinfo | $(hostname) | $(uname -m) | $(lscpu | grep 'Model name' | head -1 | cut -d: -f2 | xargs) | $(nproc) cores | $(free -h | awk '/^Mem:/{print $2}') RAM | $(df -T $BASE_DIR | awk 'NR==2{print $2}') | $(lsblk -no ROTA $(df --output=source $BASE_DIR | tail -1) | grep -q 1 && echo HDD || echo SSD)"; echo "") && \
  hyperfine \
    --sort command \
    --runs 10 \
    --export-json "$BASE_DIR/gettxoutsetinfo-$(sed -E 's/([a-f0-9]{8})[a-f0-9]* ?/\1-/g;s/-$//'<<<"$COMMITS")-$(date +%s).json" \
    --parameter-list COMMIT ${COMMITS// /,} \
    --prepare "killall -9 bitcoind 2>/dev/null || true; rm -f $DATA_DIR/debug.log; git clean -fxd && git reset --hard {COMMIT} && \
      cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release && ninja -C build bitcoind bitcoin-cli -j$(nproc) && \
      ./build/bin/bitcoind -datadir=$DATA_DIR -connect=0 -listen=0 -dnsseed=0 -coinstatsindex=0 -txindex=0 -blockfilterindex=0 -daemon -printtoconsole=0; \
      ./build/bin/bitcoin-cli -datadir=$DATA_DIR -rpcwait getblockcount >/dev/null" \
    --conclude "./build/bin/bitcoin-cli -datadir=$DATA_DIR stop 2>/dev/null || true; killall bitcoind 2>/dev/null || true; sleep 10; \
      grep -q 'Done loading' $DATA_DIR/debug.log && grep 'Bitcoin Core version' $DATA_DIR/debug.log | grep -q \"\$(git rev-parse --short=12 {COMMIT})\"; \
      cp $DATA_DIR/debug.log $LOG_DIR/gettxoutsetinfo-{COMMIT}-$(date +%s).log" \
    "./build/bin/bitcoin-cli -datadir=$DATA_DIR -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null"

  64a88c8c1e Merge bitcoin/bitcoin#35096: kernel: align height parameters to int32_t in btck API
  57dc0202dd dbwrapper: use SpanReader for iterator keys

  Benchmark 1: ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 64a88c8c1e)
    Time (mean ± σ):     109.002 s ±  3.091 s    [User: 0.003 s, System: 0.004 s]
    Range (min … max):   106.191 s … 113.608 s    10 runs

  Benchmark 2: ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 57dc0202ddb7b4cbdd521fb237a25fc4d7f28ddf)
    Time (mean ± σ):     97.711 s ±  1.172 s    [User: 0.003 s, System: 0.004 s]
    Range (min … max):   96.651 s … 100.104 s    10 runs

  Relative speed comparison
          1.12 ±  0.03  ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 64a88c8c1e)
          1.00          ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 57dc0202ddb7b4cbdd521fb237a25fc4d7f28ddf)
  ```

  </details>

ACKs for top commit:
  achow101:
    ACK 5de2f97a05
  sedited:
    ACK 5de2f97a05
  andrewtoth:
    ACK 5de2f97a05
  optout21:
    ACK 5de2f97a05
  theStack:
    ACK 5de2f97a05

Tree-SHA512: 33b62149625b3ce2a378be9b4dffa361f11e324a2768e460c549b9b704efa78bf96ef5e24487d0cec82c18dafff6ba4571c06ad545684cf8738f38b9d21e9b0c
2026-04-23 11:50:44 -07:00

277 lines
7.3 KiB
C++

// Copyright (c) 2012-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_DBWRAPPER_H
#define BITCOIN_DBWRAPPER_H
#include <attributes.h>
#include <serialize.h>
#include <span.h>
#include <streams.h>
#include <util/byte_units.h>
#include <util/check.h>
#include <util/fs.h>
#include <cstddef>
#include <exception>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
//! Capacity hint passed to reserve() on the streams that serialize database keys.
static constexpr size_t DBWRAPPER_PREALLOC_KEY_SIZE{64};
//! Capacity hint passed to reserve() on the streams that serialize database values.
static constexpr size_t DBWRAPPER_PREALLOC_VALUE_SIZE{1024};
//! File size limit, expressed with the `_MiB` literal (32 MiB).
//! NOTE(review): presumably handed to LevelDB as its maximum file size; the
//! use site is not visible in this header -- confirm in the implementation.
static const size_t DBWRAPPER_MAX_FILE_SIZE{32_MiB};
//! Performance and debug options that are under the user's control.
struct DBOptions {
    //! When set, the database is compacted on startup.
    bool force_compact{false};
};
//! Storage settings that are specific to the application.
struct DBParams {
    //! Filesystem location where the leveldb data will be stored.
    fs::path path;
    //! Used to configure various leveldb cache settings.
    //! Unlike the flags below, this field has no default member initializer.
    size_t cache_bytes;
    //! When true, leveldb's memory environment is used.
    bool memory_only{false};
    //! When true, all existing data is removed.
    bool wipe_data{false};
    //! When true, data is stored obfuscated via simple XOR. When false, the
    //! XOR is done with a zero'd byte array.
    bool obfuscate{false};
    //! Options that are passed through.
    DBOptions options{};
};
/** Exception type of the database wrapper: a std::runtime_error that carries
 *  the message it was constructed with.
 */
class dbwrapper_error : public std::runtime_error
{
public:
    explicit dbwrapper_error(const std::string& msg)
        : std::runtime_error{msg}
    {
    }
};
class CDBWrapper;

/** Everything in this namespace should be considered an implementation detail
 *  of the specific database.
 */
namespace dbwrapper_private {

/** Returns a reference to an Obfuscation for the given CDBWrapper.
 *
 * This free function works around a circular dependency and is also used for
 * testing in dbwrapper_tests. Database obfuscation should be considered an
 * implementation detail of the specific database.
 */
const Obfuscation& GetObfuscation(const CDBWrapper&);

} // namespace dbwrapper_private
/** NOTE(review): only the declaration is visible in this header. Presumably
 *  this destroys the database stored at path_str and reports success through
 *  the return value -- confirm against the definition.
 */
bool DestroyDB(const std::string& path_str);
/** Batch of changes queued to be written to a CDBWrapper */
class CDBBatch
{
    friend class CDBWrapper;

private:
    //! Database wrapper this batch was created for.
    const CDBWrapper& parent;

    struct WriteBatchImpl;
    //! Opaque implementation object; WriteBatchImpl is only declared in this header.
    const std::unique_ptr<WriteBatchImpl> m_impl_batch;

    //! Scratch stream for serialized keys, reused by Write() and Erase().
    DataStream ssKey{};
    //! Scratch stream for serialized values, reused by Write().
    DataStream ssValue{};

    //! Queues a write of the serialized key; the value stream is passed by mutable reference.
    void WriteImpl(std::span<const std::byte> key, DataStream& ssValue);
    //! Queues an erase of the serialized key.
    void EraseImpl(std::span<const std::byte> key);

public:
    /**
     * @param[in] _parent   CDBWrapper that this batch is to be submitted to
     */
    explicit CDBBatch(const CDBWrapper& _parent);
    ~CDBBatch();

    void Clear();

    /**
     * Serializes key and value into the scratch streams and forwards them to
     * WriteImpl().
     *
     * Both scratch streams are cleared afterwards so they can be reused. The
     * clearing happens on the normal path only: an exception thrown before
     * that point leaves the stream contents in place.
     *
     * @param[in] key     Key to serialize.
     * @param[in] value   Value to serialize.
     */
    template <typename K, typename V>
    void Write(const K& key, const V& value)
    {
        // Reserve up front so that typical entries need no reallocation.
        ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
        ssValue.reserve(DBWRAPPER_PREALLOC_VALUE_SIZE);

        ssKey << key;
        ssValue << value;

        WriteImpl(ssKey, ssValue);

        ssKey.clear();
        ssValue.clear();
    }

    /**
     * Serializes key into the key scratch stream and forwards it to
     * EraseImpl(). The scratch stream is cleared afterwards.
     *
     * @param[in] key   Key to serialize.
     */
    template <typename K>
    void Erase(const K& key)
    {
        ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
        ssKey << key;

        EraseImpl(ssKey);

        ssKey.clear();
    }

    size_t ApproximateSize() const;
};
class CDBIterator
{
public:
struct IteratorImpl;
private:
const CDBWrapper &parent;
const std::unique_ptr<IteratorImpl> m_impl_iter;
void SeekImpl(std::span<const std::byte> key);
std::span<const std::byte> GetKeyImpl() const;
std::span<const std::byte> GetValueImpl() const;
public:
/**
* @param[in] _parent Parent CDBWrapper instance.
* @param[in] _piter The original leveldb iterator.
*/
CDBIterator(const CDBWrapper& _parent, std::unique_ptr<IteratorImpl> _piter);
~CDBIterator();
bool Valid() const;
void SeekToFirst();
template<typename K> void Seek(const K& key) {
DataStream ssKey{};
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssKey << key;
SeekImpl(ssKey);
}
void Next();
template<typename K> bool GetKey(K& key) {
try {
SpanReader ssKey{GetKeyImpl()};
ssKey >> key;
} catch (const std::exception&) {
return false;
}
return true;
}
template<typename V> bool GetValue(V& value) {
try {
DataStream ssValue{GetValueImpl()};
dbwrapper_private::GetObfuscation(parent)(ssValue);
ssValue >> value;
} catch (const std::exception&) {
return false;
}
return true;
}
};
struct LevelDBContext;
class CDBWrapper
{
friend const Obfuscation& dbwrapper_private::GetObfuscation(const CDBWrapper&);
private:
//! holds all leveldb-specific fields of this class
std::unique_ptr<LevelDBContext> m_db_context;
//! the name of this database
std::string m_name;
//! optional XOR-obfuscation of the database
Obfuscation m_obfuscation;
//! obfuscation key storage key, null-prefixed to avoid collisions
inline static const std::string OBFUSCATION_KEY{"\000obfuscate_key", 14}; // explicit size to avoid truncation at leading \0
std::optional<std::string> ReadImpl(std::span<const std::byte> key) const;
bool ExistsImpl(std::span<const std::byte> key) const;
size_t EstimateSizeImpl(std::span<const std::byte> key1, std::span<const std::byte> key2) const;
auto& DBContext() const LIFETIMEBOUND { return *Assert(m_db_context); }
public:
CDBWrapper(const DBParams& params);
~CDBWrapper();
CDBWrapper(const CDBWrapper&) = delete;
CDBWrapper& operator=(const CDBWrapper&) = delete;
template <typename K, typename V>
bool Read(const K& key, V& value) const
{
DataStream ssKey{};
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssKey << key;
std::optional<std::string> strValue{ReadImpl(ssKey)};
if (!strValue) {
return false;
}
try {
std::span ssValue{MakeWritableByteSpan(*strValue)};
m_obfuscation(ssValue);
SpanReader{ssValue} >> value;
} catch (const std::exception&) {
return false;
}
return true;
}
template <typename K, typename V>
void Write(const K& key, const V& value, bool fSync = false)
{
CDBBatch batch(*this);
batch.Write(key, value);
WriteBatch(batch, fSync);
}
template <typename K>
bool Exists(const K& key) const
{
DataStream ssKey{};
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssKey << key;
return ExistsImpl(ssKey);
}
template <typename K>
void Erase(const K& key, bool fSync = false)
{
CDBBatch batch(*this);
batch.Erase(key);
WriteBatch(batch, fSync);
}
void WriteBatch(CDBBatch& batch, bool fSync = false);
// Get an estimate of LevelDB memory usage (in bytes).
size_t DynamicMemoryUsage() const;
CDBIterator* NewIterator();
/**
* Return true if the database managed by this class contains no entries.
*/
bool IsEmpty();
template<typename K>
size_t EstimateSize(const K& key_begin, const K& key_end) const
{
DataStream ssKey1{}, ssKey2{};
ssKey1.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssKey2.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
ssKey1 << key_begin;
ssKey2 << key_end;
return EstimateSizeImpl(ssKey1, ssKey2);
}
};
#endif // BITCOIN_DBWRAPPER_H