mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-05-13 15:33:51 +02:00
5de2f97a05 dbwrapper: use `SpanReader` for iterator keys (Lőrinc) f0e498af5c test: cover failed `CDBIterator::GetKey()` deserialization (Lőrinc) Pull request description: ### Problem `CDBIterator::GetKey()` only deserializes the current LevelDB key once, `GetKeyImpl()` already exposes that key as a contiguous borrowed byte span, and `GetKey()` creates a fresh local reader and only performs immediate forward reads before returning. The copied `DataStream` currently insulates the iterator entry from a failed decode, so switching to a borrowed reader is only safe if a deserialization failure still returns false and leaves the same key/value readable afterward. > [!NOTE] > The same simplification does not apply to `GetValue()`, because that path deobfuscates the value bytes in place first and still needs an owning mutable buffer. ### Fix Add a preparatory test that performs an invalid read and checks that the failed decode [does not consume](eb85cacd29/src/leveldb/include/leveldb/iterator.h (L60-L62)) the current iterator entry. Then switch `GetKey()` to `SpanReader` so the key bytes are read in place instead of being copied into a temporary `DataStream`. This keeps the same exception swallowing and `bool` return semantics while avoiding the extra allocation and copy. 
### Context Related to https://github.com/bitcoin/bitcoin/pull/34483 and https://github.com/bitcoin/bitcoin/pull/35025 ### Reproducer `gettxoutsetinfo` is ~10-12% faster for up-to-date blocks (run on SSD), see: <details><summary>2026-04-20 | gettxoutsetinfo | rpi5-8 | aarch64 | Cortex-A76 | 4 cores | 7.7Gi RAM | ext4 | SSD</summary> ``` COMMITS="64a88c8c1edc7ee5cef623d9aa8179a239e27ce9 57dc0202ddb7b4cbdd521fb237a25fc4d7f28ddf"; \ BASE_DIR="/mnt/my_storage"; DATA_DIR="$BASE_DIR/BitcoinData"; LOG_DIR="$BASE_DIR/logs"; \ mkdir -p "$LOG_DIR" && \ (echo ""; for c in $COMMITS; do git cat-file -e "$c^{commit}" 2>/dev/null || git fetch -q origin "$c" || exit 1; git log -1 --pretty='%h %s' "$c" || exit 1; done) && \ (echo "" && echo "$(date -I) | gettxoutsetinfo | $(hostname) | $(uname -m) | $(lscpu | grep 'Model name' | head -1 | cut -d: -f2 | xargs) | $(nproc) cores | $(free -h | awk '/^Mem:/{print $2}') RAM | $(df -T $BASE_DIR | awk 'NR==2{print $2}') | $(lsblk -no ROTA $(df --output=source $BASE_DIR | tail -1) | grep -q 1 && echo HDD || echo SSD)"; echo "") && \ hyperfine \ --sort command \ --runs 10 \ --export-json "$BASE_DIR/gettxoutsetinfo-$(sed -E 's/([a-f0-9]{8})[a-f0-9]* ?/\1-/g;s/-$//'<<<"$COMMITS")-$(date +%s).json" \ --parameter-list COMMIT ${COMMITS// /,} \ --prepare "killall -9 bitcoind 2>/dev/null || true; rm -f $DATA_DIR/debug.log; git clean -fxd && git reset --hard {COMMIT} && \ cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Release && ninja -C build bitcoind bitcoin-cli -j$(nproc) && \ ./build/bin/bitcoind -datadir=$DATA_DIR -connect=0 -listen=0 -dnsseed=0 -coinstatsindex=0 -txindex=0 -blockfilterindex=0 -daemon -printtoconsole=0; \ ./build/bin/bitcoin-cli -datadir=$DATA_DIR -rpcwait getblockcount >/dev/null" \ --conclude "./build/bin/bitcoin-cli -datadir=$DATA_DIR stop 2>/dev/null || true; killall bitcoind 2>/dev/null || true; sleep 10; \ grep -q 'Done loading' $DATA_DIR/debug.log && grep 'Bitcoin Core version' $DATA_DIR/debug.log | grep -q \"\$(git 
rev-parse --short=12 {COMMIT})\"; \ cp $DATA_DIR/debug.log $LOG_DIR/gettxoutsetinfo-{COMMIT}-$(date +%s).log" \ "./build/bin/bitcoin-cli -datadir=$DATA_DIR -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null" 64a88c8c1e Merge bitcoin/bitcoin#35096: kernel: align height parameters to int32_t in btck API 57dc0202dd dbwrapper: use SpanReader for iterator keys Benchmark 1: ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 64a88c8c1e) Time (mean ± σ): 109.002 s ± 3.091 s [User: 0.003 s, System: 0.004 s] Range (min … max): 106.191 s … 113.608 s 10 runs Benchmark 2: ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 57dc0202ddb7b4cbdd521fb237a25fc4d7f28ddf) Time (mean ± σ): 97.711 s ± 1.172 s [User: 0.003 s, System: 0.004 s] Range (min … max): 96.651 s … 100.104 s 10 runs Relative speed comparison 1.12 ± 0.03 ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 64a88c8c1e) 1.00 ./build/bin/bitcoin-cli -datadir=/mnt/my_storage/BitcoinData -rpcclienttimeout=0 -named gettxoutsetinfo hash_type='none' use_index='false' >/dev/null (COMMIT = 57dc0202ddb7b4cbdd521fb237a25fc4d7f28ddf) ``` </details> ACKs for top commit: achow101: ACK 5de2f97a05 sedited: ACK 5de2f97a05 andrewtoth: ACK 5de2f97a05 optout21: ACK 5de2f97a05 theStack: ACK 5de2f97a05 Tree-SHA512: 33b62149625b3ce2a378be9b4dffa361f11e324a2768e460c549b9b704efa78bf96ef5e24487d0cec82c18dafff6ba4571c06ad545684cf8738f38b9d21e9b0c
277 lines
7.3 KiB
C++
277 lines
7.3 KiB
C++
// Copyright (c) 2012-present The Bitcoin Core developers
|
|
// Distributed under the MIT software license, see the accompanying
|
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
|
|
#ifndef BITCOIN_DBWRAPPER_H
|
|
#define BITCOIN_DBWRAPPER_H
|
|
|
|
#include <attributes.h>
|
|
#include <serialize.h>
|
|
#include <span.h>
|
|
#include <streams.h>
|
|
#include <util/byte_units.h>
|
|
#include <util/check.h>
|
|
#include <util/fs.h>
|
|
|
|
#include <cstddef>
|
|
#include <exception>
|
|
#include <memory>
|
|
#include <optional>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
|
|
static const size_t DBWRAPPER_PREALLOC_KEY_SIZE = 64;
|
|
static const size_t DBWRAPPER_PREALLOC_VALUE_SIZE = 1024;
|
|
static const size_t DBWRAPPER_MAX_FILE_SIZE{32_MiB};
|
|
|
|
//! Performance and debugging knobs that are under the user's control.
struct DBOptions {
    //! When set, the database is compacted on startup.
    bool force_compact{false};
};
|
|
|
|
//! Application-specific storage settings.
|
|
struct DBParams {
|
|
//! Location in the filesystem where leveldb data will be stored.
|
|
fs::path path;
|
|
//! Configures various leveldb cache settings.
|
|
size_t cache_bytes;
|
|
//! If true, use leveldb's memory environment.
|
|
bool memory_only = false;
|
|
//! If true, remove all existing data.
|
|
bool wipe_data = false;
|
|
//! If true, store data obfuscated via simple XOR. If false, XOR with a
|
|
//! zero'd byte array.
|
|
bool obfuscate = false;
|
|
//! Passed-through options.
|
|
DBOptions options{};
|
|
};
|
|
|
|
//! Error type of the database wrapper; a std::runtime_error that carries a message.
class dbwrapper_error : public std::runtime_error
{
public:
    //! @param[in] msg  Text forwarded to std::runtime_error and reported by what().
    explicit dbwrapper_error(const std::string& msg)
        : std::runtime_error{msg}
    {
    }
};
|
|
|
|
// Forward declaration: the class is defined further down in this header, but
// the declarations in between already refer to it.
class CDBWrapper;
|
|
|
|
/** These should be considered an implementation detail of the specific database.
|
|
*/
|
|
namespace dbwrapper_private {
|
|
|
|
/** Work around circular dependency, as well as for testing in dbwrapper_tests.
|
|
* Database obfuscation should be considered an implementation detail of the
|
|
* specific database.
|
|
*/
|
|
const Obfuscation& GetObfuscation(const CDBWrapper&);
|
|
}; // namespace dbwrapper_private
|
|
|
|
/**
 * NOTE(review): defined outside this header. Presumably removes the database
 * stored at path_str and reports success through the return value - confirm
 * against the implementation before relying on it.
 *
 * @param[in] path_str  Database location, passed as a string.
 */
bool DestroyDB(const std::string& path_str);
|
|
|
|
/** Batch of changes queued to be written to a CDBWrapper */
|
|
class CDBBatch
|
|
{
|
|
friend class CDBWrapper;
|
|
|
|
private:
|
|
const CDBWrapper &parent;
|
|
|
|
struct WriteBatchImpl;
|
|
const std::unique_ptr<WriteBatchImpl> m_impl_batch;
|
|
|
|
DataStream ssKey{};
|
|
DataStream ssValue{};
|
|
|
|
void WriteImpl(std::span<const std::byte> key, DataStream& ssValue);
|
|
void EraseImpl(std::span<const std::byte> key);
|
|
|
|
public:
|
|
/**
|
|
* @param[in] _parent CDBWrapper that this batch is to be submitted to
|
|
*/
|
|
explicit CDBBatch(const CDBWrapper& _parent);
|
|
~CDBBatch();
|
|
void Clear();
|
|
|
|
template <typename K, typename V>
|
|
void Write(const K& key, const V& value)
|
|
{
|
|
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
|
|
ssValue.reserve(DBWRAPPER_PREALLOC_VALUE_SIZE);
|
|
ssKey << key;
|
|
ssValue << value;
|
|
WriteImpl(ssKey, ssValue);
|
|
ssKey.clear();
|
|
ssValue.clear();
|
|
}
|
|
|
|
template <typename K>
|
|
void Erase(const K& key)
|
|
{
|
|
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
|
|
ssKey << key;
|
|
EraseImpl(ssKey);
|
|
ssKey.clear();
|
|
}
|
|
|
|
size_t ApproximateSize() const;
|
|
};
|
|
|
|
class CDBIterator
|
|
{
|
|
public:
|
|
struct IteratorImpl;
|
|
|
|
private:
|
|
const CDBWrapper &parent;
|
|
const std::unique_ptr<IteratorImpl> m_impl_iter;
|
|
|
|
void SeekImpl(std::span<const std::byte> key);
|
|
std::span<const std::byte> GetKeyImpl() const;
|
|
std::span<const std::byte> GetValueImpl() const;
|
|
|
|
public:
|
|
|
|
/**
|
|
* @param[in] _parent Parent CDBWrapper instance.
|
|
* @param[in] _piter The original leveldb iterator.
|
|
*/
|
|
CDBIterator(const CDBWrapper& _parent, std::unique_ptr<IteratorImpl> _piter);
|
|
~CDBIterator();
|
|
|
|
bool Valid() const;
|
|
|
|
void SeekToFirst();
|
|
|
|
template<typename K> void Seek(const K& key) {
|
|
DataStream ssKey{};
|
|
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
|
|
ssKey << key;
|
|
SeekImpl(ssKey);
|
|
}
|
|
|
|
void Next();
|
|
|
|
template<typename K> bool GetKey(K& key) {
|
|
try {
|
|
SpanReader ssKey{GetKeyImpl()};
|
|
ssKey >> key;
|
|
} catch (const std::exception&) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template<typename V> bool GetValue(V& value) {
|
|
try {
|
|
DataStream ssValue{GetValueImpl()};
|
|
dbwrapper_private::GetObfuscation(parent)(ssValue);
|
|
ssValue >> value;
|
|
} catch (const std::exception&) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
|
|
// Forward declaration only; no definition is provided in this header.
// CDBWrapper holds an instance through a std::unique_ptr.
struct LevelDBContext;
|
|
|
|
class CDBWrapper
|
|
{
|
|
friend const Obfuscation& dbwrapper_private::GetObfuscation(const CDBWrapper&);
|
|
private:
|
|
//! holds all leveldb-specific fields of this class
|
|
std::unique_ptr<LevelDBContext> m_db_context;
|
|
|
|
//! the name of this database
|
|
std::string m_name;
|
|
|
|
//! optional XOR-obfuscation of the database
|
|
Obfuscation m_obfuscation;
|
|
|
|
//! obfuscation key storage key, null-prefixed to avoid collisions
|
|
inline static const std::string OBFUSCATION_KEY{"\000obfuscate_key", 14}; // explicit size to avoid truncation at leading \0
|
|
|
|
std::optional<std::string> ReadImpl(std::span<const std::byte> key) const;
|
|
bool ExistsImpl(std::span<const std::byte> key) const;
|
|
size_t EstimateSizeImpl(std::span<const std::byte> key1, std::span<const std::byte> key2) const;
|
|
auto& DBContext() const LIFETIMEBOUND { return *Assert(m_db_context); }
|
|
|
|
public:
|
|
CDBWrapper(const DBParams& params);
|
|
~CDBWrapper();
|
|
|
|
CDBWrapper(const CDBWrapper&) = delete;
|
|
CDBWrapper& operator=(const CDBWrapper&) = delete;
|
|
|
|
template <typename K, typename V>
|
|
bool Read(const K& key, V& value) const
|
|
{
|
|
DataStream ssKey{};
|
|
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
|
|
ssKey << key;
|
|
std::optional<std::string> strValue{ReadImpl(ssKey)};
|
|
if (!strValue) {
|
|
return false;
|
|
}
|
|
try {
|
|
std::span ssValue{MakeWritableByteSpan(*strValue)};
|
|
m_obfuscation(ssValue);
|
|
SpanReader{ssValue} >> value;
|
|
} catch (const std::exception&) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template <typename K, typename V>
|
|
void Write(const K& key, const V& value, bool fSync = false)
|
|
{
|
|
CDBBatch batch(*this);
|
|
batch.Write(key, value);
|
|
WriteBatch(batch, fSync);
|
|
}
|
|
|
|
template <typename K>
|
|
bool Exists(const K& key) const
|
|
{
|
|
DataStream ssKey{};
|
|
ssKey.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
|
|
ssKey << key;
|
|
return ExistsImpl(ssKey);
|
|
}
|
|
|
|
template <typename K>
|
|
void Erase(const K& key, bool fSync = false)
|
|
{
|
|
CDBBatch batch(*this);
|
|
batch.Erase(key);
|
|
WriteBatch(batch, fSync);
|
|
}
|
|
|
|
void WriteBatch(CDBBatch& batch, bool fSync = false);
|
|
|
|
// Get an estimate of LevelDB memory usage (in bytes).
|
|
size_t DynamicMemoryUsage() const;
|
|
|
|
CDBIterator* NewIterator();
|
|
|
|
/**
|
|
* Return true if the database managed by this class contains no entries.
|
|
*/
|
|
bool IsEmpty();
|
|
|
|
template<typename K>
|
|
size_t EstimateSize(const K& key_begin, const K& key_end) const
|
|
{
|
|
DataStream ssKey1{}, ssKey2{};
|
|
ssKey1.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
|
|
ssKey2.reserve(DBWRAPPER_PREALLOC_KEY_SIZE);
|
|
ssKey1 << key_begin;
|
|
ssKey2 << key_end;
|
|
return EstimateSizeImpl(ssKey1, ssKey2);
|
|
}
|
|
};
|
|
|
|
#endif // BITCOIN_DBWRAPPER_H
|