optimization: Bulk serialization reads in ReadBlockUndo and ReadBlock

The obfuscation (XOR) operations are currently done byte-by-byte during serialization; buffering the reads will enable batching the obfuscation operations later (not yet done here).

Also, different operating systems seem to handle file caching differently, so reading bigger batches (and processing those from memory) is a bit faster (likely because of fewer native `fread` calls or less locking).

Since `ReadBlock[Undo]` is called with the file position set just after the block (or undo) size field, we have to start by backtracking 4 bytes to be able to read the expected size first.
As a consequence, the `FlatFilePos pos` parameter of `ReadBlock` is now taken by value (copied) instead of by const reference.

> cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='ReadBlockBench' -min-time=10000

> C++ compiler .......................... AppleClang 16.0.0.16000026

Before:
|               ns/op |                op/s |    err% |     total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
|        2,242,815.02 |              445.87 |    0.4% |     11.03 | `ReadBlockBench`

After:
|               ns/op |                op/s |    err% |     total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
|        1,717,451.57 |              582.26 |    0.1% |     11.01 | `ReadBlockBench`

> C++ compiler .......................... GNU 13.3.0

Before:
|               ns/op |                op/s |    err% |          ins/op |          cyc/op |    IPC |         bra/op |   miss% |     total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|        7,786,309.20 |              128.43 |    0.0% |   70,832,812.80 |   23,803,523.16 |  2.976 |   5,073,002.56 |    0.4% |     10.72 | `ReadBlockBench`

After:
|               ns/op |                op/s |    err% |          ins/op |          cyc/op |    IPC |         bra/op |   miss% |     total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|        6,272,557.28 |              159.42 |    0.0% |   63,251,231.42 |   19,739,780.92 |  3.204 |   3,589,886.66 |    0.3% |     10.57 | `ReadBlockBench`

Co-authored-by: Cory Fields <cory-nospam-@coryfields.com>
Co-authored-by: Martin Leitner-Ankerl <martin.ankerl@gmail.com>
Co-authored-by: Ryan Ofsky <ryan@ofsky.org>
This commit is contained in:
Lőrinc 2025-03-14 20:11:57 +01:00
parent 79b65ad6db
commit b3ab94b12c
4 changed files with 80 additions and 5 deletions

View File

@ -657,7 +657,13 @@ CBlockFileInfo* BlockManager::GetBlockFileInfo(size_t n)
bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index) const
{
const FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())};
FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())};
if (pos.nPos < HEADER_BYTE_SIZE) {
LogError("%s: OpenUndoFile failed for %s while reading", __func__, pos.ToString());
return false;
}
uint32_t undo_size;
pos.nPos -= sizeof(undo_size);
// Open history file to read
AutoFile filein{OpenUndoFile(pos, true)};
@ -668,7 +674,14 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index
try {
// Read block
HashVerifier verifier{filein}; // Use HashVerifier, as reserializing may lose data, c.f. commit d3424243
filein >> undo_size;
if (undo_size > MAX_SIZE) {
LogError("Refusing to read undo data of size: %d", undo_size);
return false;
}
BufferedFileR buff(filein, undo_size);
HashVerifier verifier{buff}; // Use HashVerifier, as reserializing may lose data, c.f. commit d3424243
verifier << index.pprev->GetBlockHash();
verifier >> blockundo;
@ -982,10 +995,17 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt
return true;
}
bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
bool BlockManager::ReadBlock(CBlock& block, FlatFilePos pos) const
{
block.SetNull();
if (pos.nPos < HEADER_BYTE_SIZE) {
LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString());
return false;
}
uint32_t blk_size;
pos.nPos -= sizeof(blk_size);
// Open history file to read
AutoFile filein{OpenBlockFile(pos, true)};
if (filein.IsNull()) {
@ -995,7 +1015,13 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
try {
// Read block
filein >> TX_WITH_WITNESS(block);
filein >> blk_size;
if (blk_size > MAX_SIZE) {
LogError("Refusing to read block of size: %d", blk_size);
return false;
}
BufferedFileR(filein, blk_size) >> TX_WITH_WITNESS(block);
} catch (const std::exception& e) {
LogError("%s: Deserialize or I/O error - %s at %s", __func__, e.what(), pos.ToString());
return false;

View File

@ -411,7 +411,7 @@ public:
void UnlinkPrunedFiles(const std::set<int>& setFilesToPrune) const;
/** Functions for disk access for blocks */
bool ReadBlock(CBlock& block, const FlatFilePos& pos) const;
bool ReadBlock(CBlock& block, FlatFilePos pos) const;
bool ReadBlock(CBlock& block, const CBlockIndex& index) const;
bool ReadRawBlock(std::vector<uint8_t>& block, const FlatFilePos& pos) const;

View File

@ -23,6 +23,7 @@
#include <string>
#include <utility>
#include <vector>
#include <util/check.h>
namespace util {
inline void Xor(Span<std::byte> write, Span<const std::byte> key, size_t key_offset = 0)
@ -467,6 +468,28 @@ public:
}
};
class BufferedFileR
{
DataStream m_buf;
public:
explicit BufferedFileR(AutoFile& file, const uint32_t buffer_size)
{
m_buf.resize(buffer_size);
file.read(m_buf);
Assert(m_buf.size() == buffer_size);
}
void read(Span<std::byte> dst) { m_buf.read(dst); }
template <typename T>
BufferedFileR& operator>>(T&& obj)
{
Unserialize(m_buf, obj);
return *this;
}
};
/** Wrapper around an AutoFile& that implements a ring buffer to
* deserialize from. It guarantees the ability to rewind a given number of bytes.
*

View File

@ -567,4 +567,30 @@ BOOST_AUTO_TEST_CASE(streams_hashed)
BOOST_CHECK_EQUAL(hash_writer.GetHash(), hash_verifier.GetHash());
}
// Writes three random 32-bit values to a temporary file (two through the
// serialization operator, one as raw bytes) and checks that BufferedFileR
// returns the same values through both operator>> and read().
BOOST_AUTO_TEST_CASE(streams_datastream_write_large)
{
    const uint32_t v1{m_rng.rand32()};
    const uint32_t v2{m_rng.rand32()};
    const uint32_t v3{m_rng.rand32()};
    const fs::path tmp_path{m_args.GetDataDirBase() / "test_datastream_write_large.bin"};

    // Phase 1: persist the values.
    {
        AutoFile out_file{fsbridge::fopen(tmp_path, "w+b")};
        out_file << v1 << v2;
        out_file.write(AsBytes(Span{&v3, 1}));
    }

    // Phase 2: load everything back in one buffered read and compare.
    {
        AutoFile in_file{fsbridge::fopen(tmp_path, "rb")};
        uint32_t got1{0};
        uint32_t got2{0};
        uint32_t got3{0};
        BufferedFileR reader(in_file, sizeof(v1) + sizeof(v2) + sizeof(v3));
        reader >> got1 >> got2;
        reader.read(AsWritableBytes(Span{&got3, 1}));
        BOOST_CHECK_EQUAL(got1, v1);
        BOOST_CHECK_EQUAL(got2, v2);
        BOOST_CHECK_EQUAL(got3, v3);
    }

    fs::remove(tmp_path);
}
BOOST_AUTO_TEST_SUITE_END()