From d41a4c0157c3a4922fefd408aad1cd284de5c8a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Fri, 14 Mar 2025 22:08:25 +0100
Subject: [PATCH 1/6] refactor: rename leftover WriteBlockBench

The benchmark was still referencing the old name of the method.
---
 src/bench/readwriteblock.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bench/readwriteblock.cpp b/src/bench/readwriteblock.cpp
index cdf86185ae9..10434b15d20 100644
--- a/src/bench/readwriteblock.cpp
+++ b/src/bench/readwriteblock.cpp
@@ -27,7 +27,7 @@ static CBlock CreateTestBlock()
     return block;
 }
 
-static void SaveBlockBench(benchmark::Bench& bench)
+static void WriteBlockBench(benchmark::Bench& bench)
 {
     const auto testing_setup{MakeNoLogFileContext(ChainType::MAIN)};
     auto& blockman{testing_setup->m_node.chainman->m_blockman};
@@ -63,6 +63,6 @@ static void ReadRawBlockBench(benchmark::Bench& bench)
     });
 }
 
-BENCHMARK(SaveBlockBench, benchmark::PriorityLevel::HIGH);
+BENCHMARK(WriteBlockBench, benchmark::PriorityLevel::HIGH);
 BENCHMARK(ReadBlockBench, benchmark::PriorityLevel::HIGH);
 BENCHMARK(ReadRawBlockBench, benchmark::PriorityLevel::HIGH);

From 4ab35007abda60a68076bfa9d47e3930f2744799 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Fri, 14 Mar 2025 20:11:52 +0100
Subject: [PATCH 2/6] refactor: collect block read operations into try block

In preparation for upcoming changes, `HashVerifier` was moved into the
try/catch block so that the `undo_size` serialization can also live there,
since the try block guards against `Deserialize` errors. For the same
reason, the final checksum verification was moved into the try block as
well.
---
 src/node/blockstorage.cpp | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index fe44aa8a3f2..2e49443b569 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -666,24 +666,26 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index
         return false;
     }
 
-    // Read block
-    uint256 hashChecksum;
-    HashVerifier verifier{filein}; // Use HashVerifier as reserializing may lose data, c.f. commit d342424301013ec47dc146a4beb49d5c9319d80a
     try {
+        // Read block
+        HashVerifier verifier{filein}; // Use HashVerifier, as reserializing may lose data, c.f. commit d3424243
+
         verifier << index.pprev->GetBlockHash();
         verifier >> blockundo;
+
+        uint256 hashChecksum;
         filein >> hashChecksum;
+
+        // Verify checksum
+        if (hashChecksum != verifier.GetHash()) {
+            LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString());
+            return false;
+        }
     } catch (const std::exception& e) {
         LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString());
         return false;
     }
 
-    // Verify checksum
-    if (hashChecksum != verifier.GetHash()) {
-        LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString());
-        return false;
-    }
-
     return true;
 }
@@ -945,15 +947,14 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt
     // Write index header
     fileout << GetParams().MessageStart() << blockundo_size;
-    // Write undo data
     pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE;
-    fileout << blockundo;
-
-    // Calculate & write checksum
-    HashWriter hasher{};
-    hasher << block.pprev->GetBlockHash();
-    hasher << blockundo;
-    fileout << hasher.GetHash();
+    {
+        // Calculate checksum
+        HashWriter hasher{};
+        hasher << block.pprev->GetBlockHash() << blockundo;
+        // Write undo data & checksum
+        fileout << blockundo << hasher.GetHash();
+    }
 
     // rev files are written in block height order, whereas blk files are written as blocks come in (often out of order)
     // we want to flush the rev (undo) file once we've written the last block, which is indicated by the last height
@@ -992,8 +993,8 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
         return false;
     }
 
-    // Read block
     try {
+        // Read block
         filein >> TX_WITH_WITNESS(block);
     } catch (const std::exception& e) {
         LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString());
         return false;
     }
@@ -1091,8 +1092,8 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight)
     // Write index header
     fileout << GetParams().MessageStart() << block_size;
 
-    // Write block
-    pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE;
+    pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE;
+    // Write block
     fileout << TX_WITH_WITNESS(block);
     return pos;
 }

From a07e4c2abf3e010a5e1390a14cd124596dcf31bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Fri, 14 Mar 2025 22:09:38 +0100
Subject: [PATCH 3/6] refactor: shorten BLOCK_SERIALIZATION_HEADER_SIZE

We will be using it inline, so the name should be shorter.
---
 src/node/blockstorage.cpp       |  6 +++---
 src/node/blockstorage.h         |  6 +++---
 src/test/blockmanager_tests.cpp | 12 ++++++------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index 2e49443b569..5fe29dcc1df 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -947,7 +947,7 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt
     // Write index header
     fileout << GetParams().MessageStart() << blockundo_size;
-    pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE;
+    pos.nPos += HEADER_BYTE_SIZE;
     {
         // Calculate checksum
         HashWriter hasher{};
@@ -1078,7 +1078,7 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos&
 FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight)
 {
     const unsigned int block_size{static_cast(GetSerializeSize(TX_WITH_WITNESS(block)))};
-    FlatFilePos pos{FindNextBlockPos(block_size + BLOCK_SERIALIZATION_HEADER_SIZE, nHeight, block.GetBlockTime())};
+    FlatFilePos pos{FindNextBlockPos(block_size + HEADER_BYTE_SIZE, nHeight, block.GetBlockTime())};
     if (pos.IsNull()) {
         LogError("FindNextBlockPos failed");
         return FlatFilePos();
     }
@@ -1092,7 +1092,7 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight)
     // Write index header
     fileout << GetParams().MessageStart() << block_size;
-    pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE;
+    pos.nPos += HEADER_BYTE_SIZE;
     // Write block
     fileout << TX_WITH_WITNESS(block);
     return pos;
 }

diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h
index 665c2ccd834..7d68e5145cb 100644
--- a/src/node/blockstorage.h
+++ b/src/node/blockstorage.h
@@ -75,10 +75,10 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB
 static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB
 
 /** Size of header written by WriteBlock before a serialized CBlock (8 bytes) */
-static constexpr size_t BLOCK_SERIALIZATION_HEADER_SIZE{std::tuple_size_v + sizeof(unsigned int)};
+static constexpr size_t HEADER_BYTE_SIZE{std::tuple_size_v + sizeof(unsigned int)};
 
 /** Total overhead when writing undo data: header (8 bytes) plus checksum (32 bytes) */
-static constexpr size_t UNDO_DATA_DISK_OVERHEAD{BLOCK_SERIALIZATION_HEADER_SIZE + uint256::size()};
+static constexpr size_t UNDO_DATA_DISK_OVERHEAD{HEADER_BYTE_SIZE + uint256::size()};
 
 // Because validation code takes pointers to the map's CBlockIndex objects, if
 // we ever switch to another associative container, we need to either use a
@@ -164,7 +164,7 @@ private:
      * blockfile info, and checks if there is enough disk space to save the block.
      *
      * The nAddSize argument passed to this function should include not just the size of the serialized CBlock, but also the size of
-     * separator fields (BLOCK_SERIALIZATION_HEADER_SIZE).
+     * separator fields (HEADER_BYTE_SIZE).
      */
     [[nodiscard]] FlatFilePos FindNextBlockPos(unsigned int nAddSize, unsigned int nHeight, uint64_t nTime);
     [[nodiscard]] bool FlushChainstateBlockFile(int tip_height);

diff --git a/src/test/blockmanager_tests.cpp b/src/test/blockmanager_tests.cpp
index 8f8cce687f5..7ec5c4073aa 100644
--- a/src/test/blockmanager_tests.cpp
+++ b/src/test/blockmanager_tests.cpp
@@ -17,7 +17,7 @@
 #include
 #include
 
-using node::BLOCK_SERIALIZATION_HEADER_SIZE;
+using node::HEADER_BYTE_SIZE;
 using node::BlockManager;
 using node::KernelNotifications;
 using node::MAX_BLOCKFILE_SIZE;
@@ -40,12 +40,12 @@ BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
     };
     BlockManager blockman{*Assert(m_node.shutdown_signal), blockman_opts};
     // simulate adding a genesis block normally
-    BOOST_CHECK_EQUAL(blockman.WriteBlock(params->GenesisBlock(), 0).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
+    BOOST_CHECK_EQUAL(blockman.WriteBlock(params->GenesisBlock(), 0).nPos, HEADER_BYTE_SIZE);
     // simulate what happens during reindex
     // simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file
     // the block is found at offset 8 because there is an 8 byte serialization header
     // consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file.
-    const FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE};
+    const FlatFilePos pos{0, HEADER_BYTE_SIZE};
     blockman.UpdateBlockInfo(params->GenesisBlock(), 0, pos);
     // now simulate what happens after reindex for the first new block processed
     // the actual block contents don't matter, just that it's a block.
@@ -54,7 +54,7 @@ BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
     // 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293
     // add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301
     FlatFilePos actual{blockman.WriteBlock(params->GenesisBlock(), 1)};
-    BOOST_CHECK_EQUAL(actual.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(TX_WITH_WITNESS(params->GenesisBlock())) + BLOCK_SERIALIZATION_HEADER_SIZE);
+    BOOST_CHECK_EQUAL(actual.nPos, HEADER_BYTE_SIZE + ::GetSerializeSize(TX_WITH_WITNESS(params->GenesisBlock())) + HEADER_BYTE_SIZE);
 }
 
 BOOST_FIXTURE_TEST_CASE(blockmanager_scan_unlink_already_pruned_files, TestChain100Setup)
@@ -172,7 +172,7 @@ BOOST_AUTO_TEST_CASE(blockmanager_flush_block_file)
     FlatFilePos pos2{blockman.WriteBlock(block2, /*nHeight=*/2)};
 
     // Two blocks in the file
-    BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + BLOCK_SERIALIZATION_HEADER_SIZE) * 2);
+    BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + HEADER_BYTE_SIZE) * 2);
 
     // First two blocks are written as expected
     // Errors are expected because block data is junk, thrown AFTER successful read
@@ -199,7 +199,7 @@ BOOST_AUTO_TEST_CASE(blockmanager_flush_block_file)
     // Metadata is updated...
     BOOST_CHECK_EQUAL(block_data->nBlocks, 3);
     // ...but there are still only two blocks in the file
-    BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + BLOCK_SERIALIZATION_HEADER_SIZE) * 2);
+    BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + HEADER_BYTE_SIZE) * 2);
 
     // Block 2 was not overwritten:
     blockman.ReadBlock(read_block, pos2);

From 79b65ad6db0348aa21228e651f65d5beff70a3ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Fri, 24 Jan 2025 15:12:28 +0100
Subject: [PATCH 4/6] log: unify error messages for (read/write)[undo]block

Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com>
---
 src/node/blockstorage.cpp | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index 5fe29dcc1df..f4d33c03722 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -662,7 +662,7 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index
     // Open history file to read
     AutoFile filein{OpenUndoFile(pos, true)};
     if (filein.IsNull()) {
-        LogError("OpenUndoFile failed for %s", pos.ToString());
+        LogError("OpenUndoFile failed for %s while reading", pos.ToString());
         return false;
     }
 
@@ -678,11 +678,11 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index
 
         // Verify checksum
         if (hashChecksum != verifier.GetHash()) {
-            LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString());
+            LogError("%s: Checksum mismatch at %s", __func__, pos.ToString());
             return false;
         }
     } catch (const std::exception& e) {
-        LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString());
+        LogError("%s: Deserialize or I/O error - %s at %s", __func__, e.what(), pos.ToString());
         return false;
     }
 
@@ -935,13 +935,13 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt
     FlatFilePos pos;
     const unsigned int blockundo_size{static_cast(GetSerializeSize(blockundo))};
     if (!FindUndoPos(state, block.nFile, pos, blockundo_size + UNDO_DATA_DISK_OVERHEAD)) {
-        LogError("FindUndoPos failed");
+        LogError("FindUndoPos failed for %s while writing", pos.ToString());
        return false;
     }
 
     // Open history file to append
     AutoFile fileout{OpenUndoFile(pos)};
     if (fileout.IsNull()) {
-        LogError("OpenUndoFile failed");
+        LogError("OpenUndoFile failed for %s while writing", pos.ToString());
         return FatalError(m_opts.notifications, state, _("Failed to write undo data."));
     }
 
@@ -989,7 +989,7 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
     // Open history file to read
     AutoFile filein{OpenBlockFile(pos, true)};
     if (filein.IsNull()) {
-        LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString());
+        LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString());
         return false;
     }
 
@@ -997,19 +997,19 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
         // Read block
         filein >> TX_WITH_WITNESS(block);
     } catch (const std::exception& e) {
-        LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString());
+        LogError("%s: Deserialize or I/O error - %s at %s", __func__, e.what(), pos.ToString());
         return false;
     }
 
     // Check the header
     if (!CheckProofOfWork(block.GetHash(), block.nBits, GetConsensus())) {
-        LogError("%s: Errors in block header at %s\n", __func__, pos.ToString());
+        LogError("%s: Errors in block header at %s", __func__, pos.ToString());
         return false;
     }
 
     // Signet only: check block solution
     if (GetConsensus().signet_blocks && !CheckSignetBlockSolution(block, GetConsensus())) {
-        LogError("%s: Errors in block solution at %s\n", __func__, pos.ToString());
+        LogError("%s: Errors in block solution at %s", __func__, pos.ToString());
         return false;
     }
 
@@ -1024,7 +1024,7 @@ bool BlockManager::ReadBlock(CBlock& block, const CBlockIndex& index) const
         return false;
     }
     if (block.GetHash() != index.GetBlockHash()) {
-        LogError("%s: GetHash() doesn't match index for %s at %s\n", __func__, index.ToString(), block_pos.ToString());
+        LogError("%s: GetHash() doesn't match index for %s at %s", __func__, index.ToString(), block_pos.ToString());
         return false;
     }
     return true;
@@ -1036,13 +1036,13 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos&
     // If nPos is less than 8 the pos is null and we don't have the block data
     // Return early to prevent undefined behavior of unsigned int underflow
     if (hpos.nPos < 8) {
-        LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString());
+        LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString());
         return false;
     }
     hpos.nPos -= 8; // Seek back 8 bytes for meta header
     AutoFile filein{OpenBlockFile(hpos, true)};
     if (filein.IsNull()) {
-        LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString());
+        LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString());
         return false;
     }
 
@@ -1053,14 +1053,14 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos&
         filein >> blk_start >> blk_size;
 
         if (blk_start != GetParams().MessageStart()) {
-            LogError("%s: Block magic mismatch for %s: %s versus expected %s\n", __func__, pos.ToString(),
+            LogError("%s: Block magic mismatch for %s: %s versus expected %s", __func__, pos.ToString(),
                      HexStr(blk_start), HexStr(GetParams().MessageStart()));
             return false;
         }
 
         if (blk_size > MAX_SIZE) {
-            LogError("%s: Block data is larger than maximum deserialization size for %s: %s versus %s\n", __func__, pos.ToString(),
+            LogError("%s: Block data is larger than maximum deserialization size for %s: %s versus %s", __func__, pos.ToString(),
                      blk_size, MAX_SIZE);
             return false;
         }
 
@@ -1068,7 +1068,7 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos&
         block.resize(blk_size); // Zeroing of memory is intentional here
         filein.read(MakeWritableByteSpan(block));
     } catch (const std::exception& e) {
-        LogError("%s: Read from block file failed: %s for %s\n", __func__, e.what(), pos.ToString());
+        LogError("%s: Read from block file failed: %s for %s", __func__, e.what(), pos.ToString());
         return false;
     }
 
@@ -1080,12 +1080,12 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight)
     const unsigned int block_size{static_cast(GetSerializeSize(TX_WITH_WITNESS(block)))};
     FlatFilePos pos{FindNextBlockPos(block_size + HEADER_BYTE_SIZE, nHeight, block.GetBlockTime())};
     if (pos.IsNull()) {
-        LogError("FindNextBlockPos failed");
+        LogError("FindNextBlockPos failed for %s while writing", pos.ToString());
         return FlatFilePos();
     }
     AutoFile fileout{OpenBlockFile(pos)};
     if (fileout.IsNull()) {
-        LogError("OpenBlockFile failed");
+        LogError("OpenBlockFile failed for %s while writing", pos.ToString());
         m_opts.notifications.fatalError(_("Failed to write block."));
         return FlatFilePos();
     }

From b3ab94b12c3665b3db3f80436287e6ebd04ec3ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Fri, 14 Mar 2025 20:11:57 +0100
Subject: [PATCH 5/6] optimization: Bulk serialization reads in `UndoRead` and
 `ReadBlock`

The obfuscation (XOR) operations are currently done byte-by-byte during
serialization; buffering the reads will enable batching the obfuscation
operations later (not yet done here).

Different operating systems also seem to handle file caching differently,
so reading bigger batches (and processing them from memory) is a bit
faster as well (likely because of fewer native fread calls or less
locking).

Since `ReadBlock[Undo]` is called with the file position already set past
the [undo]block size, we have to start by backtracking 4 bytes to read the
expected size first. As a consequence, the `FlatFilePos pos` parameter of
`ReadBlock` is now taken by copy.

> cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='ReadBlockBench' -min-time=10000

> C++ compiler .......................... AppleClang 16.0.0.16000026

Before:

|               ns/op |                op/s |    err% |     total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
|        2,242,815.02 |              445.87 |    0.4% |     11.03 | `ReadBlockBench`

After:

|               ns/op |                op/s |    err% |     total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
|        1,717,451.57 |              582.26 |    0.1% |     11.01 | `ReadBlockBench`

> C++ compiler .......................... GNU 13.3.0

Before:

|               ns/op |                op/s |    err% |          ins/op |          cyc/op |    IPC |         bra/op |   miss% |     total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|        7,786,309.20 |              128.43 |    0.0% |   70,832,812.80 |   23,803,523.16 |  2.976 |   5,073,002.56 |    0.4% |     10.72 | `ReadBlockBench`

After:

|               ns/op |                op/s |    err% |          ins/op |          cyc/op |    IPC |         bra/op |   miss% |     total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|        6,272,557.28 |              159.42 |    0.0% |   63,251,231.42 |   19,739,780.92 |  3.204 |   3,589,886.66 |    0.3% |     10.57 | `ReadBlockBench`

Co-authored-by: Cory Fields
Co-authored-by: Martin Leitner-Ankerl
Co-authored-by: Ryan Ofsky
---
 src/node/blockstorage.cpp  | 34 ++++++++++++++++++++++++++++++----
 src/node/blockstorage.h    |  2 +-
 src/streams.h              | 23 +++++++++++++++++++++++
 src/test/streams_tests.cpp | 26 ++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index f4d33c03722..414af2e4ac8 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -657,7 +657,13 @@ CBlockFileInfo* BlockManager::GetBlockFileInfo(size_t n)
 
 bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index) const
 {
-    const FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())};
+    FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())};
+    if (pos.nPos < HEADER_BYTE_SIZE) {
+        LogError("%s: OpenUndoFile failed for %s while reading", __func__, pos.ToString());
+        return false;
+    }
+    uint32_t undo_size;
+    pos.nPos -= sizeof(undo_size);
 
     // Open history file to read
     AutoFile filein{OpenUndoFile(pos, true)};
@@ -668,7 +674,14 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index
     try {
         // Read block
-        HashVerifier verifier{filein}; // Use HashVerifier, as reserializing may lose data, c.f. commit d3424243
+        filein >> undo_size;
+        if (undo_size > MAX_SIZE) {
+            LogError("Refusing to read undo data of size: %d", undo_size);
+            return false;
+        }
+
+        BufferedFileR buff(filein, undo_size);
+        HashVerifier verifier{buff}; // Use HashVerifier, as reserializing may lose data, c.f. commit d3424243
+
         verifier << index.pprev->GetBlockHash();
         verifier >> blockundo;
@@ -982,10 +995,17 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt
     return true;
 }
 
-bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
+bool BlockManager::ReadBlock(CBlock& block, FlatFilePos pos) const
 {
     block.SetNull();
 
+    if (pos.nPos < HEADER_BYTE_SIZE) {
+        LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString());
+        return false;
+    }
+    uint32_t blk_size;
+    pos.nPos -= sizeof(blk_size);
+
     // Open history file to read
     AutoFile filein{OpenBlockFile(pos, true)};
     if (filein.IsNull()) {
@@ -995,7 +1015,13 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const
 
     try {
         // Read block
-        filein >> TX_WITH_WITNESS(block);
+        filein >> blk_size;
+        if (blk_size > MAX_SIZE) {
+            LogError("Refusing to read block of size: %d", blk_size);
+            return false;
+        }
+
+        BufferedFileR(filein, blk_size) >> TX_WITH_WITNESS(block);
     } catch (const std::exception& e) {
         LogError("%s: Deserialize or I/O error - %s at %s", __func__, e.what(), pos.ToString());
         return false;

diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h
index 7d68e5145cb..a519abc23c2 100644
--- a/src/node/blockstorage.h
+++ b/src/node/blockstorage.h
@@ -411,7 +411,7 @@ public:
     void UnlinkPrunedFiles(const std::set& setFilesToPrune) const;
 
     /** Functions for disk access for blocks */
-    bool ReadBlock(CBlock& block, const FlatFilePos& pos) const;
+    bool ReadBlock(CBlock& block, FlatFilePos pos) const;
     bool ReadBlock(CBlock& block, const CBlockIndex& index) const;
     bool ReadRawBlock(std::vector& block, const FlatFilePos& pos) const;

diff --git a/src/streams.h b/src/streams.h
index e5316da7e70..6d84ea76f30 100644
--- a/src/streams.h
+++ b/src/streams.h
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 
 namespace util {
 inline void Xor(Span write, Span key, size_t key_offset = 0)
@@ -467,6 +468,28 @@ public:
     }
 };
 
+class BufferedFileR
+{
+    DataStream m_buf;
+
+public:
+    explicit BufferedFileR(AutoFile& file, const uint32_t buffer_size)
+    {
+        m_buf.resize(buffer_size);
+        file.read(m_buf);
+        Assert(m_buf.size() == buffer_size);
+    }
+
+    void read(Span dst) { m_buf.read(dst); }
+
+    template
+    BufferedFileR& operator>>(T&& obj)
+    {
+        Unserialize(m_buf, obj);
+        return *this;
+    }
+};
+
 /** Wrapper around an AutoFile& that implements a ring buffer to
  * deserialize from. It guarantees the ability to rewind a given number of bytes.
  *
diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp
index 777122df6d0..7b5bea86060 100644
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -567,4 +567,30 @@ BOOST_AUTO_TEST_CASE(streams_hashed)
     BOOST_CHECK_EQUAL(hash_writer.GetHash(), hash_verifier.GetHash());
 }
 
+BOOST_AUTO_TEST_CASE(streams_datastream_write_large)
+{
+    const uint32_t v1{m_rng.rand32()}, v2{m_rng.rand32()}, v3{m_rng.rand32()};
+    const fs::path tmp_path{m_args.GetDataDirBase() / "test_datastream_write_large.bin"};
+
+    // Write out the values to file
+    {
+        AutoFile file{fsbridge::fopen(tmp_path, "w+b")};
+        file << v1 << v2;
+        file.write(AsBytes(Span{&v3, 1}));
+    }
+    // Read back and verify using BufferedFileR
+    {
+        AutoFile file{fsbridge::fopen(tmp_path, "rb")};
+        uint32_t _v1{0}, _v2{0}, _v3{0};
+        BufferedFileR f(file, sizeof(v1) + sizeof(v2) + sizeof(v3));
+        f >> _v1 >> _v2;
+        f.read(AsWritableBytes(Span{&_v3, 1}));
+        BOOST_CHECK_EQUAL(_v1, v1);
+        BOOST_CHECK_EQUAL(_v2, v2);
+        BOOST_CHECK_EQUAL(_v3, v3);
+    }
+
+    fs::remove(tmp_path);
+}
+
 BOOST_AUTO_TEST_SUITE_END()

From 5a1c2bd341c9586b090b6b40c20edb011cb616eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Sun, 26 Jan 2025 19:44:03 +0100
Subject: [PATCH 6/6] optimization: Bulk serialization writes in
 `WriteBlockUndo` and `WriteBlock`

Added `AutoFile::write_large` for batching obfuscation operations: instead
of copying the data and doing the XOR in a 4096-byte array, we do it
directly on the input. A presizing `DataStream` constructor was also added
so that serialization and writing can happen in a single call.

Similarly to the serialization reads, buffered writes will enable batched
XOR calculations - currently we need to copy the written Span to do the
obfuscation on it, whereas batching lets us do the XOR on the internal
buffer instead.

> cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='WriteBlockBench' -min-time=10000

> C++ compiler .......................... AppleClang 16.0.0.16000026

Before:

|               ns/op |                op/s |    err% |     total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
|        5,182,762.89 |              192.95 |    0.5% |     11.04 | `WriteBlockBench`

After:

|               ns/op |                op/s |    err% |     total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
|        1,791,444.08 |              558.21 |    0.9% |     11.08 | `WriteBlockBench`

> C++ compiler .......................... GNU 13.3.0

Before:

|               ns/op |                op/s |    err% |          ins/op |          cyc/op |    IPC |         bra/op |   miss% |     total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|        4,128,530.90 |              242.22 |    3.8% |   19,358,001.33 |    8,601,983.31 |  2.250 |   3,079,334.76 |    0.4% |     10.64 | `WriteBlockBench`

After:

|               ns/op |                op/s |    err% |          ins/op |          cyc/op |    IPC |         bra/op |   miss% |     total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|        3,130,556.05 |              319.43 |    4.7% |   17,305,378.56 |    6,457,946.37 |  2.680 |   2,579,854.87 |    0.3% |     10.83 | `WriteBlockBench`

Co-authored-by: Cory Fields
Co-authored-by: Ryan Ofsky
---
 src/node/blockstorage.cpp  |  8 ++++----
 src/streams.cpp            | 10 ++++++++++
 src/streams.h              | 28 ++++++++++++++++++++++++++++
 src/test/streams_tests.cpp |  7 ++++---
 4 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index 414af2e4ac8..a061dbea11a 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -959,14 +959,14 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt
     }
 
     // Write index header
-    fileout << GetParams().MessageStart() << blockundo_size;
+    BufferedFileW(fileout, HEADER_BYTE_SIZE) << GetParams().MessageStart() << blockundo_size;
     pos.nPos += HEADER_BYTE_SIZE;
     {
         // Calculate checksum
         HashWriter hasher{};
         hasher << block.pprev->GetBlockHash() << blockundo;
         // Write undo data & checksum
-        fileout << blockundo << hasher.GetHash();
+        BufferedFileW(fileout, blockundo_size + sizeof(uint256)) << blockundo << hasher.GetHash();
     }
 
     // rev files are written in block height order, whereas blk files are written as blocks come in (often out of order)
     // we want to flush the rev (undo) file once we've written the last block, which is indicated by the last height
@@ -1117,10 +1117,10 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight)
     }
 
     // Write index header
-    fileout << GetParams().MessageStart() << block_size;
+    BufferedFileW(fileout, HEADER_BYTE_SIZE) << GetParams().MessageStart() << block_size;
     pos.nPos += HEADER_BYTE_SIZE;
     // Write block
-    fileout << TX_WITH_WITNESS(block);
+    BufferedFileW(fileout, block_size) << TX_WITH_WITNESS(block);
     return pos;
 }

diff --git a/src/streams.cpp b/src/streams.cpp
index cd496ee2be4..3fc58e260c9 100644
--- a/src/streams.cpp
+++ b/src/streams.cpp
@@ -102,6 +102,16 @@ void AutoFile::write(Span src)
     }
 }
 
+void AutoFile::write_large(Span src)
+{
+    if (!m_file) throw std::ios_base::failure("AutoFile::write_large: file handle is nullptr");
+    util::Xor(src, m_xor, *m_position); // obfuscate in-place
+    if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) {
+        throw std::ios_base::failure("AutoFile::write_large: write failed");
+    }
+    if (m_position) *m_position += src.size();
+}
+
 bool AutoFile::Commit()
 {
     return ::FileCommit(m_file);

diff --git a/src/streams.h b/src/streams.h
index 6d84ea76f30..9dbbb2ad0c9 100644
--- a/src/streams.h
+++ b/src/streams.h
@@ -163,6 +163,7 @@ public:
     typedef vector_type::reverse_iterator reverse_iterator;
 
     explicit DataStream() = default;
+    explicit DataStream(size_type n) { reserve(n); }
     explicit DataStream(Span sp) : DataStream{AsBytes(sp)} {}
     explicit DataStream(Span sp) : vch(sp.data(), sp.data() + sp.size()) {}
@@ -452,6 +453,7 @@ public:
     void read(Span dst);
     void ignore(size_t nSize);
     void write(Span src);
+    void write_large(Span src); // Note that src will be mutated
 
     template
     AutoFile& operator<<(const T& obj)
@@ -468,6 +470,32 @@ public:
     }
 };
 
+class BufferedFileW
+{
+    AutoFile& m_file;
+    uint32_t m_buffer_size;
+    DataStream m_buf;
+
+public:
+    explicit BufferedFileW(AutoFile& file, const uint32_t buffer_size)
+        : m_file(file), m_buffer_size{buffer_size}, m_buf{buffer_size} {}
+
+    ~BufferedFileW()
+    {
+        Assert(m_buf.size() <= m_buffer_size);
+        m_file.write_large(m_buf);
+    }
+
+    void write(Span src) { m_buf.write(src); }
+
+    template
+    BufferedFileW& operator<<(const T& obj)
+    {
+        Serialize(m_buf, obj);
+        return *this;
+    }
+};
+
 class BufferedFileR
 {
     DataStream m_buf;

diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp
index 7b5bea86060..c72f77325f7 100644
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -572,11 +572,12 @@ BOOST_AUTO_TEST_CASE(streams_datastream_write_large)
     const uint32_t v1{m_rng.rand32()}, v2{m_rng.rand32()}, v3{m_rng.rand32()};
     const fs::path tmp_path{m_args.GetDataDirBase() / "test_datastream_write_large.bin"};
 
-    // Write out the values to file
+    // Write out the values through a precisely sized BufferedFileW
     {
         AutoFile file{fsbridge::fopen(tmp_path, "w+b")};
-        file << v1 << v2;
-        file.write(AsBytes(Span{&v3, 1}));
+        BufferedFileW f(file, sizeof(v1) + sizeof(v2) + sizeof(v3));
+        f << v1 << v2;
+        f.write(AsBytes(Span{&v3, 1}));
     }
     // Read back and verify using BufferedFileR
     {