From edb2575fb60e41c2a6cc83db03b125ff66a3ddbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 17 Dec 2024 13:05:13 +0100 Subject: [PATCH 01/24] optimization: Bulk serialization reads in `UndoRead` and `ReadBlock` The Obfuscation (XOR) operations are currently done byte-by-byte during serialization, buffering the reads will enable batching the obfuscation operations later (not yet done here). Also, different operating systems seem to handle file caching differently, so reading bigger batches (and processing those from memory) is also a bit faster (likely because of fewer native fread calls or less locking). Since `ReadBlock[Undo]` is called with the file position being set after the [undo]block size, we have to start by backtracking 4 bytes to be able to read the expected size first. As a consequence, the `FlatFilePos pos` parameter in `ReadBlock` is copied now. `HashVerifier` was included in the try/catch to include the `undo_size` serialization there as well since the try is about `Deserialize` errors. This is why the final checksum verification was also included in the try. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='ReadBlockBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 2,289,743.62 | 436.73 | 0.3% | 11.03 | `ReadBlockBench` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1,724,703.14 | 579.81 | 0.4% | 11.06 | `ReadBlockBench` > C++ compiler .......................... GNU 13.3.0 Before: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 7,786,309.20 | 128.43 | 0.0% | 70,832,812.80 | 23,803,523.16 | 2.976 | 5,073,002.56 | 0.4% | 10.72 | `ReadBlockBench` After: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 6,272,557.28 | 159.42 | 0.0% | 63,251,231.42 | 19,739,780.92 | 3.204 | 3,589,886.66 | 0.3% | 10.57 | `ReadBlockBench` Co-authored-by: Cory Fields --- src/node/blockstorage.cpp | 56 ++++++++++++++++++++++++++++++--------- src/node/blockstorage.h | 2 +- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 372395dd24d..de4f893e513 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -672,7 +672,13 @@ CBlockFileInfo* BlockManager::GetBlockFileInfo(size_t n) bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index) const { - const FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())}; + FlatFilePos pos{WITH_LOCK(::cs_main, return index.GetUndoPos())}; + if (pos.nPos < BLOCK_SERIALIZATION_HEADER_SIZE) { + LogError("%s: OpenUndoFile failed for %s while reading", __func__, pos.ToString()); + return false; + } + uint32_t undo_size; + pos.nPos -= sizeof undo_size; // Open history file to read AutoFile filein{OpenUndoFile(pos, true)}; @@ -681,24 +687,33 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index return 
false; } - // Read block - uint256 hashChecksum; - HashVerifier verifier{filein}; // Use HashVerifier as reserializing may lose data, c.f. commit d342424301013ec47dc146a4beb49d5c9319d80a try { + // Read block + filein >> undo_size; + if (undo_size > MAX_SIZE) { + LogError("Refusing to read undo data of size: %d", undo_size); + return false; + } + + std::vector mem(undo_size); + filein >> Span{mem}; + + SpanReader reader{mem}; + HashVerifier verifier{reader}; // Use HashVerifier as reserializing may lose data, c.f. commit d342424301013ec47dc146a4beb49d5c9319d80a verifier << index.pprev->GetBlockHash(); verifier >> blockundo; + + uint256 hashChecksum; filein >> hashChecksum; + if (hashChecksum != verifier.GetHash()) { + LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString()); + return false; + } } catch (const std::exception& e) { LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString()); return false; } - // Verify checksum - if (hashChecksum != verifier.GetHash()) { - LogError("%s: Checksum mismatch at %s\n", __func__, pos.ToString()); - return false; - } - return true; } @@ -996,10 +1011,17 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt return true; } -bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const +bool BlockManager::ReadBlock(CBlock& block, FlatFilePos pos) const { block.SetNull(); + if (pos.nPos < BLOCK_SERIALIZATION_HEADER_SIZE) { + LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString()); + return false; + } + uint32_t blk_size; + pos.nPos -= sizeof blk_size; + // Open history file to read AutoFile filein{OpenBlockFile(pos, true)}; if (filein.IsNull()) { @@ -1007,9 +1029,17 @@ bool BlockManager::ReadBlock(CBlock& block, const FlatFilePos& pos) const return false; } - // Read block try { - filein >> TX_WITH_WITNESS(block); + // Read block + filein >> blk_size; + if (blk_size > MAX_SIZE) { + LogError("Refusing to read block of size: %s", blk_size); + return false; + } + + std::vector mem(blk_size); + filein >> Span{mem}; + SpanReader(mem) >> TX_WITH_WITNESS(block); } catch (const std::exception& e) { LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString()); return false; diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 8a34efadfea..13bb26470b5 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -414,7 +414,7 @@ public: void UnlinkPrunedFiles(const std::set& setFilesToPrune) const; /** Functions for disk access for blocks */ - bool ReadBlock(CBlock& block, const FlatFilePos& pos) const; + bool ReadBlock(CBlock& block, FlatFilePos pos) const; bool ReadBlock(CBlock& block, const CBlockIndex& index) const; bool ReadRawBlock(std::vector& block, const FlatFilePos& pos) const; From e18c96a3fe950c77069d80aa74c8fc667e8110f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sun, 26 Jan 2025 19:44:03 +0100 Subject: [PATCH 02/24] Add `AutoFile::write_large` for batching obfuscation operations Instead of copying the data and doing the xor in a 4096 byte array, we're doing it directly on the input. `DataStream` constructor was also added to enable presized serialization and writing in a single command. 
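For illustration, this is the call pattern the two additions are meant to enable (a sketch only: `file` stands for an already-open AutoFile and `obj` for any serializable object, neither name is part of this patch):

    // Presize the stream, serialize into it, then let write_large() XOR the
    // buffer in place and hand it to fwrite in a single call.
    file.write_large(DataStream(GetSerializeSize(obj)) << obj);

The next commit in this series uses this shape when writing blocks and undo data.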
--- src/streams.cpp | 10 ++++++++++ src/streams.h | 2 ++ src/test/streams_tests.cpp | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/src/streams.cpp b/src/streams.cpp index cd496ee2be4..3fc58e260c9 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -102,6 +102,16 @@ void AutoFile::write(Span src) } } +void AutoFile::write_large(Span src) +{ + if (!m_file) throw std::ios_base::failure("AutoFile::write_large: file handle is nullptr"); + util::Xor(src, m_xor, *m_position); // obfuscate in-place + if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) { + throw std::ios_base::failure("AutoFile::write_large: write failed"); + } + if (m_position) *m_position += src.size(); +} + bool AutoFile::Commit() { return ::FileCommit(m_file); diff --git a/src/streams.h b/src/streams.h index e5316da7e70..90cceed651b 100644 --- a/src/streams.h +++ b/src/streams.h @@ -162,6 +162,7 @@ public: typedef vector_type::reverse_iterator reverse_iterator; explicit DataStream() = default; + explicit DataStream(size_type n) { reserve(n); } explicit DataStream(Span sp) : DataStream{AsBytes(sp)} {} explicit DataStream(Span sp) : vch(sp.data(), sp.data() + sp.size()) {} @@ -451,6 +452,7 @@ public: void read(Span dst); void ignore(size_t nSize); void write(Span src); + void write_large(Span src); // Note that src will be mutated template AutoFile& operator<<(const T& obj) diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index 777122df6d0..464273eed42 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -567,4 +567,29 @@ BOOST_AUTO_TEST_CASE(streams_hashed) BOOST_CHECK_EQUAL(hash_writer.GetHash(), hash_verifier.GetHash()); } +BOOST_AUTO_TEST_CASE(streams_datastream_write_large) +{ + const uint32_t v1{m_rng.rand32()}, v2{m_rng.rand32()}; + const fs::path tmp_path{m_args.GetDataDirBase() / "test_datastream_write_large.bin"}; + + // Write out the values through in a precisely sized vector. + { + AutoFile file{fsbridge::fopen(tmp_path, "w+b")}; + DataStream data_stream(sizeof(v1) + sizeof(v2)); + assert(data_stream.empty()); + file.write_large(data_stream << v1 << v2); + } + + // Read back and verify. + { + AutoFile file{fsbridge::fopen(tmp_path, "rb")}; + uint32_t v3{0}, v4{0}; + file >> v3 >> v4; + BOOST_CHECK_EQUAL(v3, v1); + BOOST_CHECK_EQUAL(v4, v2); + } + + fs::remove(tmp_path); +} + BOOST_AUTO_TEST_SUITE_END() From 23ed684c6a6f93b6645ad7456d7439c6a32db687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sun, 26 Jan 2025 17:22:47 +0100 Subject: [PATCH 03/24] optimization: Bulk serialization writes in `SaveBlockUndo` and `SaveBlock` Similarly to the serialization reads, buffered writes will enable batched xor calculations - especially since currently we need to copy the write inputs Span to do the obfuscation on it, batching enables doing the xor on the internal buffer instead. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SaveBlockBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 5,267,613.94 | 189.84 | 1.0% | 11.05 | `SaveBlockBench` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1,767,367.40 | 565.81 | 1.6% | 10.86 | `SaveBlockBench` > C++ compiler .......................... 
GNU 13.3.0 Before: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 4,128,530.90 | 242.22 | 3.8% | 19,358,001.33 | 8,601,983.31 | 2.250 | 3,079,334.76 | 0.4% | 10.64 | `SaveBlockBench` After: | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 3,130,556.05 | 319.43 | 4.7% | 17,305,378.56 | 6,457,946.37 | 2.680 | 2,579,854.87 | 0.3% | 10.83 | `SaveBlockBench` Co-authored-by: Cory Fields --- src/node/blockstorage.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index de4f893e513..9cdb0b22700 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -974,16 +974,16 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationSt } // Write index header - fileout << GetParams().MessageStart() << blockundo_size; - // Write undo data + fileout.write_large(DataStream(BLOCK_SERIALIZATION_HEADER_SIZE) << GetParams().MessageStart() << blockundo_size); pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; - fileout << blockundo; + { + // Calculate checksum + HashWriter hasher{}; + hasher << block.pprev->GetBlockHash() << blockundo; - // Calculate & write checksum - HashWriter hasher{}; - hasher << block.pprev->GetBlockHash(); - hasher << blockundo; - fileout << hasher.GetHash(); + // Write undo data & checksum + fileout.write_large(DataStream(blockundo_size + sizeof(uint256)) << blockundo << hasher.GetHash()); + } // rev files are written in block height order, whereas blk files are written as blocks come in (often out of order) // we want to flush the rev (undo) file once we've written the last block, which is indicated by the last height @@ -1135,10 +1135,11 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) } // Write index header - fileout << GetParams().MessageStart() << block_size; - // Write block + fileout.write_large(DataStream(BLOCK_SERIALIZATION_HEADER_SIZE) << GetParams().MessageStart() << block_size); pos.nPos += BLOCK_SERIALIZATION_HEADER_SIZE; - fileout << TX_WITH_WITNESS(block); + // Write block + fileout.write_large(DataStream(block_size) << TX_WITH_WITNESS(block)); + return pos; } From d0a86b343dced4c820fa5f356a231f47c1aa80fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 24 Jan 2025 15:12:28 +0100 Subject: [PATCH 04/24] log: unify error messages for (read/write)[undo]block Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com> --- src/node/blockstorage.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 9cdb0b22700..23b78df4100 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -683,7 +683,7 @@ bool BlockManager::ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index // Open history file to read AutoFile filein{OpenUndoFile(pos, true)}; if (filein.IsNull()) { - LogError("OpenUndoFile failed for %s", pos.ToString()); + LogError("OpenUndoFile failed for %s while reading", pos.ToString()); return false; } @@ -963,13 +963,13 @@ bool BlockManager::WriteBlockUndo(const CBlockUndo& blockundo, 
BlockValidationSt FlatFilePos pos; const unsigned int blockundo_size{static_cast(GetSerializeSize(blockundo))}; if (!FindUndoPos(state, block.nFile, pos, blockundo_size + UNDO_DATA_DISK_OVERHEAD)) { - LogError("FindUndoPos failed"); + LogError("FindUndoPos failed for %s while writing", pos.ToString()); return false; } // Open history file to append AutoFile fileout{OpenUndoFile(pos)}; if (fileout.IsNull()) { - LogError("OpenUndoFile failed"); + LogError("OpenUndoFile failed for %s while writing", pos.ToString()); return FatalError(m_opts.notifications, state, _("Failed to write undo data.")); } @@ -1025,7 +1025,7 @@ bool BlockManager::ReadBlock(CBlock& block, FlatFilePos pos) const // Open history file to read AutoFile filein{OpenBlockFile(pos, true)}; if (filein.IsNull()) { - LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString()); + LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString()); return false; } @@ -1041,19 +1041,19 @@ bool BlockManager::ReadBlock(CBlock& block, FlatFilePos pos) const filein >> Span{mem}; SpanReader(mem) >> TX_WITH_WITNESS(block); } catch (const std::exception& e) { - LogError("%s: Deserialize or I/O error - %s at %s\n", __func__, e.what(), pos.ToString()); + LogError("%s: Deserialize or I/O error - %s at %s", __func__, e.what(), pos.ToString()); return false; } // Check the header if (!CheckProofOfWork(block.GetHash(), block.nBits, GetConsensus())) { - LogError("%s: Errors in block header at %s\n", __func__, pos.ToString()); + LogError("%s: Errors in block header at %s", __func__, pos.ToString()); return false; } // Signet only: check block solution if (GetConsensus().signet_blocks && !CheckSignetBlockSolution(block, GetConsensus())) { - LogError("%s: Errors in block solution at %s\n", __func__, pos.ToString()); + LogError("%s: Errors in block solution at %s", __func__, pos.ToString()); return false; } @@ -1068,7 +1068,7 @@ bool BlockManager::ReadBlock(CBlock& block, const CBlockIndex& index) const return false; } if (block.GetHash() != index.GetBlockHash()) { - LogError("%s: GetHash() doesn't match index for %s at %s\n", __func__, index.ToString(), block_pos.ToString()); + LogError("%s: GetHash() doesn't match index for %s at %s", __func__, index.ToString(), block_pos.ToString()); return false; } return true; @@ -1080,13 +1080,13 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos& // If nPos is less than 8 the pos is null and we don't have the block data // Return early to prevent undefined behavior of unsigned int underflow if (hpos.nPos < 8) { - LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString()); + LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString()); return false; } hpos.nPos -= 8; // Seek back 8 bytes for meta header AutoFile filein{OpenBlockFile(hpos, true)}; if (filein.IsNull()) { - LogError("%s: OpenBlockFile failed for %s\n", __func__, pos.ToString()); + LogError("%s: OpenBlockFile failed for %s", __func__, pos.ToString()); return false; } @@ -1097,14 +1097,14 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos& filein >> blk_start >> blk_size; if (blk_start != GetParams().MessageStart()) { - LogError("%s: Block magic mismatch for %s: %s versus expected %s\n", __func__, pos.ToString(), + LogError("%s: Block magic mismatch for %s: %s versus expected %s", __func__, pos.ToString(), HexStr(blk_start), HexStr(GetParams().MessageStart())); return false; } if (blk_size > MAX_SIZE) { - LogError("%s: Block data is larger than maximum deserialization 
size for %s: %s versus %s\n", __func__, pos.ToString(), + LogError("%s: Block data is larger than maximum deserialization size for %s: %s versus %s", __func__, pos.ToString(), blk_size, MAX_SIZE); return false; } @@ -1112,7 +1112,7 @@ bool BlockManager::ReadRawBlock(std::vector& block, const FlatFilePos& block.resize(blk_size); // Zeroing of memory is intentional here filein.read(MakeWritableByteSpan(block)); } catch (const std::exception& e) { - LogError("%s: Read from block file failed: %s for %s\n", __func__, e.what(), pos.ToString()); + LogError("%s: Read from block file failed: %s for %s", __func__, e.what(), pos.ToString()); return false; } @@ -1124,12 +1124,12 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) const unsigned int block_size{static_cast(GetSerializeSize(TX_WITH_WITNESS(block)))}; FlatFilePos pos{FindNextBlockPos(block_size + BLOCK_SERIALIZATION_HEADER_SIZE, nHeight, block.GetBlockTime())}; if (pos.IsNull()) { - LogError("FindNextBlockPos failed"); + LogError("FindNextBlockPos failed for %s while writing", pos.ToString()); return FlatFilePos(); } AutoFile fileout{OpenBlockFile(pos)}; if (fileout.IsNull()) { - LogError("OpenBlockFile failed"); + LogError("OpenBlockFile failed for %s while writing", pos.ToString()); m_opts.notifications.fatalError(_("Failed to write block.")); return FlatFilePos(); } From 34afcc90c05a56bb4ef17753ecfcc0f5b9fba018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 24 Oct 2024 08:37:57 +0200 Subject: [PATCH 05/24] test: Compare util::Xor with randomized inputs against simple impl Since production code only uses keys of length 8, we're not testing with other values anymore --- src/test/streams_tests.cpp | 85 +++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index 464273eed42..982c7d9d95e 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -14,13 +14,73 @@ using namespace std::string_literals; BOOST_FIXTURE_TEST_SUITE(streams_tests, BasicTestingSetup) +BOOST_AUTO_TEST_CASE(xor_roundtrip_random_chunks) +{ + auto apply_random_xor_chunks{[](std::span write, const std::span key, FastRandomContext& rng) { + for (size_t offset{0}; offset < write.size();) { + const size_t chunk_size{1 + rng.randrange(write.size() - offset)}; + util::Xor(write.subspan(offset, chunk_size), key, offset); + offset += chunk_size; + } + }}; + + FastRandomContext rng{/*fDeterministic=*/false}; + for (size_t test{0}; test < 100; ++test) { + const size_t write_size{1 + rng.randrange(100U)}; + const std::vector original{rng.randbytes(write_size)}; + std::vector roundtrip{original}; + + std::vector key_bytes{rng.randbytes(sizeof(uint64_t))}; + uint64_t key; + std::memcpy(&key, key_bytes.data(), sizeof key); + + apply_random_xor_chunks(roundtrip, key_bytes, rng); + + const bool all_zero = (key == 0) || (HexStr(key_bytes).find_first_not_of('0') >= write_size * 2); + BOOST_CHECK_EQUAL(original != roundtrip, !all_zero); + + apply_random_xor_chunks(roundtrip, key_bytes, rng); + BOOST_CHECK(original == roundtrip); + } +} + +BOOST_AUTO_TEST_CASE(xor_bytes_reference) +{ + auto expected_xor{[](std::span write, const std::span key, size_t key_offset) { + for (auto& b : write) { + b ^= key[key_offset++ % key.size()]; + } + }}; + + FastRandomContext rng{/*fDeterministic=*/false}; + for (size_t test{0}; test < 100; ++test) { + const size_t write_size{1 + rng.randrange(100U)}; + const size_t key_offset{rng.randrange(3 * 
8U)}; // Should wrap around + + std::vector key_bytes{rng.randbytes(sizeof(uint64_t))}; + uint64_t key; + std::memcpy(&key, key_bytes.data(), sizeof key); + + std::vector expected{rng.randbytes(write_size)}; + std::vector actual{expected}; + + expected_xor(expected, key_bytes, key_offset); + util::Xor(actual, key_bytes, key_offset); + + BOOST_CHECK_EQUAL_COLLECTIONS(expected.begin(), expected.end(), actual.begin(), actual.end()); + } +} + BOOST_AUTO_TEST_CASE(xor_file) { fs::path xor_path{m_args.GetDataDirBase() / "test_xor.bin"}; auto raw_file{[&](const auto& mode) { return fsbridge::fopen(xor_path, mode); }}; const std::vector test1{1, 2, 3}; const std::vector test2{4, 5}; - const std::vector xor_pat{std::byte{0xff}, std::byte{0x00}}; + const std::vector xor_pat{std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}}; + uint64_t xor_key; + std::memcpy(&xor_key, xor_pat.data(), sizeof xor_key); + { // Check errors for missing file AutoFile xor_file{raw_file("rb"), xor_pat}; @@ -73,7 +133,7 @@ BOOST_AUTO_TEST_CASE(streams_vector_writer) { unsigned char a(1); unsigned char b(2); - unsigned char bytes[] = { 3, 4, 5, 6 }; + unsigned char bytes[] = {3, 4, 5, 6}; std::vector vch; // Each test runs twice. Serializing a second time at the same starting @@ -225,29 +285,34 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) // Degenerate case { DataStream ds{in}; - ds.Xor({0x00, 0x00}); + ds.Xor({0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}); BOOST_CHECK_EQUAL(""s, ds.str()); } in.push_back(std::byte{0x0f}); in.push_back(std::byte{0xf0}); - // Single character key { + const std::vector xor_pat{std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}}; + uint64_t xor_key; + std::memcpy(&xor_key, xor_pat.data(), sizeof xor_key); + DataStream ds{in}; - ds.Xor({0xff}); + ds.Xor({0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}); BOOST_CHECK_EQUAL("\xf0\x0f"s, ds.str()); } - // Multi character key - in.clear(); in.push_back(std::byte{0xf0}); in.push_back(std::byte{0x0f}); { + const std::vector xor_pat{std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}}; + uint64_t xor_key; + std::memcpy(&xor_key, xor_pat.data(), sizeof xor_key); + DataStream ds{in}; - ds.Xor({0xff, 0x0f}); + ds.Xor({0xff, 0x0f, 0xff, 0x0f, 0xff, 0x0f, 0xff, 0x0f}); BOOST_CHECK_EQUAL("\x0f\x00"s, ds.str()); } } @@ -270,7 +335,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file) BOOST_CHECK(false); } catch (const std::exception& e) { BOOST_CHECK(strstr(e.what(), - "Rewind limit must be less than buffer size") != nullptr); + "Rewind limit must be less than buffer size") != nullptr); } // The buffer is 25 bytes, allow rewinding 10 bytes. @@ -359,7 +424,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file) BOOST_CHECK(false); } catch (const std::exception& e) { BOOST_CHECK(strstr(e.what(), - "BufferedFile::Fill: end of file") != nullptr); + "BufferedFile::Fill: end of file") != nullptr); } // Attempting to read beyond the end sets the EOF indicator. 
BOOST_CHECK(bf.eof()); From 8ce0670506bc1f1a659271deaf383a56a142df70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 6 Dec 2024 16:18:03 +0100 Subject: [PATCH 06/24] bench: Make Xor benchmark more representative To make the benchmarks representative, I've collected the write-vector's sizes during IBD for every invocation of `util::Xor` until 860k blocks, and used it as a basis for the micro-benchmarks, having a similar distribution of random data (taking the 1000 most frequent ones, making sure the very big ones are also covered). And even though we already have serialization tests, `AutoFileXor` was added to serializing 1 MB via the provided key_bytes. This was used to test the effect of disabling obfuscation. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release \ && cmake --build build -j$(nproc) \ && build/src/bench/bench_bitcoin -filter='XorHistogram|AutoFileXor' -min-time=10000 C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/byte | byte/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1.07 | 937,527,289.88 | 0.4% | 10.24 | `AutoFileXor` | 0.87 | 1,149,859,017.49 | 0.3% | 10.80 | `XorHistogram` C++ compiler .......................... GNU 13.2.0 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 1.87 | 535,253,389.72 | 0.0% | 9.20 | 3.45 | 2.669 | 1.03 | 0.1% | 11.02 | `AutoFileXor` | 1.70 | 587,844,715.57 | 0.0% | 9.35 | 5.41 | 1.729 | 1.05 | 1.7% | 10.95 | `XorHistogram` --- src/bench/xor.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/bench/xor.cpp b/src/bench/xor.cpp index fc9dc5d1721..1c9c5189baa 100644 --- a/src/bench/xor.cpp +++ b/src/bench/xor.cpp @@ -7,17 +7,24 @@ #include #include +#include #include +#include #include static void Xor(benchmark::Bench& bench) { - FastRandomContext frc{/*fDeterministic=*/true}; - auto data{frc.randbytes(1024)}; - auto key{frc.randbytes(31)}; + FastRandomContext rng{/*fDeterministic=*/true}; + auto test_data{rng.randbytes(1 << 20)}; - bench.batch(data.size()).unit("byte").run([&] { - util::Xor(data, key); + std::vector key_bytes{rng.randbytes(8)}; + uint64_t key; + std::memcpy(&key, key_bytes.data(), 8); + + size_t offset{0}; + bench.batch(test_data.size()).unit("byte").run([&] { + util::Xor(test_data, key_bytes, offset++); + ankerl::nanobench::doNotOptimizeAway(test_data); }); } From bb9cb81607076e9a14178ad2f62fcf68c5d5499b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 21 Dec 2024 16:15:04 +0100 Subject: [PATCH 07/24] optimization: Xor 64 bits together instead of byte-by-byte `util::Xor` method was split out into more focused parts: * one which assumes tha the `uint64_t` key is properly aligned, doing the first few xors as 64 bits (the memcpy is eliminated in most compilers), and the last iteration is optimized for 8/16/32 bytes. * an unaligned `uint64_t` key with a `key_offset` parameter which is rotated to accommodate the data (adjusting for endianness). * a legacy `std::vector` key with an asserted 8 byte size, converted to `uint64_t`. Note that the default statement alone would pass the tests, but would be very slow, since the 1, 2 and 4 byte versions won't be specialized by the compiler, hence the switch. 
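As a simplified sketch of the aligned fast path (little-endian shown; the `Obfuscation` helper added below in src/obfuscation.h additionally precomputes all eight key rotations and handles big-endian hosts):

    // Sketch only, assumes <bit>, <cstddef>, <cstdint>, <cstring>, <span>:
    // rotate the 64-bit key by the byte offset, then XOR whole 64-bit words;
    // the memcpy calls compile down to plain loads and stores.
    void XorSketch(std::span<std::byte> write, uint64_t key, size_t key_offset)
    {
        const uint64_t rot_key{std::rotr(key, int(8 * (key_offset % 8)))};
        for (; write.size() >= 8; write = write.subspan(8)) {
            uint64_t chunk;
            std::memcpy(&chunk, write.data(), 8);
            chunk ^= rot_key;
            std::memcpy(write.data(), &chunk, 8);
        }
        if (!write.empty()) { // trailing 1-7 bytes, same idea with a shorter memcpy
            uint64_t tail{0};
            std::memcpy(&tail, write.data(), write.size());
            tail ^= rot_key;
            std::memcpy(write.data(), &tail, write.size());
        }
    }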
Asserts were added throughout the code to make sure every such vector has length 8, since in the next commit we're converting all of them to `uint64_t`. refactor: Migrate fixed-size obfuscation end-to-end from `std::vector` to `uint64_t` Since `util::Xor` accepts `uint64_t` values, we're eliminating any repeated vector-to-uint64_t conversions going back to the loading/saving of these values (we're still serializing them as vectors, but converting as soon as possible to `uint64_t`). This is the reason the tests still generate vector values and convert to `uint64_t` later instead of generating it directly. We're also short-circuit `Xor` calls with 0 key values early to avoid unnecessary calculations (e.g. `MakeWritableByteSpan`) - even assuming that XOR is never called for 0. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release \ && cmake --build build -j$(nproc) \ && build/src/bench/bench_bitcoin -filter='XorHistogram|AutoFileXor' -min-time=10000 C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/byte | byte/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 0.09 | 10,799,585,470.46 | 1.3% | 11.00 | `AutoFileXor` | 0.14 | 7,144,743,097.97 | 0.2% | 11.01 | `XorHistogram` C++ compiler .......................... GNU 13.2.0 | ns/byte | byte/s | err% | ins/byte | cyc/byte | IPC | bra/byte | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 0.59 | 1,706,433,032.76 | 0.1% | 0.00 | 0.00 | 0.620 | 0.00 | 1.8% | 11.01 | `AutoFileXor` | 0.47 | 2,145,375,849.71 | 0.0% | 0.95 | 1.48 | 0.642 | 0.20 | 9.6% | 10.93 | `XorHistogram` ---- A few other benchmarks that seem to have improved as well (tested with Clang only): Before: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 2,237,168.64 | 446.99 | 0.3% | 10.91 | `ReadBlockFromDiskTest` | 748,837.59 | 1,335.40 | 0.2% | 10.68 | `ReadRawBlockFromDiskTest` After: | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 1,827,436.12 | 547.21 | 0.7% | 10.95 | `ReadBlockFromDiskTest` | 49,276.48 | 20,293.66 | 0.2% | 10.99 | `ReadRawBlockFromDiskTest` --- src/bench/xor.cpp | 7 +- src/dbwrapper.cpp | 53 +++++--------- src/dbwrapper.h | 16 ++--- src/node/blockstorage.cpp | 10 +-- src/node/blockstorage.h | 2 +- src/node/mempool_persist.cpp | 17 ++--- src/obfuscation.h | 86 +++++++++++++++++++++++ src/streams.cpp | 21 +++--- src/streams.h | 40 ++--------- src/test/dbwrapper_tests.cpp | 18 ++--- src/test/fuzz/autofile.cpp | 2 +- src/test/fuzz/buffered_file.cpp | 2 +- src/test/streams_tests.cpp | 121 +++++++++++++++++++++----------- 13 files changed, 232 insertions(+), 163 deletions(-) create mode 100644 src/obfuscation.h diff --git a/src/bench/xor.cpp b/src/bench/xor.cpp index 1c9c5189baa..feeab0d348d 100644 --- a/src/bench/xor.cpp +++ b/src/bench/xor.cpp @@ -17,13 +17,12 @@ static void Xor(benchmark::Bench& bench) FastRandomContext rng{/*fDeterministic=*/true}; auto test_data{rng.randbytes(1 << 20)}; - std::vector key_bytes{rng.randbytes(8)}; - uint64_t key; - std::memcpy(&key, key_bytes.data(), 8); + const Obfuscation obfuscation{rng.rand64()}; + assert(obfuscation); size_t offset{0}; bench.batch(test_data.size()).unit("byte").run([&] { - util::Xor(test_data, key_bytes, offset++); + obfuscation(test_data, offset++); 
ankerl::nanobench::doNotOptimizeAway(test_data); }); } diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp index 8fb366515af..bbe066d7c55 100644 --- a/src/dbwrapper.cpp +++ b/src/dbwrapper.cpp @@ -171,7 +171,7 @@ void CDBBatch::Clear() void CDBBatch::WriteImpl(Span key, DataStream& ssValue) { leveldb::Slice slKey(CharCast(key.data()), key.size()); - ssValue.Xor(dbwrapper_private::GetObfuscateKey(parent)); + dbwrapper_private::GetObfuscation(parent)(ssValue); leveldb::Slice slValue(CharCast(ssValue.data()), ssValue.size()); m_impl_batch->batch.Put(slKey, slValue); // LevelDB serializes writes as: @@ -220,7 +220,11 @@ struct LevelDBContext { }; CDBWrapper::CDBWrapper(const DBParams& params) - : m_db_context{std::make_unique()}, m_name{fs::PathToString(params.path.stem())}, m_path{params.path}, m_is_memory{params.memory_only} + : m_db_context{std::make_unique()}, + m_name{fs::PathToString(params.path.stem())}, + m_obfuscation{0}, + m_path{params.path}, + m_is_memory{params.memory_only} { DBContext().penv = nullptr; DBContext().readoptions.verify_checksums = true; @@ -255,24 +259,23 @@ CDBWrapper::CDBWrapper(const DBParams& params) LogPrintf("Finished database compaction of %s\n", fs::PathToString(params.path)); } - // The base-case obfuscation key, which is a noop. - obfuscate_key = std::vector(OBFUSCATE_KEY_NUM_BYTES, '\000'); - - bool key_exists = Read(OBFUSCATE_KEY_KEY, obfuscate_key); - - if (!key_exists && params.obfuscate && IsEmpty()) { - // Initialize non-degenerate obfuscation if it won't upset - // existing, non-obfuscated data. - std::vector new_key = CreateObfuscateKey(); + m_obfuscation = 0; // Needed for unobfuscated Read + std::vector obfuscate_key_vector(Obfuscation::SIZE_BYTES, '\000'); + const bool key_missing = !Read(OBFUSCATE_KEY_KEY, obfuscate_key_vector); + if (key_missing && params.obfuscate && IsEmpty()) { + // Initialize non-degenerate obfuscation if it won't upset existing, non-obfuscated data. + std::vector new_key(Obfuscation::SIZE_BYTES); + GetRandBytes(new_key); // Write `new_key` so we don't obfuscate the key with itself Write(OBFUSCATE_KEY_KEY, new_key); - obfuscate_key = new_key; + obfuscate_key_vector = new_key; - LogPrintf("Wrote new obfuscate key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); + LogPrintf("Wrote new obfuscate key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key_vector)); } - - LogPrintf("Using obfuscation key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key)); + LogPrintf("Using obfuscation key for %s: %s\n", fs::PathToString(params.path), HexStr(obfuscate_key_vector)); + m_obfuscation = obfuscate_key_vector; + obfuscate_key_vector.clear(); } CDBWrapper::~CDBWrapper() @@ -323,19 +326,6 @@ size_t CDBWrapper::DynamicMemoryUsage() const // past the null-terminator. const std::string CDBWrapper::OBFUSCATE_KEY_KEY("\000obfuscate_key", 14); -const unsigned int CDBWrapper::OBFUSCATE_KEY_NUM_BYTES = 8; - -/** - * Returns a string (consisting of 8 random bytes) suitable for use as an - * obfuscating XOR key. 
- */ -std::vector CDBWrapper::CreateObfuscateKey() const -{ - std::vector ret(OBFUSCATE_KEY_NUM_BYTES); - GetRandBytes(ret); - return ret; -} - std::optional CDBWrapper::ReadImpl(Span key) const { leveldb::Slice slKey(CharCast(key.data()), key.size()); @@ -418,10 +408,5 @@ void CDBIterator::SeekToFirst() { m_impl_iter->iter->SeekToFirst(); } void CDBIterator::Next() { m_impl_iter->iter->Next(); } namespace dbwrapper_private { - -const std::vector& GetObfuscateKey(const CDBWrapper &w) -{ - return w.obfuscate_key; -} - +Obfuscation GetObfuscation(const CDBWrapper& w) { return w.m_obfuscation; } } // namespace dbwrapper_private diff --git a/src/dbwrapper.h b/src/dbwrapper.h index dd5daa7a1fc..6e61cb4e1be 100644 --- a/src/dbwrapper.h +++ b/src/dbwrapper.h @@ -63,8 +63,7 @@ namespace dbwrapper_private { * Database obfuscation should be considered an implementation detail of the * specific database. */ -const std::vector& GetObfuscateKey(const CDBWrapper &w); - +Obfuscation GetObfuscation(const CDBWrapper&); }; // namespace dbwrapper_private bool DestroyDB(const std::string& path_str); @@ -168,7 +167,7 @@ public: template bool GetValue(V& value) { try { DataStream ssValue{GetValueImpl()}; - ssValue.Xor(dbwrapper_private::GetObfuscateKey(parent)); + dbwrapper_private::GetObfuscation(parent)(ssValue); ssValue >> value; } catch (const std::exception&) { return false; @@ -181,7 +180,7 @@ struct LevelDBContext; class CDBWrapper { - friend const std::vector& dbwrapper_private::GetObfuscateKey(const CDBWrapper &w); + friend Obfuscation dbwrapper_private::GetObfuscation(const CDBWrapper&); private: //! holds all leveldb-specific fields of this class std::unique_ptr m_db_context; @@ -190,16 +189,11 @@ private: std::string m_name; //! a key used for optional XOR-obfuscation of the database - std::vector obfuscate_key; + Obfuscation m_obfuscation; //! the key under which the obfuscation key is stored static const std::string OBFUSCATE_KEY_KEY; - //! the length of the obfuscate key in number of bytes - static const unsigned int OBFUSCATE_KEY_NUM_BYTES; - - std::vector CreateObfuscateKey() const; - //! 
path to filesystem storage const fs::path m_path; @@ -230,7 +224,7 @@ public: } try { DataStream ssValue{MakeByteSpan(*strValue)}; - ssValue.Xor(obfuscate_key); + m_obfuscation(ssValue); ssValue >> value; } catch (const std::exception&) { return false; diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 23b78df4100..f3766afa730 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -804,13 +804,13 @@ void BlockManager::UnlinkPrunedFiles(const std::set& setFilesToPrune) const AutoFile BlockManager::OpenBlockFile(const FlatFilePos& pos, bool fReadOnly) const { - return AutoFile{m_block_file_seq.Open(pos, fReadOnly), m_xor_key}; + return AutoFile{m_block_file_seq.Open(pos, fReadOnly), m_obfuscation}; } /** Open an undo file (rev?????.dat) */ AutoFile BlockManager::OpenUndoFile(const FlatFilePos& pos, bool fReadOnly) const { - return AutoFile{m_undo_file_seq.Open(pos, fReadOnly), m_xor_key}; + return AutoFile{m_undo_file_seq.Open(pos, fReadOnly), m_obfuscation}; } fs::path BlockManager::GetBlockPosFilename(const FlatFilePos& pos) const @@ -1143,7 +1143,7 @@ FlatFilePos BlockManager::WriteBlock(const CBlock& block, int nHeight) return pos; } -static auto InitBlocksdirXorKey(const BlockManager::Options& opts) +static Obfuscation InitBlocksdirXorKey(const BlockManager::Options& opts) { // Bytes are serialized without length indicator, so this is also the exact // size of the XOR-key file. @@ -1192,12 +1192,12 @@ static auto InitBlocksdirXorKey(const BlockManager::Options& opts) }; } LogInfo("Using obfuscation key for blocksdir *.dat files (%s): '%s'\n", fs::PathToString(opts.blocks_dir), HexStr(xor_key)); - return std::vector{xor_key.begin(), xor_key.end()}; + return Obfuscation{xor_key}; } BlockManager::BlockManager(const util::SignalInterrupt& interrupt, Options opts) : m_prune_mode{opts.prune_target > 0}, - m_xor_key{InitBlocksdirXorKey(opts)}, + m_obfuscation{InitBlocksdirXorKey(opts)}, m_opts{std::move(opts)}, m_block_file_seq{FlatFileSeq{m_opts.blocks_dir, "blk", m_opts.fast_prune ? 0x4000 /* 16kB */ : BLOCKFILE_CHUNK_SIZE}}, m_undo_file_seq{FlatFileSeq{m_opts.blocks_dir, "rev", UNDOFILE_CHUNK_SIZE}}, diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 13bb26470b5..651182feb22 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -235,7 +235,7 @@ private: const bool m_prune_mode; - const std::vector m_xor_key; + const Obfuscation m_obfuscation; /** Dirty block index entries. */ std::set m_dirty_blockindex; diff --git a/src/node/mempool_persist.cpp b/src/node/mempool_persist.cpp index ff7de8c64a2..22d91f0678b 100644 --- a/src/node/mempool_persist.cpp +++ b/src/node/mempool_persist.cpp @@ -58,15 +58,15 @@ bool LoadMempool(CTxMemPool& pool, const fs::path& load_path, Chainstate& active try { uint64_t version; file >> version; - std::vector xor_key; if (version == MEMPOOL_DUMP_VERSION_NO_XOR_KEY) { - // Leave XOR-key empty + file.SetObfuscation(0); } else if (version == MEMPOOL_DUMP_VERSION) { - file >> xor_key; + Obfuscation obfuscation{0}; + file >> obfuscation; + file.SetObfuscation(obfuscation); } else { return false; } - file.SetXor(xor_key); uint64_t total_txns_to_load; file >> total_txns_to_load; uint64_t txns_tried = 0; @@ -177,12 +177,13 @@ bool DumpMempool(const CTxMemPool& pool, const fs::path& dump_path, FopenFn mock const uint64_t version{pool.m_opts.persist_v1_dat ? 
MEMPOOL_DUMP_VERSION_NO_XOR_KEY : MEMPOOL_DUMP_VERSION}; file << version; - std::vector xor_key(8); if (!pool.m_opts.persist_v1_dat) { - FastRandomContext{}.fillrand(xor_key); - file << xor_key; + const Obfuscation obfuscation{FastRandomContext{}.rand64()}; + file << obfuscation; + file.SetObfuscation(obfuscation); + } else { + file.SetObfuscation(0); } - file.SetXor(xor_key); uint64_t mempool_transactions_to_write(vinfo.size()); file << mempool_transactions_to_write; diff --git a/src/obfuscation.h b/src/obfuscation.h new file mode 100644 index 00000000000..ec478300a02 --- /dev/null +++ b/src/obfuscation.h @@ -0,0 +1,86 @@ +// Copyright (c) 2009-present The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#ifndef BITCOIN_OBFUSCATION_H +#define BITCOIN_OBFUSCATION_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class Obfuscation +{ +public: + static constexpr size_t SIZE_BYTES{sizeof(uint64_t)}; + +private: + std::array rotations; // Cached key rotations + void SetRotations(const uint64_t key) + { + for (size_t i{0}; i < SIZE_BYTES; ++i) + { + size_t key_rotation_bits{CHAR_BIT * i}; + if constexpr (std::endian::native == std::endian::big) key_rotation_bits *= -1; + rotations[i] = std::rotr(key, key_rotation_bits); + } + } + + static uint64_t ToUint64(const Span key_span) + { + uint64_t key{}; + std::memcpy(&key, key_span.data(), SIZE_BYTES); + return key; + } + + static void Xor(Span write, const uint64_t key, const size_t size) + { + assert(size <= write.size()); + uint64_t raw{}; + std::memcpy(&raw, write.data(), size); + raw ^= key; + std::memcpy(write.data(), &raw, size); + } + +public: + Obfuscation(const uint64_t key) { SetRotations(key); } + Obfuscation(const Span key_span) : Obfuscation(ToUint64(key_span)) {} + Obfuscation(const std::array& key_arr) : Obfuscation(ToUint64(key_arr)) {} + Obfuscation(const std::vector& key_vec) : Obfuscation(MakeByteSpan(key_vec)) {} + + uint64_t Key() const { return rotations[0]; } + operator bool() const { return Key() != 0; } + void operator()(Span write, const size_t key_offset_bytes = 0) const + { + if (!*this) return; + const uint64_t rot_key{rotations[key_offset_bytes % SIZE_BYTES]}; // Continue obfuscation from where we left off + for (; write.size() >= SIZE_BYTES; write = write.subspan(SIZE_BYTES)) { // Process multiple bytes at a time + Xor(write, rot_key, SIZE_BYTES); + } + Xor(write, rot_key, write.size()); + } + + template + void Serialize(Stream& s) const + { + std::vector bytes(SIZE_BYTES); + std::memcpy(bytes.data(), &rotations[0], SIZE_BYTES); + s << bytes; + } + + template + void Unserialize(Stream& s) + { + std::vector bytes(SIZE_BYTES); + s >> bytes; + SetRotations(ToUint64(bytes)); + } +}; + +#endif // BITCOIN_OBFUSCATION_H diff --git a/src/streams.cpp b/src/streams.cpp index 3fc58e260c9..bec505ebc3a 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -9,8 +9,7 @@ #include -AutoFile::AutoFile(std::FILE* file, std::vector data_xor) - : m_file{file}, m_xor{std::move(data_xor)} +AutoFile::AutoFile(std::FILE* file, const Obfuscation& obfuscation) : m_file{file}, m_obfuscation{obfuscation} { if (!IsNull()) { auto pos{std::ftell(m_file)}; @@ -21,12 +20,12 @@ AutoFile::AutoFile(std::FILE* file, std::vector data_xor) std::size_t AutoFile::detail_fread(Span dst) { if (!m_file) throw std::ios_base::failure("AutoFile::read: file handle is nullptr"); - size_t ret = 
std::fread(dst.data(), 1, dst.size(), m_file); - if (!m_xor.empty()) { - if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::read: position unknown"); - util::Xor(dst.subspan(0, ret), m_xor, *m_position); + const size_t ret = std::fread(dst.data(), 1, dst.size(), m_file); + if (m_obfuscation) { + if (!m_position) throw std::ios_base::failure("AutoFile::read: position unknown"); + m_obfuscation(dst, *m_position); } - if (m_position.has_value()) *m_position += ret; + if (m_position) *m_position += ret; return ret; } @@ -81,7 +80,7 @@ void AutoFile::ignore(size_t nSize) void AutoFile::write(Span src) { if (!m_file) throw std::ios_base::failure("AutoFile::write: file handle is nullptr"); - if (m_xor.empty()) { + if (!m_obfuscation) { if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) { throw std::ios_base::failure("AutoFile::write: write failed"); } @@ -91,8 +90,8 @@ void AutoFile::write(Span src) std::array buf; while (src.size() > 0) { auto buf_now{Span{buf}.first(std::min(src.size(), buf.size()))}; - std::copy(src.begin(), src.begin() + buf_now.size(), buf_now.begin()); - util::Xor(buf_now, m_xor, *m_position); + std::copy_n(src.begin(), buf_now.size(), buf_now.begin()); + m_obfuscation(buf_now, *m_position); if (std::fwrite(buf_now.data(), 1, buf_now.size(), m_file) != buf_now.size()) { throw std::ios_base::failure{"XorFile::write: failed"}; } @@ -105,7 +104,7 @@ void AutoFile::write(Span src) void AutoFile::write_large(Span src) { if (!m_file) throw std::ios_base::failure("AutoFile::write_large: file handle is nullptr"); - util::Xor(src, m_xor, *m_position); // obfuscate in-place + m_obfuscation(src, *m_position); // obfuscate in-place if (std::fwrite(src.data(), 1, src.size(), m_file) != src.size()) { throw std::ios_base::failure("AutoFile::write_large: write failed"); } diff --git a/src/streams.h b/src/streams.h index 90cceed651b..e1b324b52d8 100644 --- a/src/streams.h +++ b/src/streams.h @@ -6,6 +6,7 @@ #ifndef BITCOIN_STREAMS_H #define BITCOIN_STREAMS_H +#include #include #include #include @@ -21,30 +22,8 @@ #include #include #include -#include #include -namespace util { -inline void Xor(Span write, Span key, size_t key_offset = 0) -{ - if (key.size() == 0) { - return; - } - key_offset %= key.size(); - - for (size_t i = 0, j = key_offset; i != write.size(); i++) { - write[i] ^= key[j++]; - - // This potentially acts on very many bytes of data, so it's - // important that we calculate `j`, i.e. the `key` index in this - // way instead of doing a %, which would effectively be a division - // for each byte Xor'd -- much slower than need be. - if (j == key.size()) - j = 0; - } -} -} // namespace util - /* Minimal stream for overwriting and/or appending to an existing byte vector * * The referenced vector will grow as necessary @@ -262,21 +241,16 @@ public: return (*this); } - template + template DataStream& operator>>(T&& obj) { ::Unserialize(*this, obj); return (*this); } - /** - * XOR the contents of this stream with a certain key. - * - * @param[in] key The key used to XOR the data in this stream. - */ - void Xor(const std::vector& key) + void Obfuscate(const Obfuscation& obfuscation) { - util::Xor(MakeWritableByteSpan(*this), MakeByteSpan(key)); + if (obfuscation) obfuscation(MakeWritableByteSpan(*this)); } /** Compute total memory usage of this object (own memory + any dynamic memory). 
*/ @@ -393,11 +367,11 @@ class AutoFile { protected: std::FILE* m_file; - std::vector m_xor; + Obfuscation m_obfuscation; std::optional m_position; public: - explicit AutoFile(std::FILE* file, std::vector data_xor={}); + explicit AutoFile(std::FILE* file, const Obfuscation& obfuscation = 0); ~AutoFile() { fclose(); } @@ -429,7 +403,7 @@ public: bool IsNull() const { return m_file == nullptr; } /** Continue with a different XOR key */ - void SetXor(std::vector data_xor) { m_xor = data_xor; } + void SetObfuscation(const Obfuscation& obfuscation) { m_obfuscation = obfuscation; } /** Implementation detail, only used internally. */ std::size_t detail_fread(Span dst); diff --git a/src/test/dbwrapper_tests.cpp b/src/test/dbwrapper_tests.cpp index 3a86036327c..7d9135baf04 100644 --- a/src/test/dbwrapper_tests.cpp +++ b/src/test/dbwrapper_tests.cpp @@ -14,16 +14,6 @@ using util::ToString; -// Test if a string consists entirely of null characters -static bool is_null_key(const std::vector& key) { - bool isnull = true; - - for (unsigned int i = 0; i < key.size(); i++) - isnull &= (key[i] == '\x00'); - - return isnull; -} - BOOST_FIXTURE_TEST_SUITE(dbwrapper_tests, BasicTestingSetup) BOOST_AUTO_TEST_CASE(dbwrapper) @@ -37,7 +27,7 @@ BOOST_AUTO_TEST_CASE(dbwrapper) uint256 res; // Ensure that we're doing real obfuscation when obfuscate=true - BOOST_CHECK(obfuscate != is_null_key(dbwrapper_private::GetObfuscateKey(dbw))); + BOOST_CHECK(obfuscate == dbwrapper_private::GetObfuscation(dbw)); BOOST_CHECK(dbw.Write(key, in)); BOOST_CHECK(dbw.Read(key, res)); @@ -57,7 +47,7 @@ BOOST_AUTO_TEST_CASE(dbwrapper_basic_data) bool res_bool; // Ensure that we're doing real obfuscation when obfuscate=true - BOOST_CHECK(obfuscate != is_null_key(dbwrapper_private::GetObfuscateKey(dbw))); + BOOST_CHECK(obfuscate == dbwrapper_private::GetObfuscation(dbw)); //Simulate block raw data - "b + block hash" std::string key_block = "b" + m_rng.rand256().ToString(); @@ -232,7 +222,7 @@ BOOST_AUTO_TEST_CASE(existing_data_no_obfuscate) BOOST_CHECK_EQUAL(res2.ToString(), in.ToString()); BOOST_CHECK(!odbw.IsEmpty()); // There should be existing data - BOOST_CHECK(is_null_key(dbwrapper_private::GetObfuscateKey(odbw))); // The key should be an empty string + BOOST_CHECK(!dbwrapper_private::GetObfuscation(odbw)); uint256 in2 = m_rng.rand256(); uint256 res3; @@ -269,7 +259,7 @@ BOOST_AUTO_TEST_CASE(existing_data_reindex) // Check that the key/val we wrote with unobfuscated wrapper doesn't exist uint256 res2; BOOST_CHECK(!odbw.Read(key, res2)); - BOOST_CHECK(!is_null_key(dbwrapper_private::GetObfuscateKey(odbw))); + BOOST_CHECK(dbwrapper_private::GetObfuscation(odbw)); uint256 in2 = m_rng.rand256(); uint256 res3; diff --git a/src/test/fuzz/autofile.cpp b/src/test/fuzz/autofile.cpp index 81761c7bf96..c76899cda82 100644 --- a/src/test/fuzz/autofile.cpp +++ b/src/test/fuzz/autofile.cpp @@ -20,7 +20,7 @@ FUZZ_TARGET(autofile) FuzzedFileProvider fuzzed_file_provider{fuzzed_data_provider}; AutoFile auto_file{ fuzzed_file_provider.open(), - ConsumeRandomLengthByteVector(fuzzed_data_provider), + fuzzed_data_provider.ConsumeIntegral() }; LIMITED_WHILE(fuzzed_data_provider.ConsumeBool(), 100) { diff --git a/src/test/fuzz/buffered_file.cpp b/src/test/fuzz/buffered_file.cpp index a6a042a25cb..d8c09f88ed8 100644 --- a/src/test/fuzz/buffered_file.cpp +++ b/src/test/fuzz/buffered_file.cpp @@ -22,7 +22,7 @@ FUZZ_TARGET(buffered_file) std::optional opt_buffered_file; AutoFile fuzzed_file{ fuzzed_file_provider.open(), - 
ConsumeRandomLengthByteVector(fuzzed_data_provider), + fuzzed_data_provider.ConsumeIntegral() }; try { auto n_buf_size = fuzzed_data_provider.ConsumeIntegralInRange(0, 4096); diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index 982c7d9d95e..fd19f81409b 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -14,34 +14,52 @@ using namespace std::string_literals; BOOST_FIXTURE_TEST_SUITE(streams_tests, BasicTestingSetup) -BOOST_AUTO_TEST_CASE(xor_roundtrip_random_chunks) +BOOST_AUTO_TEST_CASE(obfuscation_constructors) { - auto apply_random_xor_chunks{[](std::span write, const std::span key, FastRandomContext& rng) { - for (size_t offset{0}; offset < write.size();) { - const size_t chunk_size{1 + rng.randrange(write.size() - offset)}; - util::Xor(write.subspan(offset, chunk_size), key, offset); - offset += chunk_size; - } - }}; + constexpr uint64_t test_key = 0x0123456789ABCDEF; - FastRandomContext rng{/*fDeterministic=*/false}; - for (size_t test{0}; test < 100; ++test) { - const size_t write_size{1 + rng.randrange(100U)}; - const std::vector original{rng.randbytes(write_size)}; - std::vector roundtrip{original}; + // Direct uint64_t constructor + const Obfuscation obf1{test_key}; + BOOST_CHECK_EQUAL(obf1.Key(), test_key); - std::vector key_bytes{rng.randbytes(sizeof(uint64_t))}; - uint64_t key; - std::memcpy(&key, key_bytes.data(), sizeof key); + // Span constructor + std::array key_bytes{}; + std::memcpy(key_bytes.data(), &test_key, Obfuscation::SIZE_BYTES); + const Obfuscation obf2{Span{key_bytes}}; + BOOST_CHECK_EQUAL(obf2.Key(), test_key); - apply_random_xor_chunks(roundtrip, key_bytes, rng); + // std::array constructor + const Obfuscation obf3{key_bytes}; + BOOST_CHECK_EQUAL(obf3.Key(), test_key); - const bool all_zero = (key == 0) || (HexStr(key_bytes).find_first_not_of('0') >= write_size * 2); - BOOST_CHECK_EQUAL(original != roundtrip, !all_zero); + // std::vector constructor + std::vector uchar_key(Obfuscation::SIZE_BYTES); + std::memcpy(uchar_key.data(), &test_key, uchar_key.size()); + const Obfuscation obf4{uchar_key}; + BOOST_CHECK_EQUAL(obf4.Key(), test_key); +} - apply_random_xor_chunks(roundtrip, key_bytes, rng); - BOOST_CHECK(original == roundtrip); - } +BOOST_AUTO_TEST_CASE(obfuscation_serialize) +{ + const Obfuscation original{0xDEADBEEF}; + + // Serialize + DataStream ds; + ds << original; + + BOOST_CHECK_EQUAL(ds.size(), 1 + Obfuscation::SIZE_BYTES); // serialized as a vector + + // Deserialize + Obfuscation recovered{0}; + ds >> recovered; + + BOOST_CHECK_EQUAL(recovered.Key(), original.Key()); +} + +BOOST_AUTO_TEST_CASE(obfuscation_empty) +{ + const Obfuscation null_obf{0}; + BOOST_CHECK(!null_obf); } BOOST_AUTO_TEST_CASE(xor_bytes_reference) @@ -57,33 +75,60 @@ BOOST_AUTO_TEST_CASE(xor_bytes_reference) const size_t write_size{1 + rng.randrange(100U)}; const size_t key_offset{rng.randrange(3 * 8U)}; // Should wrap around - std::vector key_bytes{rng.randbytes(sizeof(uint64_t))}; - uint64_t key; - std::memcpy(&key, key_bytes.data(), sizeof key); + const auto key_bytes{rng.randbytes(Obfuscation::SIZE_BYTES)}; + const Obfuscation obfuscation{key_bytes}; std::vector expected{rng.randbytes(write_size)}; std::vector actual{expected}; expected_xor(expected, key_bytes, key_offset); - util::Xor(actual, key_bytes, key_offset); + obfuscation(actual, key_offset); BOOST_CHECK_EQUAL_COLLECTIONS(expected.begin(), expected.end(), actual.begin(), actual.end()); } } +BOOST_AUTO_TEST_CASE(xor_roundtrip_random_chunks) +{ + auto 
apply_random_xor_chunks{[](std::span write, const Obfuscation& obfuscation, FastRandomContext& rng) { + for (size_t offset{0}; offset < write.size();) { + const size_t chunk_size{1 + rng.randrange(write.size() - offset)}; + obfuscation(write.subspan(offset, chunk_size), offset); + offset += chunk_size; + } + }}; + + FastRandomContext rng{/*fDeterministic=*/false}; + for (size_t test{0}; test < 100; ++test) { + const size_t write_size{1 + rng.randrange(100U)}; + const std::vector original{rng.randbytes(write_size)}; + std::vector roundtrip{original}; + + const auto key_bytes{rng.randbytes(Obfuscation::SIZE_BYTES)}; + const Obfuscation obfuscation{key_bytes}; + apply_random_xor_chunks(roundtrip, obfuscation, rng); + + const bool all_zero = !obfuscation || (HexStr(key_bytes).find_first_not_of('0') >= write_size * 2); + BOOST_CHECK_EQUAL(original != roundtrip, !all_zero); + + apply_random_xor_chunks(roundtrip, obfuscation, rng); + BOOST_CHECK(original == roundtrip); + } +} + BOOST_AUTO_TEST_CASE(xor_file) { fs::path xor_path{m_args.GetDataDirBase() / "test_xor.bin"}; auto raw_file{[&](const auto& mode) { return fsbridge::fopen(xor_path, mode); }}; const std::vector test1{1, 2, 3}; const std::vector test2{4, 5}; - const std::vector xor_pat{std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}}; + constexpr std::array xor_pat{std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}, std::byte{0xff}, std::byte{0x00}}; uint64_t xor_key; std::memcpy(&xor_key, xor_pat.data(), sizeof xor_key); { // Check errors for missing file - AutoFile xor_file{raw_file("rb"), xor_pat}; + AutoFile xor_file{raw_file("rb"), xor_key}; BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, HasReason{"AutoFile::write: file handle is nullpt"}); BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: file handle is nullpt"}); BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: file handle is nullpt"}); @@ -95,7 +140,7 @@ BOOST_AUTO_TEST_CASE(xor_file) #else const char* mode = "wbx"; #endif - AutoFile xor_file{raw_file(mode), xor_pat}; + AutoFile xor_file{raw_file(mode), xor_key}; xor_file << test1 << test2; } { @@ -108,7 +153,7 @@ BOOST_AUTO_TEST_CASE(xor_file) BOOST_CHECK_EXCEPTION(non_xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: end of file"}); } { - AutoFile xor_file{raw_file("rb"), xor_pat}; + AutoFile xor_file{raw_file("rb"), xor_key}; std::vector read1, read2; xor_file >> read1 >> read2; BOOST_CHECK_EQUAL(HexStr(read1), HexStr(test1)); @@ -117,7 +162,7 @@ BOOST_AUTO_TEST_CASE(xor_file) BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: end of file"}); } { - AutoFile xor_file{raw_file("rb"), xor_pat}; + AutoFile xor_file{raw_file("rb"), xor_key}; std::vector read2; // Check that ignore works xor_file.ignore(4); @@ -285,7 +330,7 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) // Degenerate case { DataStream ds{in}; - ds.Xor({0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}); + Obfuscation{0}(ds); BOOST_CHECK_EQUAL(""s, ds.str()); } @@ -293,12 +338,10 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) in.push_back(std::byte{0xf0}); { - const std::vector xor_pat{std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}}; - uint64_t xor_key; - 
std::memcpy(&xor_key, xor_pat.data(), sizeof xor_key); + const Obfuscation obfuscation{{std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}, std::byte{0xff}}}; DataStream ds{in}; - ds.Xor({0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}); + obfuscation(ds); BOOST_CHECK_EQUAL("\xf0\x0f"s, ds.str()); } @@ -307,12 +350,10 @@ BOOST_AUTO_TEST_CASE(streams_serializedata_xor) in.push_back(std::byte{0x0f}); { - const std::vector xor_pat{std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}}; - uint64_t xor_key; - std::memcpy(&xor_key, xor_pat.data(), sizeof xor_key); + const Obfuscation obfuscation{{std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}, std::byte{0xff}, std::byte{0x0f}}}; DataStream ds{in}; - ds.Xor({0xff, 0x0f, 0xff, 0x0f, 0xff, 0x0f, 0xff, 0x0f}); + obfuscation(ds); BOOST_CHECK_EQUAL("\x0f\x00"s, ds.str()); } } From 99b2c2a862b49e15139ce5e874ca15f12ce6f681 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 19 Nov 2024 16:11:56 +0100 Subject: [PATCH 08/24] bench: measure block (size)serialization speed The SizeComputer is a special serializer which returns what the exact final size will be of the serialized content. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock|DeserializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 936,285.45 | 1,068.05 | 0.1% | 11.01 | `DeserializeBlock` | 194,330.04 | 5,145.88 | 0.2% | 10.97 | `SerializeBlock` | 12,215.05 | 81,866.19 | 0.0% | 11.00 | `SizeComputerBlock` > C++ compiler .......................... 
GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 4,447,243.87 | 224.86 | 0.0% | 53,689,737.58 | 15,966,336.86 | 3.363 | 2,409,315.46 | 0.5% | 11.01 | `DeserializeBlock` | 869,833.14 | 1,149.65 | 0.0% | 8,015,883.90 | 3,123,013.80 | 2.567 | 1,517,035.87 | 0.5% | 10.81 | `SerializeBlock` | 26,535.51 | 37,685.36 | 0.0% | 225,261.03 | 95,278.40 | 2.364 | 53,037.03 | 0.6% | 11.00 | `SizeComputerBlock` --- src/bench/checkblock.cpp | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index 9558d64f199..ba26457b31a 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -21,11 +21,34 @@ #include #include +static void SizeComputerBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + bench.unit("block").run([&] { + SizeComputer size_computer; + size_computer << TX_WITH_WITNESS(block); + assert(size_computer.size() == benchmark::data::block413567.size()); + }); +} + +static void SerializeBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + // Create output stream and verify first serialization matches input + bench.unit("block").run([&] { + DataStream output_stream(benchmark::data::block413567.size()); + output_stream << TX_WITH_WITNESS(block); + assert(output_stream.size() == benchmark::data::block413567.size()); + }); +} + // These are the two major time-sinks which happen after we have fully received // a block off the wire, but before we can relay the block on to peers using // compact block relay. -static void DeserializeBlockTest(benchmark::Bench& bench) +static void DeserializeBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -39,7 +62,7 @@ static void DeserializeBlockTest(benchmark::Bench& bench) }); } -static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) +static void DeserializeAndCheckBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -60,5 +83,7 @@ static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) }); } -BENCHMARK(DeserializeBlockTest, benchmark::PriorityLevel::HIGH); -BENCHMARK(DeserializeAndCheckBlockTest, benchmark::PriorityLevel::HIGH); +BENCHMARK(SizeComputerBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(SerializeBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeAndCheckBlock, benchmark::PriorityLevel::HIGH); From 794180e8f8e1347f0fe54e23b34073eae904fd9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 17 Jan 2025 14:29:33 +0100 Subject: [PATCH 09/24] refactor: reduce template bloat in primitive serialization Merged multiple template methods into single constexpr-delimited implementation to reduce template bloat (i.e. related functionality is grouped into a single method, but can be optimized because of C++20 constexpr conditions). 
This unifies related methods that were only bound before by similar signatures - and enables `SizeComputer` optimizations later --- src/serialize.h | 53 +++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 6384c95dba3..387f294223f 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -252,38 +252,47 @@ const Out& AsBase(const In& x) template concept CharNotInt8 = std::same_as && !std::same_as; +template +concept ByteOrIntegral = std::is_same_v || + (std::is_integral_v && !std::is_same_v); + template void Serialize(Stream&, V) = delete; // char serialization forbidden. Use uint8_t or int8_t -template void Serialize(Stream& s, std::byte a) { ser_writedata8(s, uint8_t(a)); } -template inline void Serialize(Stream& s, int8_t a ) { ser_writedata8(s, a); } -template inline void Serialize(Stream& s, uint8_t a ) { ser_writedata8(s, a); } -template inline void Serialize(Stream& s, int16_t a ) { ser_writedata16(s, a); } -template inline void Serialize(Stream& s, uint16_t a) { ser_writedata16(s, a); } -template inline void Serialize(Stream& s, int32_t a ) { ser_writedata32(s, a); } -template inline void Serialize(Stream& s, uint32_t a) { ser_writedata32(s, a); } -template inline void Serialize(Stream& s, int64_t a ) { ser_writedata64(s, a); } -template inline void Serialize(Stream& s, uint64_t a) { ser_writedata64(s, a); } +template void Serialize(Stream& s, T a) +{ + if constexpr (sizeof(T) == 1) { + ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + ser_writedata16(s, static_cast(a)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + ser_writedata32(s, static_cast(a)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + ser_writedata64(s, static_cast(a)); // (u)int64_t + } +} template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } template void Serialize(Stream& s, Span span) { s.write(AsBytes(span)); } template void Unserialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t -template void Unserialize(Stream& s, std::byte& a) { a = std::byte{ser_readdata8(s)}; } -template inline void Unserialize(Stream& s, int8_t& a ) { a = ser_readdata8(s); } -template inline void Unserialize(Stream& s, uint8_t& a ) { a = ser_readdata8(s); } -template inline void Unserialize(Stream& s, int16_t& a ) { a = ser_readdata16(s); } -template inline void Unserialize(Stream& s, uint16_t& a) { a = ser_readdata16(s); } -template inline void Unserialize(Stream& s, int32_t& a ) { a = ser_readdata32(s); } -template inline void Unserialize(Stream& s, uint32_t& a) { a = ser_readdata32(s); } -template inline void Unserialize(Stream& s, int64_t& a ) { a = ser_readdata64(s); } -template inline void Unserialize(Stream& s, uint64_t& a) { a = ser_readdata64(s); } +template void Unserialize(Stream& s, T& a) +{ + if constexpr (sizeof(T) == 1) { + a = static_cast(ser_readdata8(s)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + a = static_cast(ser_readdata16(s)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + a = static_cast(ser_readdata32(s)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + a = static_cast(ser_readdata64(s)); // (u)int64_t + } +} template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } template void Unserialize(Stream& s, Span span) { s.read(AsWritableBytes(span)); } - -template inline void Serialize(Stream& s, bool a) { uint8_t f = a; ser_writedata8(s, f); } -template inline void Unserialize(Stream& s, bool& a) { uint8_t f = ser_readdata8(s); a = f; } // clang-format on @@ -489,7 +498,7 @@ public: * serialization, and Unser(stream, object&) for deserialization. Serialization routines (inside * READWRITE, or directly with << and >> operators), can then use Using(object). * - * This works by constructing a Wrapper-wrapped version of object, where T is + * This works by constructing a Wrapper-wrapped version of object, where T is * const during serialization, and non-const during deserialization, which maintains const * correctness. 
*/ From 028c00654195bfde7b86f5634eaa36f997d79f14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 13 Feb 2025 22:49:51 +0100 Subject: [PATCH 10/24] cleanup: remove unused `ser_writedata16be` and `ser_readdata16be` --- src/serialize.h | 11 ----------- src/test/fuzz/integer.cpp | 4 ---- 2 files changed, 15 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 387f294223f..fa85663ad31 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -60,11 +60,6 @@ template inline void ser_writedata16(Stream &s, uint16_t obj) obj = htole16_internal(obj); s.write(AsBytes(Span{&obj, 1})); } -template inline void ser_writedata16be(Stream &s, uint16_t obj) -{ - obj = htobe16_internal(obj); - s.write(AsBytes(Span{&obj, 1})); -} template inline void ser_writedata32(Stream &s, uint32_t obj) { obj = htole32_internal(obj); @@ -92,12 +87,6 @@ template inline uint16_t ser_readdata16(Stream &s) s.read(AsWritableBytes(Span{&obj, 1})); return le16toh_internal(obj); } -template inline uint16_t ser_readdata16be(Stream &s) -{ - uint16_t obj; - s.read(AsWritableBytes(Span{&obj, 1})); - return be16toh_internal(obj); -} template inline uint32_t ser_readdata32(Stream &s) { uint32_t obj; diff --git a/src/test/fuzz/integer.cpp b/src/test/fuzz/integer.cpp index b9e3154106f..2e51eac3078 100644 --- a/src/test/fuzz/integer.cpp +++ b/src/test/fuzz/integer.cpp @@ -236,10 +236,6 @@ FUZZ_TARGET(integer, .init = initialize_integer) const uint16_t deserialized_u16 = ser_readdata16(stream); assert(u16 == deserialized_u16 && stream.empty()); - ser_writedata16be(stream, u16); - const uint16_t deserialized_u16be = ser_readdata16be(stream); - assert(u16 == deserialized_u16be && stream.empty()); - ser_writedata8(stream, u8); const uint8_t deserialized_u8 = ser_readdata8(stream); assert(u8 == deserialized_u8 && stream.empty()); From c02600b8e1d4f34185db53039de7a4c10d944c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Feb 2025 00:53:11 +0100 Subject: [PATCH 11/24] optimization: Add single byte write Single byte writes are used very often (used for every (u)int8_t or std::byte or bool and for every VarInt's first byte which is also needed for every (pre)Vector). It makes sense to avoid the generalized serialization infrastructure that isn't needed: * AutoFile write doesn't need to allocate 4k buffer for a single byte now; * `VectorWriter` and `DataStream` avoids memcpy/insert calls. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock|DeserializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 934,120.45 | 1,070.53 | 0.2% | 11.01 | `DeserializeBlock` | 170,719.27 | 5,857.57 | 0.1% | 10.99 | `SerializeBlock` | 12,048.40 | 82,998.58 | 0.2% | 11.01 | `SizeComputerBlock` > C++ compiler .......................... 
GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 4,433,835.04 | 225.54 | 0.0% | 53,688,481.60 | 15,918,730.23 | 3.373 | 2,409,056.47 | 0.5% | 11.01 | `DeserializeBlock` | 563,663.10 | 1,774.11 | 0.0% | 7,386,775.59 | 2,023,525.77 | 3.650 | 1,385,368.57 | 0.5% | 11.00 | `SerializeBlock` | 27,351.60 | 36,560.93 | 0.1% | 225,261.03 | 98,209.77 | 2.294 | 53,037.03 | 0.9% | 11.00 | `SizeComputerBlock` --- src/hash.h | 9 +++++++++ src/serialize.h | 7 ++++++- src/streams.cpp | 18 ++++++++++++++++++ src/streams.h | 16 ++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/hash.h b/src/hash.h index 52babf8b1d9..5e9a2bd648e 100644 --- a/src/hash.h +++ b/src/hash.h @@ -107,6 +107,10 @@ public: { ctx.Write(UCharCast(src.data()), src.size()); } + void write(std::byte src) + { + ctx.Write(UCharCast(&src), 1); + } /** Compute the double-SHA256 hash of all data written to this object. * @@ -194,6 +198,11 @@ public: m_source.write(src); HashWriter::write(src); } + void write(std::byte src) + { + m_source.write(src); + HashWriter::write(src); + } template HashedSourceWriter& operator<<(const T& obj) diff --git a/src/serialize.h b/src/serialize.h index fa85663ad31..0b75a1e3a44 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -53,7 +53,7 @@ constexpr deserialize_type deserialize {}; */ template inline void ser_writedata8(Stream &s, uint8_t obj) { - s.write(AsBytes(Span{&obj, 1})); + s.write(std::byte{obj}); } template inline void ser_writedata16(Stream &s, uint16_t obj) { @@ -1067,6 +1067,10 @@ public: { this->nSize += src.size(); } + void write(std::byte) + { + this->nSize += 1; + } /** Pretend _nSize bytes are written, without specifying them. 
*/ void seek(size_t _nSize) @@ -1131,6 +1135,7 @@ public: template ParamsStream& operator<<(const U& obj) { ::Serialize(*this, obj); return *this; } template ParamsStream& operator>>(U&& obj) { ::Unserialize(*this, obj); return *this; } void write(Span src) { GetStream().write(src); } + void write(std::byte src) { GetStream().write(src); } void read(Span dst) { GetStream().read(dst); } void ignore(size_t num) { GetStream().ignore(num); } bool eof() const { return GetStream().eof(); } diff --git a/src/streams.cpp b/src/streams.cpp index bec505ebc3a..51b8c4aa087 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -111,6 +111,24 @@ void AutoFile::write_large(Span src) if (m_position) *m_position += src.size(); } +void AutoFile::write(std::byte val) +{ + if (!m_file) throw std::ios_base::failure("AutoFile::write: file handle is nullptr"); + if (!m_obfuscation) { + if (fwrite(&val, 1, 1, m_file) != 1) { + throw std::ios_base::failure("AutoFile::write: write failed"); + } + if (m_position.has_value()) *m_position += 1; + } else { + if (!m_position.has_value()) throw std::ios_base::failure("AutoFile::write: position unknown"); + auto src{Span{&val, 1}}; + m_obfuscation(src, *m_position); + if (fwrite(src.data(), 1, 1, m_file) != 1) { + throw std::ios_base::failure{"XorFile::write: failed"}; + } + *m_position += 1; + } +} bool AutoFile::Commit() { return ::FileCommit(m_file); diff --git a/src/streams.h b/src/streams.h index e1b324b52d8..b4a17702b9a 100644 --- a/src/streams.h +++ b/src/streams.h @@ -62,6 +62,16 @@ public: } nPos += src.size(); } + void write(std::byte val) + { + assert(nPos <= vchData.size()); + if (nPos < vchData.size()) { + vchData[nPos] = static_cast(val); + } else { + vchData.push_back(static_cast(val)); + } + nPos += 1; + } template VectorWriter& operator<<(const T& obj) { @@ -233,6 +243,11 @@ public: // Write to the end of the buffer vch.insert(vch.end(), src.begin(), src.end()); } + void write(value_type val) + { + // Push single value to the end of the buffer + vch.push_back(val); + } template DataStream& operator<<(const T& obj) @@ -427,6 +442,7 @@ public: void ignore(size_t nSize); void write(Span src); void write_large(Span src); // Note that src will be mutated + void write(std::byte src); template AutoFile& operator<<(const T& obj) From c0724983059dc4a7d6f268456bd91110cdec0aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Feb 2025 13:54:57 +0100 Subject: [PATCH 12/24] optimization: merge SizeComputer specializations + add new ones Endianness doesn't affect the final size, we can skip it for `SizeComputer`. We can `if constexpr` previous calls into existing method, short-circuiting existing logic when we only need their serialized sizes. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock|DeserializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 888,859.82 | 1,125.04 | 0.4% | 10.87 | `DeserializeBlock` | 168,502.88 | 5,934.62 | 0.1% | 10.99 | `SerializeBlock` | 10,200.88 | 98,030.75 | 0.1% | 11.00 | `SizeComputerBlock` > C++ compiler .......................... 
GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 4,460,428.52 | 224.19 | 0.0% | 53,692,507.13 | 16,015,347.97 | 3.353 | 2,410,105.48 | 0.5% | 11.01 | `DeserializeBlock` | 567,042.65 | 1,763.54 | 0.0% | 7,386,775.59 | 2,035,613.84 | 3.629 | 1,385,368.57 | 0.5% | 11.01 | `SerializeBlock` | 25,728.56 | 38,867.32 | 0.0% | 172,750.03 | 92,366.64 | 1.870 | 42,131.03 | 1.7% | 11.00 | `SizeComputerBlock` --- src/serialize.h | 79 +++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 0b75a1e3a44..6df240436da 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -48,6 +48,16 @@ static const unsigned int MAX_VECTOR_ALLOCATE = 5000000; struct deserialize_type {}; constexpr deserialize_type deserialize {}; +class SizeComputer; + +//! Check if type contains a stream by seeing if it has a GetStream() method. +template +concept ContainsStream = requires(T t) { t.GetStream(); }; + +template +concept ContainsSizeComputer = ContainsStream && + std::is_same_v().GetStream())>, SizeComputer>; + /* * Lowest-level serialization and conversion. */ @@ -107,8 +117,6 @@ template inline uint64_t ser_readdata64(Stream &s) } -class SizeComputer; - /** * Convert any argument to a reference to X, maintaining constness. * @@ -248,7 +256,9 @@ concept ByteOrIntegral = std::is_same_v || template void Serialize(Stream&, V) = delete; // char serialization forbidden. Use uint8_t or int8_t template void Serialize(Stream& s, T a) { - if constexpr (sizeof(T) == 1) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(sizeof(T)); + } else if constexpr (sizeof(T) == 1) { ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool } else if constexpr (sizeof(T) == 2) { ser_writedata16(s, static_cast(a)); // (u)int16_t @@ -300,12 +310,14 @@ constexpr inline unsigned int GetSizeOfCompactSize(uint64_t nSize) else return sizeof(unsigned char) + sizeof(uint64_t); } -inline void WriteCompactSize(SizeComputer& os, uint64_t nSize); - template void WriteCompactSize(Stream& os, uint64_t nSize) { - if (nSize < 253) + if constexpr (ContainsSizeComputer) + { + os.GetStream().seek(GetSizeOfCompactSize(nSize)); + } + else if (nSize < 253) { ser_writedata8(os, nSize); } @@ -412,7 +424,7 @@ struct CheckVarIntMode { }; template -inline unsigned int GetSizeOfVarInt(I n) +constexpr unsigned int GetSizeOfVarInt(I n) { CheckVarIntMode(); int nRet = 0; @@ -425,25 +437,26 @@ inline unsigned int GetSizeOfVarInt(I n) return nRet; } -template -inline void WriteVarInt(SizeComputer& os, I n); - template void WriteVarInt(Stream& os, I n) { - CheckVarIntMode(); - unsigned char tmp[(sizeof(n)*8+6)/7]; - int len=0; - while(true) { - tmp[len] = (n & 0x7F) | (len ? 0x80 : 0x00); - if (n <= 0x7F) - break; - n = (n >> 7) - 1; - len++; + if constexpr (ContainsSizeComputer) { + os.GetStream().seek(GetSizeOfVarInt(n)); + } else { + CheckVarIntMode(); + unsigned char tmp[(sizeof(n)*8+6)/7]; + int len=0; + while(true) { + tmp[len] = (n & 0x7F) | (len ? 
0x80 : 0x00); + if (n <= 0x7F) + break; + n = (n >> 7) - 1; + len++; + } + do { + ser_writedata8(os, tmp[len]); + } while(len--); } - do { - ser_writedata8(os, tmp[len]); - } while(len--); } template @@ -532,7 +545,9 @@ struct CustomUintFormatter template void Ser(Stream& s, I v) { if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); - if (BigEndian) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(Bytes); + } else if (BigEndian) { uint64_t raw = htobe64_internal(v); s.write(AsBytes(Span{&raw, 1}).last(Bytes)); } else { @@ -1063,6 +1078,9 @@ protected: public: SizeComputer() = default; + SizeComputer& GetStream() { return *this; } + const SizeComputer& GetStream() const { return *this; }; + void write(Span src) { this->nSize += src.size(); @@ -1090,27 +1108,12 @@ public: } }; -template -inline void WriteVarInt(SizeComputer &s, I n) -{ - s.seek(GetSizeOfVarInt(n)); -} - -inline void WriteCompactSize(SizeComputer &s, uint64_t nSize) -{ - s.seek(GetSizeOfCompactSize(nSize)); -} - template size_t GetSerializeSize(const T& t) { return (SizeComputer() << t).size(); } -//! Check if type contains a stream by seeing if has a GetStream() method. -template -concept ContainsStream = requires(T t) { t.GetStream(); }; - /** Wrapper that overrides the GetParams() function of a stream. */ template class ParamsStream From b07cdbe542c997c162b57d925c3ecf1389331613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 18 Jan 2025 15:37:08 +0100 Subject: [PATCH 13/24] test: validate duplicate detection in `CheckTransaction` The `CheckTransaction` validation function in https://github.com/bitcoin/bitcoin/blob/master/src/consensus/tx_check.cpp#L41-L45 relies on a correct ordering relation for detecting duplicate transaction inputs. This update to the tests ensures that: * Accurate detection of duplicates: Beyond trivial cases (e.g., two identical inputs), duplicates are detected correctly in more complex scenarios. * Consistency across methods: Both sorted sets and hash-based sets behave identically when detecting duplicates for `COutPoint` and related values. * Robust ordering and equality relations: The function maintains expected behavior for ordering and equality checks. Using randomized testing with shuffled inputs (to avoid any remaining bias introduced), the enhanced test validates that `CheckTransaction` remains robust and reliable across various input configurations. It confirms identical behavior to a hashing-based duplicate detection mechanism, ensuring consistency and correctness. To make sure the new branches in the follow-up commits will be covered, `basic_transaction_tests` was extended a randomized test one comparing against the old implementation (and also an alternative duplicate). The iterations and ranges were chosen such that every new branch is expected to be hit once. 
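In essence, the hash-based reference that the randomized test compares `CheckTransaction` against boils down to the sketch below (it mirrors the `reference_duplicate_check_hash` lambda added in this patch; `SaltedOutpointHasher` from coins.h is assumed as the set's hash functor):

    // Returns true iff all prevouts are distinct, i.e. the transaction has no duplicate inputs.
    auto reference_duplicate_check_hash{[](const std::vector<CTxIn>& vin) {
        std::unordered_set<COutPoint, SaltedOutpointHasher> seen; // needs <unordered_set>, coins.h, primitives/transaction.h
        for (const auto& txin : vin) {
            if (!seen.insert(txin.prevout).second) return false; // a failed insert means this prevout was already seen
        }
        return true;
    }};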
--- src/test/transaction_tests.cpp | 220 +++++++++++++++++++++++++++++++-- src/test/util/setup_common.cpp | 5 + src/test/util/setup_common.h | 1 + 3 files changed, 217 insertions(+), 9 deletions(-) diff --git a/src/test/transaction_tests.cpp b/src/test/transaction_tests.cpp index 8beeec49915..0abd8aa0b46 100644 --- a/src/test/transaction_tests.cpp +++ b/src/test/transaction_tests.cpp @@ -406,20 +406,110 @@ BOOST_AUTO_TEST_CASE(tx_oversized) } } -BOOST_AUTO_TEST_CASE(basic_transaction_tests) +static CMutableTransaction CreateTransaction() { - // Random real transaction (e2769b09e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436) - unsigned char ch[] = {0x01, 0x00, 0x00, 0x00, 0x01, 0x6b, 0xff, 0x7f, 0xcd, 0x4f, 0x85, 0x65, 0xef, 0x40, 0x6d, 0xd5, 0xd6, 0x3d, 0x4f, 0xf9, 0x4f, 0x31, 0x8f, 0xe8, 0x20, 0x27, 0xfd, 0x4d, 0xc4, 0x51, 0xb0, 0x44, 0x74, 0x01, 0x9f, 0x74, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x49, 0x30, 0x46, 0x02, 0x21, 0x00, 0xda, 0x0d, 0xc6, 0xae, 0xce, 0xfe, 0x1e, 0x06, 0xef, 0xdf, 0x05, 0x77, 0x37, 0x57, 0xde, 0xb1, 0x68, 0x82, 0x09, 0x30, 0xe3, 0xb0, 0xd0, 0x3f, 0x46, 0xf5, 0xfc, 0xf1, 0x50, 0xbf, 0x99, 0x0c, 0x02, 0x21, 0x00, 0xd2, 0x5b, 0x5c, 0x87, 0x04, 0x00, 0x76, 0xe4, 0xf2, 0x53, 0xf8, 0x26, 0x2e, 0x76, 0x3e, 0x2d, 0xd5, 0x1e, 0x7f, 0xf0, 0xbe, 0x15, 0x77, 0x27, 0xc4, 0xbc, 0x42, 0x80, 0x7f, 0x17, 0xbd, 0x39, 0x01, 0x41, 0x04, 0xe6, 0xc2, 0x6e, 0xf6, 0x7d, 0xc6, 0x10, 0xd2, 0xcd, 0x19, 0x24, 0x84, 0x78, 0x9a, 0x6c, 0xf9, 0xae, 0xa9, 0x93, 0x0b, 0x94, 0x4b, 0x7e, 0x2d, 0xb5, 0x34, 0x2b, 0x9d, 0x9e, 0x5b, 0x9f, 0xf7, 0x9a, 0xff, 0x9a, 0x2e, 0xe1, 0x97, 0x8d, 0xd7, 0xfd, 0x01, 0xdf, 0xc5, 0x22, 0xee, 0x02, 0x28, 0x3d, 0x3b, 0x06, 0xa9, 0xd0, 0x3a, 0xcf, 0x80, 0x96, 0x96, 0x8d, 0x7d, 0xbb, 0x0f, 0x91, 0x78, 0xff, 0xff, 0xff, 0xff, 0x02, 0x8b, 0xa7, 0x94, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xba, 0xde, 0xec, 0xfd, 0xef, 0x05, 0x07, 0x24, 0x7f, 0xc8, 0xf7, 0x42, 0x41, 0xd7, 0x3b, 0xc0, 0x39, 0x97, 0x2d, 0x7b, 0x88, 0xac, 0x40, 0x94, 0xa8, 0x02, 0x00, 0x00, 0x00, 0x00, 0x19, 0x76, 0xa9, 0x14, 0xc1, 0x09, 0x32, 0x48, 0x3f, 0xec, 0x93, 0xed, 0x51, 0xf5, 0xfe, 0x95, 0xe7, 0x25, 0x59, 0xf2, 0xcc, 0x70, 0x43, 0xf9, 0x88, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00}; - std::vector vch(ch, ch + sizeof(ch) -1); - DataStream stream(vch); + // Serialized random real transaction (e2769b09e784f32f62ef849763d4f45b98e07ba658647343b915ff832b110436) + static constexpr auto ser_tx{"01000000016bff7fcd4f8565ef406dd5d63d4ff94f318fe82027fd4dc451b04474019f74b4000000008c493046022100da0dc6aecefe1e06efdf05773757deb168820930e3b0d03f46f5fcf150bf990c022100d25b5c87040076e4f253f8262e763e2dd51e7ff0be157727c4bc42807f17bd39014104e6c26ef67dc610d2cd192484789a6cf9aea9930b944b7e2db5342b9d9e5b9ff79aff9a2ee1978dd7fd01dfc522ee02283d3b06a9d03acf8096968d7dbb0f9178ffffffff028ba7940e000000001976a914badeecfdef0507247fc8f74241d73bc039972d7b88ac4094a802000000001976a914c10932483fec93ed51f5fe95e72559f2cc7043f988ac0000000000"_hex}; CMutableTransaction tx; - stream >> TX_WITH_WITNESS(tx); + DataStream(ser_tx) >> TX_WITH_WITNESS(tx); + return tx; +} + +BOOST_AUTO_TEST_CASE(transaction_duplicate_input_test) +{ + auto tx{CreateTransaction()}; + TxValidationState state; BOOST_CHECK_MESSAGE(CheckTransaction(CTransaction(tx), state) && state.IsValid(), "Simple deserialized transaction should be valid."); - // Check that duplicate txins fail - tx.vin.push_back(tx.vin[0]); - BOOST_CHECK_MESSAGE(!CheckTransaction(CTransaction(tx), state) || !state.IsValid(), "Transaction with duplicate txins should be invalid."); + // Add 
duplicate input + tx.vin.emplace_back(tx.vin[0]); + std::ranges::shuffle(tx.vin, m_rng); + BOOST_CHECK_MESSAGE(!CheckTransaction(CTransaction(tx), state) || !state.IsValid(), "Transaction with 2 duplicate txins should be invalid."); + + // ... add a valid input for more complex check + tx.vin.emplace_back(COutPoint(Txid::FromUint256(uint256{1}), 1)); + std::ranges::shuffle(tx.vin, m_rng); + BOOST_CHECK_MESSAGE(!CheckTransaction(CTransaction(tx), state) || !state.IsValid(), "Transaction with 3 inputs (2 valid, 1 duplicate) should be invalid."); +} + +BOOST_AUTO_TEST_CASE(transaction_duplicate_detection_test) +{ + // Randomized testing against hash- and tree-based duplicate check + auto reference_duplicate_check_hash{[](const std::vector& vin) { + std::unordered_set vInOutPoints; + for (const auto& txin : vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return false; + } + } + return true; + }}; + auto reference_duplicate_check_tree{[](const std::vector& vin) { + std::set vInOutPoints; + for (const auto& txin : vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return false; + } + } + return true; + }}; + + std::vector hashes; + std::vector ns; + for (int i = 0; i < 10; ++i) { + hashes.emplace_back(Txid::FromUint256(m_rng.rand256())); + ns.emplace_back(m_rng.rand32()); + } + auto tx{CreateTransaction()}; + TxValidationState state; + for (int i{0}; i < 100; ++i) { + if (m_rng.randbool()) { + tx.vin.clear(); + } + for (int j{0}, num_inputs{1 + m_rng.randrange(5)}; j < num_inputs; ++j) { + if (COutPoint outpoint(hashes[m_rng.randrange(hashes.size())], ns[m_rng.randrange(ns.size())]); !outpoint.IsNull()) { + tx.vin.emplace_back(outpoint); + } + } + std::ranges::shuffle(tx.vin, m_rng); + + bool actual{CheckTransaction(CTransaction(tx), state)}; + BOOST_CHECK_EQUAL(actual, reference_duplicate_check_hash(tx.vin)); + BOOST_CHECK_EQUAL(actual, reference_duplicate_check_tree(tx.vin)); + } +} + +BOOST_AUTO_TEST_CASE(transaction_null_prevout_detection_test) +{ + // Randomized testing against linear null prevout check + auto reference_null_prevout_check_hash{[](const std::vector& vin) { + for (const auto& txin : vin) { + if (txin.prevout.IsNull()) { + return false; + } + } + return true; + }}; + + auto tx{CreateTransaction()}; + TxValidationState state; + for (int i{0}; i < 100; ++i) { + if (m_rng.randbool()) { + tx.vin.clear(); + } + for (int j{0}, num_inputs{1 + m_rng.randrange(5)}; j < num_inputs; ++j) { + switch (m_rng.randrange(5)) { + case 0: tx.vin.emplace_back(COutPoint()); break; // Null prevout + case 1: tx.vin.emplace_back(Txid::FromUint256(uint256::ZERO), m_rng.rand32()); break; // Null hash, random index + case 2: tx.vin.emplace_back(Txid::FromUint256(m_rng.rand256()), COutPoint::NULL_INDEX); break; // Random hash, Null index + default: tx.vin.emplace_back(Txid::FromUint256(m_rng.rand256()), m_rng.rand32()); // Random prevout + } + } + std::ranges::shuffle(tx.vin, m_rng); + + BOOST_CHECK_EQUAL(CheckTransaction(CTransaction(tx), state), reference_null_prevout_check_hash(tx.vin)); + } } BOOST_AUTO_TEST_CASE(test_Get) @@ -1048,4 +1138,116 @@ BOOST_AUTO_TEST_CASE(test_IsStandard) CheckIsNotStandard(t, "dust"); } +BOOST_AUTO_TEST_CASE(test_uint256_sorting) +{ + // Sorting + std::vector original{ + uint256{1}, + uint256{2}, + uint256{3} + }; + + std::vector shuffled{original}; + std::ranges::shuffle(shuffled, m_rng); + std::sort(shuffled.begin(), shuffled.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(original.begin(), original.end(), shuffled.begin(), shuffled.end()); + + // 
Operators + constexpr auto a{uint256{1}}, + b{uint256{2}}, + c{uint256{3}}; + + BOOST_CHECK(a == a); + BOOST_CHECK(a == uint256{1}); + BOOST_CHECK(b == b); + BOOST_CHECK(c == c); + BOOST_CHECK(a != b); + BOOST_CHECK(a != uint256{10}); + BOOST_CHECK(a != c); + BOOST_CHECK(b != c); + + BOOST_CHECK(a < b); + BOOST_CHECK(a < uint256{10}); + BOOST_CHECK(b < c); + BOOST_CHECK(a < c); +} + +BOOST_AUTO_TEST_CASE(test_transaction_identifier_sorting) +{ + std::vector original{ + Txid::FromUint256(uint256{1}), + Txid::FromUint256(uint256{2}), + Txid::FromUint256(uint256{3}) + }; + + std::vector shuffled{original}; + std::ranges::shuffle(shuffled, m_rng); + std::sort(shuffled.begin(), shuffled.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(original.begin(), original.end(), shuffled.begin(), shuffled.end()); + + // Operators + const auto a(Txid::FromUint256(uint256{1})), + b(Txid::FromUint256(uint256{2})), + c(Txid::FromUint256(uint256{3})); + + BOOST_CHECK(a == uint256{1}); + + BOOST_CHECK(a == a); + BOOST_CHECK(a == Txid::FromUint256(uint256{1})); + BOOST_CHECK(b == b); + BOOST_CHECK(c == c); + BOOST_CHECK(a != b); + BOOST_CHECK(a != Txid::FromUint256(uint256{10})); + BOOST_CHECK(a != c); + BOOST_CHECK(b != c); + + BOOST_CHECK(a < b); + BOOST_CHECK(a < Txid::FromUint256(uint256{10})); + BOOST_CHECK(b < c); + BOOST_CHECK(a < c); +} + +BOOST_AUTO_TEST_CASE(test_coutpoint_sorting) +{ + // Sorting + std::vector original{ + COutPoint(Txid::FromUint256(uint256{1}), 1), + COutPoint(Txid::FromUint256(uint256{1}), 2), + COutPoint(Txid::FromUint256(uint256{1}), 3), + COutPoint(Txid::FromUint256(uint256{2}), 1), + COutPoint(Txid::FromUint256(uint256{2}), 2), + COutPoint(Txid::FromUint256(uint256{2}), 3), + COutPoint(Txid::FromUint256(uint256{3}), 1), + COutPoint(Txid::FromUint256(uint256{3}), 2), + COutPoint(Txid::FromUint256(uint256{3}), 3) + }; + + std::vector shuffled{original}; + std::ranges::shuffle(shuffled, m_rng); + std::sort(shuffled.begin(), shuffled.end()); + + BOOST_CHECK_EQUAL_COLLECTIONS(original.begin(), original.end(), shuffled.begin(), shuffled.end()); + + // Operators + const auto a{COutPoint(Txid::FromUint256(uint256{1}), 1)}, + b{COutPoint(Txid::FromUint256(uint256{1}), 2)}, + c{COutPoint(Txid::FromUint256(uint256{2}), 1)}; + + BOOST_CHECK(a == a); + BOOST_CHECK(a == COutPoint(Txid::FromUint256(uint256{1}), 1)); + BOOST_CHECK(b == b); + BOOST_CHECK(c == c); + BOOST_CHECK(a != b); + BOOST_CHECK(a != COutPoint(Txid::FromUint256(uint256{1}), 10)); + BOOST_CHECK(a != c); + BOOST_CHECK(b != c); + + BOOST_CHECK(a < b); + BOOST_CHECK(a < COutPoint(Txid::FromUint256(uint256{1}), 10)); + BOOST_CHECK(b < c); + BOOST_CHECK(a < c); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/util/setup_common.cpp b/src/test/util/setup_common.cpp index bf26997c076..76a103487f8 100644 --- a/src/test/util/setup_common.cpp +++ b/src/test/util/setup_common.cpp @@ -614,3 +614,8 @@ std::ostream& operator<<(std::ostream& os, const uint256& num) { return os << num.ToString(); } + +std::ostream& operator<<(std::ostream& os, const COutPoint& outpoint) +{ + return os << outpoint.hash << ", " << outpoint.n; +} diff --git a/src/test/util/setup_common.h b/src/test/util/setup_common.h index 33ad2584573..a12c985bb26 100644 --- a/src/test/util/setup_common.h +++ b/src/test/util/setup_common.h @@ -285,6 +285,7 @@ inline std::ostream& operator<<(std::ostream& os, const std::optional& v) std::ostream& operator<<(std::ostream& os, const arith_uint256& num); std::ostream& operator<<(std::ostream& os, const uint160& num); std::ostream& 
operator<<(std::ostream& os, const uint256& num); +std::ostream& operator<<(std::ostream& os, const COutPoint& outpoint); // @} /** From 99fe67e132d45b1899078467c85f56829a514e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 18 Jan 2025 11:43:26 +0100 Subject: [PATCH 14/24] bench: measure `CheckBlock` speed separately from serialization > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 372,743.63 | 2,682.81 | 1.1% | 10.99 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 3,304,694.54 | 302.60 | 0.5% | 11.05 | `DuplicateInputs` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 1,096,261.84 | 912.19 | 0.1% | 7,963,390.88 | 3,487,375.26 | 2.283 | 1,266,941.00 | 1.8% | 11.03 | `CheckBlockBench` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 8,366,309.48 | 119.53 | 0.0% | 23,865,177.67 | 26,620,160.23 | 0.897 | 5,972,887.41 | 4.0% | 10.78 | `DuplicateInputs` --- src/bench/checkblock.cpp | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index ba26457b31a..5f8bc6c3725 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -48,7 +48,7 @@ static void SerializeBlock(benchmark::Bench& bench) { // a block off the wire, but before we can relay the block on to peers using // compact block relay. 
-static void DeserializeBlock(benchmark::Bench& bench) +static void DeserializeBlockBench(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -62,28 +62,20 @@ static void DeserializeBlock(benchmark::Bench& bench) }); } -static void DeserializeAndCheckBlock(benchmark::Bench& bench) +static void CheckBlockBench(benchmark::Bench& bench) { - DataStream stream(benchmark::data::block413567); - std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction - - ArgsManager bench_args; - const auto chainParams = CreateChainParams(bench_args, ChainType::MAIN); - + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + const auto chainParams = CreateChainParams(ArgsManager{}, ChainType::MAIN); bench.unit("block").run([&] { - CBlock block; // Note that CBlock caches its checked state, so we need to recreate it here - stream >> TX_WITH_WITNESS(block); - bool rewound = stream.Rewind(benchmark::data::block413567.size()); - assert(rewound); - + block.fChecked = block.m_checked_witness_commitment = block.m_checked_merkle_root = false; // Reset the cached state BlockValidationState validationState; - bool checked = CheckBlock(block, validationState, chainParams->GetConsensus()); - assert(checked); + bool checked = CheckBlock(block, validationState, chainParams->GetConsensus(), /*fCheckPOW=*/true, /*fCheckMerkleRoot=*/true); + assert(checked && validationState.IsValid()); }); } BENCHMARK(SizeComputerBlock, benchmark::PriorityLevel::HIGH); BENCHMARK(SerializeBlock, benchmark::PriorityLevel::HIGH); -BENCHMARK(DeserializeBlock, benchmark::PriorityLevel::HIGH); -BENCHMARK(DeserializeAndCheckBlock, benchmark::PriorityLevel::HIGH); +BENCHMARK(DeserializeBlockBench, benchmark::PriorityLevel::HIGH); +BENCHMARK(CheckBlockBench, benchmark::PriorityLevel::HIGH); From 452baf49e16c16c9a5de6d3a273988ec747803f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 21 Jan 2025 14:01:31 +0100 Subject: [PATCH 15/24] bench: add `ProcessTransactionBench` to measure `CheckBlock` in context The newly introduced `ProcessTransactionBench` incorporates multiple steps in the validation pipeline, offering a more comprehensive view of `CheckBlock` performance within a realistic transaction validation context. Previous microbenchmarks, such as DeserializeAndCheckBlockTest and DuplicateInputs, focused on isolated aspects of transaction and block validation. While these tests provided valuable insights for targeted profiling, they lacked context regarding the broader validation process, where interactions between components play a critical role. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 9,585.10 | 104,328.55 | 0.1% | 11.03 | `ProcessTransactionBench` > C++ compiler .......................... 
GNU 13.3.0 | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 56,199.57 | 17,793.73 | 0.1% | 229,263.01 | 178,766.31 | 1.282 | 15,509.97 | 0.5% | 10.91 | `ProcessTransactionBench` --- src/bench/mempool_stress.cpp | 57 ++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/bench/mempool_stress.cpp b/src/bench/mempool_stress.cpp index fbac25db5f5..2fe0622d18b 100644 --- a/src/bench/mempool_stress.cpp +++ b/src/bench/mempool_stress.cpp @@ -13,10 +13,19 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include class CCoinsViewCache; @@ -126,5 +135,53 @@ static void MempoolCheck(benchmark::Bench& bench) }); } +static void ProcessTransactionBench(benchmark::Bench& bench) +{ + const auto testing_setup{MakeNoLogFileContext()}; + CTxMemPool& pool{*Assert(testing_setup->m_node.mempool)}; + ChainstateManager& chainman{*testing_setup->m_node.chainman}; + + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + std::vector txs(block.vtx.size() - 1); + for (size_t i{1}; i < block.vtx.size(); ++i) { + CMutableTransaction mtx{*block.vtx[i]}; + for (auto& txin : mtx.vin) { + txin.nSequence = CTxIn::SEQUENCE_FINAL; + txin.scriptSig.clear(); + txin.scriptWitness.stack = {WITNESS_STACK_ELEM_OP_TRUE}; + } + txs[i - 1] = MakeTransactionRef(std::move(mtx)); + } + + CCoinsViewCache* coins_tip{nullptr}; + size_t cached_coin_count{0}; + { + LOCK(cs_main); + coins_tip = &chainman.ActiveChainstate().CoinsTip(); + for (const auto& tx : txs) { + const Coin coin(CTxOut(2 * tx->GetValueOut(), P2WSH_OP_TRUE), 1, /*fCoinBaseIn=*/false); + for (const auto& in : tx->vin) { + coins_tip->AddCoin(in.prevout, Coin{coin}, /*possible_overwrite=*/false); + cached_coin_count++; + } + } + } + + bench.batch(txs.size()).run([&] { + LOCK2(cs_main, pool.cs); + assert(coins_tip->GetCacheSize() == cached_coin_count); + for (const auto& tx : txs) pool.removeRecursive(*tx, MemPoolRemovalReason::REPLACED); + assert(pool.size() == 0); + + for (const auto& tx : txs) { + const auto res{chainman.ProcessTransaction(tx, /*test_accept=*/true)}; + assert(res.m_result_type == MempoolAcceptResult::ResultType::VALID); + } + }); +} + BENCHMARK(ComplexMemPool, benchmark::PriorityLevel::HIGH); BENCHMARK(MempoolCheck, benchmark::PriorityLevel::HIGH); +BENCHMARK(ProcessTransactionBench, benchmark::PriorityLevel::HIGH); From 45f8cda9bb015dddcb082bae05403c60b87a5f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 25 Jan 2025 14:07:24 +0100 Subject: [PATCH 16/24] optimization: move duplicate checks outside of coinbase branch `IsCoinBase` means single input with NULL prevout, so it makes sense to restrict duplicate check to non-coinbase transactions only. The behavior is the same as before, except that single-input-transactions aren't checked for duplicates anymore (~70-90% of the cases, see https://transactionfee.info/charts/transactions-1in). I've added braces to the conditions and loops to simplify review of followup commits. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... 
AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 335,917.12 | 2,976.92 | 1.3% | 11.01 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 3,286,337.42 | 304.29 | 1.1% | 10.90 | `DuplicateInputs` | 9,561.02 | 104,591.35 | 0.2% | 11.02 | `ProcessTransactionBench` --- src/consensus/tx_check.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index b3fee1e8b1a..050bca84d47 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -38,22 +38,25 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) // of a tx as spent, it does not check if the tx has duplicate inputs. // Failure to run this check will result in either a crash or an inflation bug, depending on the implementation of // the underlying coins database. - std::set vInOutPoints; - for (const auto& txin : tx.vin) { - if (!vInOutPoints.insert(txin.prevout).second) - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } + if (tx.vin.size() == 1) { + if (tx.IsCoinBase()) { + if (tx.vin[0].scriptSig.size() < 2 || tx.vin[0].scriptSig.size() > 100) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-cb-length"); + } + } + } else { + std::set vInOutPoints; + for (const auto& txin : tx.vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); + } + } - if (tx.IsCoinBase()) - { - if (tx.vin[0].scriptSig.size() < 2 || tx.vin[0].scriptSig.size() > 100) - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-cb-length"); - } - else - { - for (const auto& txin : tx.vin) - if (txin.prevout.IsNull()) + for (const auto& txin : tx.vin) { + if (txin.prevout.IsNull()) { return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-prevout-null"); + } + } } return true; From 2a5df20ca9699b52a24442f2ee81124c4fc15b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 17 Jan 2025 22:39:59 +0100 Subject: [PATCH 17/24] optimization: simplify duplicate checks for trivial inputs No need to create a set for checking duplicates for two-input-transactions. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... 
AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 314,137.30 | 3,183.32 | 1.2% | 11.04 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 3,220,592.73 | 310.50 | 1.3% | 10.92 | `DuplicateInputs` | 9,425.98 | 106,089.77 | 0.3% | 11.00 | `ProcessTransactionBench` --- src/consensus/tx_check.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index 050bca84d47..cd465e56d44 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -45,11 +45,17 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) } } } else { - std::set vInOutPoints; - for (const auto& txin : tx.vin) { - if (!vInOutPoints.insert(txin.prevout).second) { + if (tx.vin.size() == 2) { + if (tx.vin[0].prevout == tx.vin[1].prevout) { return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } + } else { + std::set vInOutPoints; + for (const auto& txin : tx.vin) { + if (!vInOutPoints.insert(txin.prevout).second) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); + } + } } for (const auto& txin : tx.vin) { From ce6840f7017d845cedccf8d54bbc307c73de20f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 18 Jan 2025 11:54:47 +0100 Subject: [PATCH 18/24] optimization: replace tree with sorted vector A pre-sized vector retains locality (enabling SIMD operations), speeding up sorting and equality checks. It's also simpler (therefore more reliable) than a sorted set. It also causes less memory fragmentation. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... 
AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 181,922.54 | 5,496.85 | 0.2% | 10.98 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 997,739.30 | 1,002.27 | 1.0% | 10.94 | `DuplicateInputs` | 9,449.28 | 105,828.15 | 0.3% | 10.99 | `ProcessTransactionBench` Co-authored-by: Pieter Wuille --- src/consensus/tx_check.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index cd465e56d44..9457596fbb9 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -50,11 +50,14 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } } else { - std::set vInOutPoints; + std::vector sortedPrevouts; + sortedPrevouts.reserve(tx.vin.size()); for (const auto& txin : tx.vin) { - if (!vInOutPoints.insert(txin.prevout).second) { - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } + sortedPrevouts.push_back(txin.prevout); + } + std::sort(sortedPrevouts.begin(), sortedPrevouts.end()); + if (std::ranges::adjacent_find(sortedPrevouts) != sortedPrevouts.end()) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } } From 9f601c0cab7f7c96e9f57fe0072d4aa31022654a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sat, 25 Jan 2025 14:24:02 +0100 Subject: [PATCH 19/24] optimization: look for NULL prevouts in the sorted values For the 2 input case we simply check them both, like we did with equality. For the general case, we take advantage of sorting, making invalid value detection constant time instead of linear in the worst case. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='CheckBlockBench|DuplicateInputs|ProcessTransactionBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 179,971.00 | 5,556.45 | 0.3% | 11.02 | `CheckBlockBench` | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 963,177.98 | 1,038.23 | 1.7% | 10.92 | `DuplicateInputs` | 9,410.90 | 106,259.75 | 0.3% | 11.01 | `ProcessTransactionBench` > C++ compiler .......................... 
GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 834,855.94 | 1,197.81 | 0.0% | 6,518,548.86 | 2,656,039.78 | 2.454 | 919,160.84 | 1.5% | 10.78 | `CheckBlockBench` | ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 4,261,492.75 | 234.66 | 0.0% | 17,379,823.40 | 13,559,793.33 | 1.282 | 4,265,714.28 | 3.4% | 11.00 | `DuplicateInputs` | 55,819.53 | 17,914.88 | 0.1% | 227,828.15 | 177,520.09 | 1.283 | 15,184.36 | 0.4% | 10.91 | `ProcessTransactionBench` --- src/consensus/tx_check.cpp | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/consensus/tx_check.cpp b/src/consensus/tx_check.cpp index 9457596fbb9..0a471deda24 100644 --- a/src/consensus/tx_check.cpp +++ b/src/consensus/tx_check.cpp @@ -44,25 +44,27 @@ bool CheckTransaction(const CTransaction& tx, TxValidationState& state) return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-cb-length"); } } + } else if (tx.vin.size() == 2) { + if (tx.vin[0].prevout == tx.vin[1].prevout) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); + } + if (tx.vin[0].prevout.IsNull() || tx.vin[1].prevout.IsNull()) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-prevout-null"); + } } else { - if (tx.vin.size() == 2) { - if (tx.vin[0].prevout == tx.vin[1].prevout) { - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } - } else { - std::vector sortedPrevouts; - sortedPrevouts.reserve(tx.vin.size()); - for (const auto& txin : tx.vin) { - sortedPrevouts.push_back(txin.prevout); - } - std::sort(sortedPrevouts.begin(), sortedPrevouts.end()); - if (std::ranges::adjacent_find(sortedPrevouts) != sortedPrevouts.end()) { - return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); - } + std::vector sortedPrevouts; + sortedPrevouts.reserve(tx.vin.size()); + for (const auto& txin : tx.vin) { + sortedPrevouts.push_back(txin.prevout); + } + std::sort(sortedPrevouts.begin(), sortedPrevouts.end()); + if (std::ranges::adjacent_find(sortedPrevouts) != sortedPrevouts.end()) { + return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-inputs-duplicate"); } - for (const auto& txin : tx.vin) { - if (txin.prevout.IsNull()) { + for (const auto& in : sortedPrevouts) { + if (!in.hash.IsNull()) break; // invalid values can only be at the beginning + if (in.IsNull()) { return state.Invalid(TxValidationResult::TX_CONSENSUS, "bad-txns-prevout-null"); } } From 626d55b9a8e4c428c07b3e084df3bb14e652a064 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sun, 5 Jan 2025 21:44:27 +0100 Subject: [PATCH 20/24] coins: bump default LevelDB write batch size to 64 MiB The UTXO set has grown significantly, and flushing it from memory to LevelDB often takes over 20 minutes after a successful IBD with large dbcache values. The final UTXO set is written to disk in batches, which LevelDB sorts into SST files. By increasing the default batch size, we can reduce overhead from repeated compaction cycles, minimize constant overhead per batch, and achieve more sequential writes. 
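The batch size can also be overridden at runtime via the hidden `-dbbatchsize` debug option (value in bytes), so a 64 MiB batch can be exercised without recompiling, e.g.:

> build/src/bitcoind -dbcache=30000 -dbbatchsize=67108864 -reindex-chainstate -stopatheight=878000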
Experiments with different batch sizes (loaded via assumeutxo at block 840k, then measuring final flush time) show that 64 MiB batches significantly reduce flush time without notably increasing memory usage: | dbbatchsize | flush_sum (ms) | |-------------|----------------| | 8 MiB | ~240,000 | | 16 MiB | ~220,000 | | 32 MiB | ~200,000 | | *64 MiB* | *~150,000* | | 128 MiB | ~156,000 | | 256 MiB | ~166,000 | | 512 MiB | ~186,000 | | 1 GiB | ~186,000 | Checking the impact of a `-reindex-chainstate` with `-stopatheight=878000` and `-dbcache=30000` gives: 16 << 20 ``` 2025-01-12T07:31:05Z Flushed fee estimates to fee_estimates.dat. 2025-01-12T07:31:05Z [warning] Flushing large (26 GiB) UTXO set to disk, it may take several minutes 2025-01-12T07:53:51Z Shutdown: done ``` Flush time: 22 minutes and 46 seconds 64 >> 20 ``` 2025-01-12T18:30:00Z Flushed fee estimates to fee_estimates.dat. 2025-01-12T18:30:00Z [warning] Flushing large (26 GiB) UTXO set to disk, it may take several minutes 2025-01-12T18:44:43Z Shutdown: done ``` Flush time: ~14 minutes 43 seconds. --- src/txdb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/txdb.h b/src/txdb.h index 968b7c27810..5d6d861ffd4 100644 --- a/src/txdb.h +++ b/src/txdb.h @@ -22,7 +22,7 @@ class COutPoint; class uint256; //! -dbbatchsize default (bytes) -static const int64_t nDefaultDbBatchSize = 16 << 20; +static const int64_t nDefaultDbBatchSize = 64 << 20; //! User-controlled performance and debug options. struct CoinsViewOptions { From 24d35ec4ec3e10bf5d538fdffe16c62d6ae37570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 21 Jun 2024 18:00:14 +0200 Subject: [PATCH 21/24] bench: Add COutPoint and SaltedOutpointHasher benchmarks This commit introduces new benchmarks to measure the performance of various operations using SaltedOutpointHasher, including hash computation, set operations, and set creation. These benchmarks are intended to provide insights about coin caching performance (e.g. during IBD). > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SaltedOutpointHasherBench' -min-time=10000 > C++ compiler .......................... AppleClang 16.0.0.16000026 | ns/op | op/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 58.60 | 17,065,922.04 | 0.3% | 11.02 | `SaltedOutpointHasherBench_create_set` | 11.97 | 83,576,684.83 | 0.1% | 11.01 | `SaltedOutpointHasherBench_hash` | 14.50 | 68,985,850.12 | 0.3% | 10.96 | `SaltedOutpointHasherBench_match` | 13.90 | 71,942,033.47 | 0.4% | 11.03 | `SaltedOutpointHasherBench_mismatch` > C++ compiler .......................... 
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 136.76 | 7,312,133.16 | 0.0% | 1,086.67 | 491.12 | 2.213 | 119.54 | 1.1% | 11.01 | `SaltedOutpointHasherBench_create_set`
| 23.82 | 41,978,882.62 | 0.0% | 252.01 | 85.57 | 2.945 | 4.00 | 0.0% | 11.00 | `SaltedOutpointHasherBench_hash`
| 60.42 | 16,549,695.42 | 0.1% | 460.51 | 217.04 | 2.122 | 21.00 | 1.4% | 10.99 | `SaltedOutpointHasherBench_match`
| 78.66 | 12,713,595.35 | 0.1% | 555.59 | 282.52 | 1.967 | 20.19 | 2.2% | 10.74 | `SaltedOutpointHasherBench_mismatch`
---
 src/bench/crypto_hash.cpp | 101 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 99 insertions(+), 2 deletions(-)

diff --git a/src/bench/crypto_hash.cpp b/src/bench/crypto_hash.cpp
index 2f1ff564388..88a44fc57c3 100644
--- a/src/bench/crypto_hash.cpp
+++ b/src/bench/crypto_hash.cpp
@@ -2,7 +2,6 @@
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
-
 #include
 #include
 #include
@@ -12,9 +11,11 @@
 #include
 #include
 #include
-#include
 #include
 #include
+#include
+#include
+#include
 #include
 #include
 
@@ -205,6 +206,98 @@ static void SipHash_32b(benchmark::Bench& bench)
     });
 }
 
+static void SaltedOutpointHasherBench_hash(benchmark::Bench& bench)
+{
+    FastRandomContext rng{/*fDeterministic=*/true};
+    constexpr size_t size{1000};
+
+    std::vector<COutPoint> outpoints(size);
+    for (auto& outpoint : outpoints) {
+        outpoint = {Txid::FromUint256(rng.rand256()), rng.rand32()};
+    }
+
+    const SaltedOutpointHasher hasher;
+    bench.batch(size).run([&] {
+        size_t result{0};
+        for (const auto& outpoint : outpoints) {
+            result ^= hasher(outpoint);
+        }
+        ankerl::nanobench::doNotOptimizeAway(result);
+    });
+}
+
+static void SaltedOutpointHasherBench_match(benchmark::Bench& bench)
+{
+    FastRandomContext rng{/*fDeterministic=*/true};
+    constexpr size_t size{1000};
+
+    std::unordered_set<COutPoint, SaltedOutpointHasher> values;
+    std::vector<COutPoint> value_vector;
+    values.reserve(size);
+    value_vector.reserve(size);
+
+    for (size_t i{0}; i < size; ++i) {
+        COutPoint outpoint{Txid::FromUint256(rng.rand256()), rng.rand32()};
+        values.emplace(outpoint);
+        value_vector.push_back(outpoint);
+        assert(values.contains(outpoint));
+    }
+
+    bench.batch(size).run([&] {
+        bool result = true;
+        for (const auto& outpoint : value_vector) {
+            result ^= values.contains(outpoint);
+        }
+        ankerl::nanobench::doNotOptimizeAway(result);
+    });
+}
+
+static void SaltedOutpointHasherBench_mismatch(benchmark::Bench& bench)
+{
+    FastRandomContext rng{/*fDeterministic=*/true};
+    constexpr size_t size{1000};
+
+    std::unordered_set<COutPoint, SaltedOutpointHasher> values;
+    std::vector<COutPoint> missing_value_vector;
+    values.reserve(size);
+    missing_value_vector.reserve(size);
+
+    for (size_t i{0}; i < size; ++i) {
+        values.emplace(Txid::FromUint256(rng.rand256()), rng.rand32());
+        COutPoint missing_outpoint{Txid::FromUint256(rng.rand256()), rng.rand32()};
+        missing_value_vector.push_back(missing_outpoint);
+        assert(!values.contains(missing_outpoint));
+    }
+
+    bench.batch(size).run([&] {
+        bool result{false};
+        for (const auto& outpoint : missing_value_vector) {
+            result ^= values.contains(outpoint);
+        }
+        ankerl::nanobench::doNotOptimizeAway(result);
+    });
+}
+
+static void SaltedOutpointHasherBench_create_set(benchmark::Bench& bench)
+{
+    FastRandomContext rng{/*fDeterministic=*/true};
+    constexpr size_t size{1000};
+
+    std::vector<COutPoint> outpoints(size);
+    for (auto& outpoint : outpoints) {
+        outpoint = {Txid::FromUint256(rng.rand256()), rng.rand32()};
+    }
+
+    bench.batch(size).run([&] {
+        std::unordered_set<COutPoint, SaltedOutpointHasher> set;
+        set.reserve(size);
+        for (const auto& outpoint : outpoints) {
+            set.emplace(outpoint);
+        }
+        ankerl::nanobench::doNotOptimizeAway(set.size());
+    });
+}
+
 static void MuHash(benchmark::Bench& bench)
 {
     MuHash3072 acc;
@@ -276,6 +369,10 @@ BENCHMARK(SHA256_32b_SSE4, benchmark::PriorityLevel::HIGH);
 BENCHMARK(SHA256_32b_AVX2, benchmark::PriorityLevel::HIGH);
 BENCHMARK(SHA256_32b_SHANI, benchmark::PriorityLevel::HIGH);
 BENCHMARK(SipHash_32b, benchmark::PriorityLevel::HIGH);
+BENCHMARK(SaltedOutpointHasherBench_hash, benchmark::PriorityLevel::HIGH);
+BENCHMARK(SaltedOutpointHasherBench_match, benchmark::PriorityLevel::HIGH);
+BENCHMARK(SaltedOutpointHasherBench_mismatch, benchmark::PriorityLevel::HIGH);
+BENCHMARK(SaltedOutpointHasherBench_create_set, benchmark::PriorityLevel::HIGH);
 BENCHMARK(SHA256D64_1024_STANDARD, benchmark::PriorityLevel::HIGH);
 BENCHMARK(SHA256D64_1024_SSE4, benchmark::PriorityLevel::HIGH);
 BENCHMARK(SHA256D64_1024_AVX2, benchmark::PriorityLevel::HIGH);

From 09131cc9d126c8f449c6125361b92cf9136028b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Sat, 1 Feb 2025 19:01:36 +0100
Subject: [PATCH 22/24] test: Rename k1/k2 to k0/k1 for consistency

---
 src/test/hash_tests.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/test/hash_tests.cpp b/src/test/hash_tests.cpp
index f56f7232c3b..f6e15db1f1e 100644
--- a/src/test/hash_tests.cpp
+++ b/src/test/hash_tests.cpp
@@ -133,18 +133,18 @@ BOOST_AUTO_TEST_CASE(siphash)
     // Check consistency between CSipHasher and SipHashUint256[Extra].
     FastRandomContext ctx;
     for (int i = 0; i < 16; ++i) {
+        uint64_t k0 = ctx.rand64();
         uint64_t k1 = ctx.rand64();
-        uint64_t k2 = ctx.rand64();
         uint256 x = m_rng.rand256();
         uint32_t n = ctx.rand32();
         uint8_t nb[4];
         WriteLE32(nb, n);
-        CSipHasher sip256(k1, k2);
+        CSipHasher sip256(k0, k1);
         sip256.Write(x);
         CSipHasher sip288 = sip256;
         sip288.Write(nb);
-        BOOST_CHECK_EQUAL(SipHashUint256(k1, k2, x), sip256.Finalize());
-        BOOST_CHECK_EQUAL(SipHashUint256Extra(k1, k2, x, n), sip288.Finalize());
+        BOOST_CHECK_EQUAL(SipHashUint256(k0, k1, x), sip256.Finalize());
+        BOOST_CHECK_EQUAL(SipHashUint256Extra(k0, k1, x, n), sip288.Finalize()); // TODO modified in follow-up commit
     }
 }

From 39e33d49280151e52fc9a0ee72cf58af3ec244a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Sat, 1 Feb 2025 19:16:17 +0100
Subject: [PATCH 23/24] refactor: Extract C0-C3 Siphash constants

---
 src/crypto/siphash.cpp | 25 +++++++++++++------------
 src/crypto/siphash.h   |  5 +++++
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/crypto/siphash.cpp b/src/crypto/siphash.cpp
index 8004a0548ec..d604969d3ec 100644
--- a/src/crypto/siphash.cpp
+++ b/src/crypto/siphash.cpp
@@ -17,10 +17,10 @@
 CSipHasher::CSipHasher(uint64_t k0, uint64_t k1)
 {
-    v[0] = 0x736f6d6570736575ULL ^ k0;
-    v[1] = 0x646f72616e646f6dULL ^ k1;
-    v[2] = 0x6c7967656e657261ULL ^ k0;
-    v[3] = 0x7465646279746573ULL ^ k1;
+    v[0] = C0 ^ k0;
+    v[1] = C1 ^ k1;
+    v[2] = C2 ^ k0;
+    v[3] = C3 ^ k1;
     count = 0;
     tmp = 0;
 }
@@ -97,10 +97,10 @@ uint64_t SipHashUint256(uint64_t k0, uint64_t k1, const uint256& val)
     /* Specialized implementation for efficiency */
     uint64_t d = val.GetUint64(0);
 
-    uint64_t v0 = 0x736f6d6570736575ULL ^ k0;
-    uint64_t v1 = 0x646f72616e646f6dULL ^ k1;
-    uint64_t v2 = 0x6c7967656e657261ULL ^ k0;
-    uint64_t v3 = 0x7465646279746573ULL ^ k1 ^ d;
+    uint64_t v0 = CSipHasher::C0 ^ k0;
+    uint64_t v1 = CSipHasher::C1 ^ k1;
+    uint64_t v2 = CSipHasher::C2 ^ k0;
+    uint64_t v3 = CSipHasher::C3 ^ k1 ^ d;
 
     SIPROUND;
     SIPROUND;
@@ -137,10 +137,11 @@ uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra)
     /* Specialized implementation for efficiency */
     uint64_t d = val.GetUint64(0);
 
-    uint64_t v0 = 0x736f6d6570736575ULL ^ k0;
-    uint64_t v1 = 0x646f72616e646f6dULL ^ k1;
-    uint64_t v2 = 0x6c7967656e657261ULL ^ k0;
-    uint64_t v3 = 0x7465646279746573ULL ^ k1 ^ d;
+    // TODO moved in next commit
+    uint64_t v0 = CSipHasher::C0 ^ k0;
+    uint64_t v1 = CSipHasher::C1 ^ k1;
+    uint64_t v2 = CSipHasher::C2 ^ k0;
+    uint64_t v3 = CSipHasher::C3 ^ k1 ^ d;
 
     SIPROUND;
     SIPROUND;
diff --git a/src/crypto/siphash.h b/src/crypto/siphash.h
index 4fb3dc2f258..c3da7247554 100644
--- a/src/crypto/siphash.h
+++ b/src/crypto/siphash.h
@@ -19,6 +19,11 @@ private:
     uint8_t count; // Only the low 8 bits of the input size matter.
 
 public:
+    static constexpr uint64_t C0{0x736f6d6570736575ULL};
+    static constexpr uint64_t C1{0x646f72616e646f6dULL};
+    static constexpr uint64_t C2{0x6c7967656e657261ULL};
+    static constexpr uint64_t C3{0x7465646279746573ULL};
+
     /** Construct a SipHash calculator initialized with 128-bit key (k0, k1) */
     CSipHasher(uint64_t k0, uint64_t k1);
     /** Hash a 64-bit integer worth of data

From a06a674b4361061432d20dc21f183b7eac0175b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?=
Date: Sat, 1 Feb 2025 19:21:34 +0100
Subject: [PATCH 24/24] optimization: refactor: Introduce Uint256ExtraSipHasher to cache SipHash constant state

Previously, only k0 and k1 were stored, causing the constant XOR operations to
be recomputed in every call to `SipHashUint256Extra`.

This commit adds a dedicated `Uint256ExtraSipHasher` class that caches the
initial state (v0-v3) and performs the `SipHash` computation on a `uint256`
(with an extra parameter), hiding the constant computation details from
higher-level code and improving efficiency.

This essentially brings the precalculations done in the `CSipHasher`
constructor to the `uint256`-specialized SipHash implementation.

> cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SaltedOutpointHasherBench' -min-time=10000

> C++ compiler .......................... AppleClang 16.0.0.16000026

| ns/op | op/s | err% | total | benchmark
|--------------------:|--------------------:|--------:|----------:|:----------
| 57.27 | 17,462,299.19 | 0.1% | 11.02 | `SaltedOutpointHasherBench_create_set`
| 11.24 | 88,997,888.48 | 0.3% | 11.04 | `SaltedOutpointHasherBench_hash`
| 13.91 | 71,902,014.20 | 0.2% | 11.01 | `SaltedOutpointHasherBench_match`
| 13.29 | 75,230,390.31 | 0.1% | 11.00 | `SaltedOutpointHasherBench_mismatch`

compared to master:
create_set - 17,462,299.19/17,065,922.04 - 2.3% faster
hash       - 88,997,888.48/83,576,684.83 - 6.4% faster
match      - 71,902,014.20/68,985,850.12 - 4.2% faster
mismatch   - 75,230,390.31/71,942,033.47 - 4.5% faster

> C++ compiler .......................... GNU 13.3.0
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
| 135.38 | 7,386,349.49 | 0.0% | 1,078.19 | 486.16 | 2.218 | 119.56 | 1.1% | 11.00 | `SaltedOutpointHasherBench_create_set`
| 23.67 | 42,254,558.08 | 0.0% | 247.01 | 85.01 | 2.906 | 4.00 | 0.0% | 11.00 | `SaltedOutpointHasherBench_hash`
| 58.95 | 16,962,220.14 | 0.1% | 446.55 | 211.74 | 2.109 | 20.86 | 1.4% | 11.01 | `SaltedOutpointHasherBench_match`
| 76.98 | 12,991,047.69 | 0.1% | 548.93 | 276.50 | 1.985 | 20.25 | 2.3% | 10.72 | `SaltedOutpointHasherBench_mismatch`

compared to master:
create_set - 7,386,349.49/7,312,133.16 - 1% faster
hash       - 42,254,558.08/41,978,882.62 - 0.6% faster
match      - 16,962,220.14/16,549,695.42 - 2.4% faster
mismatch   - 12,991,047.69/12,713,595.35 - 2% faster
---
 src/crypto/siphash.cpp    | 13 ++++---------
 src/crypto/siphash.h      | 15 ++++++++++++++-
 src/test/fuzz/integer.cpp |  2 +-
 src/test/hash_tests.cpp   |  4 ++--
 src/util/hasher.cpp       |  6 +++---
 src/util/hasher.h         |  8 +++-----
 6 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/crypto/siphash.cpp b/src/crypto/siphash.cpp
index d604969d3ec..c8ea824979e 100644
--- a/src/crypto/siphash.cpp
+++ b/src/crypto/siphash.cpp
@@ -132,17 +132,12 @@ uint64_t SipHashUint256(uint64_t k0, uint64_t k1, const uint256& val)
     return v0 ^ v1 ^ v2 ^ v3;
 }
 
-uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra)
+/* Specialized implementation for efficiency */
+uint64_t Uint256ExtraSipHasher::operator()(const uint256& val, uint32_t extra) const noexcept
 {
-    /* Specialized implementation for efficiency */
+    uint64_t v0 = v[0], v1 = v[1], v2 = v[2], v3 = v[3];
     uint64_t d = val.GetUint64(0);
-
-    // TODO moved in next commit
-    uint64_t v0 = CSipHasher::C0 ^ k0;
-    uint64_t v1 = CSipHasher::C1 ^ k1;
-    uint64_t v2 = CSipHasher::C2 ^ k0;
-    uint64_t v3 = CSipHasher::C3 ^ k1 ^ d;
-
+    v3 ^= d;
     SIPROUND;
     SIPROUND;
     v0 ^= d;
diff --git a/src/crypto/siphash.h b/src/crypto/siphash.h
index c3da7247554..c0d048c6733 100644
--- a/src/crypto/siphash.h
+++ b/src/crypto/siphash.h
@@ -48,6 +48,19 @@ public:
  * .Finalize()
  */
 uint64_t SipHashUint256(uint64_t k0, uint64_t k1, const uint256& val);
-uint64_t SipHashUint256Extra(uint64_t k0, uint64_t k1, const uint256& val, uint32_t extra);
+
+class Uint256ExtraSipHasher {
+    uint64_t v[4];
+
+public:
+    Uint256ExtraSipHasher(const uint64_t k0, const uint64_t k1) noexcept {
+        v[0] = CSipHasher::C0 ^ k0;
+        v[1] = CSipHasher::C1 ^ k1;
+        v[2] = CSipHasher::C2 ^ k0;
+        v[3] = CSipHasher::C3 ^ k1;
+    }
+
+    uint64_t operator()(const uint256& val, uint32_t extra) const noexcept;
+};
 
 #endif // BITCOIN_CRYPTO_SIPHASH_H
diff --git a/src/test/fuzz/integer.cpp b/src/test/fuzz/integer.cpp
index 2e51eac3078..3c27d340377 100644
--- a/src/test/fuzz/integer.cpp
+++ b/src/test/fuzz/integer.cpp
@@ -119,7 +119,7 @@ FUZZ_TARGET(integer, .init = initialize_integer)
     (void)MillisToTimeval(i64);
     (void)SighashToStr(uch);
     (void)SipHashUint256(u64, u64, u256);
-    (void)SipHashUint256Extra(u64, u64, u256, u32);
+    (void)Uint256ExtraSipHasher(u64, u64)(u256, u32);
     (void)ToLower(ch);
     (void)ToUpper(ch);
     {
diff --git a/src/test/hash_tests.cpp b/src/test/hash_tests.cpp
index f6e15db1f1e..56e448f8920 100644
--- a/src/test/hash_tests.cpp
+++ b/src/test/hash_tests.cpp
@@ -130,7 +130,7 @@ BOOST_AUTO_TEST_CASE(siphash)
         ss << TX_WITH_WITNESS(tx);
         BOOST_CHECK_EQUAL(SipHashUint256(1, 2, ss.GetHash()), 0x79751e980c2a0a35ULL);
 
-    // Check consistency between CSipHasher and SipHashUint256[Extra].
+    // Check consistency between CSipHasher and SipHashUint256 and Uint256ExtraSipHasher.
     FastRandomContext ctx;
     for (int i = 0; i < 16; ++i) {
         uint64_t k0 = ctx.rand64();
@@ -144,7 +144,7 @@ BOOST_AUTO_TEST_CASE(siphash)
         CSipHasher sip288 = sip256;
         sip288.Write(nb);
         BOOST_CHECK_EQUAL(SipHashUint256(k0, k1, x), sip256.Finalize());
-        BOOST_CHECK_EQUAL(SipHashUint256Extra(k0, k1, x, n), sip288.Finalize()); // TODO modified in follow-up commit
+        BOOST_CHECK_EQUAL(Uint256ExtraSipHasher(k0, k1)(x, n), sip288.Finalize());
     }
 }
diff --git a/src/util/hasher.cpp b/src/util/hasher.cpp
index 3109ba02a8d..3e918aaf33c 100644
--- a/src/util/hasher.cpp
+++ b/src/util/hasher.cpp
@@ -11,9 +11,9 @@
 SaltedTxidHasher::SaltedTxidHasher() :
     k0{FastRandomContext().rand64()}, k1{FastRandomContext().rand64()} {}
 
-SaltedOutpointHasher::SaltedOutpointHasher(bool deterministic) :
-    k0{deterministic ? 0x8e819f2607a18de6 : FastRandomContext().rand64()},
-    k1{deterministic ? 0xf4020d2e3983b0eb : FastRandomContext().rand64()}
+SaltedOutpointHasher::SaltedOutpointHasher(bool deterministic) : hasher{
+    deterministic ? 0x8e819f2607a18de6 : FastRandomContext().rand64(),
+    deterministic ? 0xf4020d2e3983b0eb : FastRandomContext().rand64()}
 {}
 
 SaltedSipHasher::SaltedSipHasher() :
diff --git a/src/util/hasher.h b/src/util/hasher.h
index e4594c7ddaf..5f7940a8789 100644
--- a/src/util/hasher.h
+++ b/src/util/hasher.h
@@ -30,12 +30,10 @@ public:
 
 class SaltedOutpointHasher
 {
-private:
-    /** Salt */
-    const uint64_t k0, k1;
+    const Uint256ExtraSipHasher hasher;
 
 public:
-    SaltedOutpointHasher(bool deterministic = false);
+    explicit SaltedOutpointHasher(bool deterministic = false);
 
     /**
      * Having the hash noexcept allows libstdc++'s unordered_map to recalculate
     * the hash during rehash, so it does not have to be cached in the data
     * structure. This also means that the hash is recalculated
     *
     * @see https://gcc.gnu.org/onlinedocs/gcc-13.2.0/libstdc++/manual/manual/unordered_associative.html
     */
     size_t operator()(const COutPoint& id) const noexcept {
-        return SipHashUint256Extra(k0, k1, id.hash, id.n);
+        return hasher(id.hash, id.n);
     }
 };
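To see how the pieces of the series fit together, here is a minimal usage
sketch (not part of any commit; the header paths, sample values and printed
output are illustrative assumptions): constructing a `SaltedOutpointHasher`
now builds one `Uint256ExtraSipHasher`, so the salt-dependent initialization
is paid once per container rather than once per lookup.

```
// Illustrative only: mirrors what the SaltedOutpointHasherBench_* benchmarks
// measure, but as a plain program. Assumes the headers land as in this series.
#include <primitives/transaction.h>
#include <random.h>
#include <util/hasher.h>

#include <cstdint>
#include <cstdio>
#include <unordered_set>

int main()
{
    FastRandomContext rng{/*fDeterministic=*/true};

    // One hasher instance per container: the SipHash constants and the salt
    // are XORed together once, inside Uint256ExtraSipHasher's constructor.
    std::unordered_set<COutPoint, SaltedOutpointHasher> outpoints;

    for (uint32_t n{0}; n < 1000; ++n) {
        outpoints.emplace(Txid::FromUint256(rng.rand256()), n);
    }

    // Each lookup hashes (txid, n) using only the cached state.
    const COutPoint missing{Txid::FromUint256(rng.rand256()), 0};
    std::printf("size=%zu contains_missing=%d\n", outpoints.size(), outpoints.contains(missing) ? 1 : 0);
}
```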