test: compare util::Xor with randomized inputs against simple impl

The two tests are doing different things - `xor_roundtrip_random_chunks` does black-box style property-based testing to validate that certain invariants hold - that deobfuscating an obfuscation results in the original message (higher level, it doesn't have to know about the implementation details). The `xor_bytes_reference` test makes sure the optimized xor implementation behaves in every imaginable scenario exactly as the simplest possible obfuscation - with random chunks, random alignment, random data, random key. Since we're touching the file, other related small refactors were also applied: * `nullpt` typo fixed; * manual byte-by-byte xor key creations were replaced with `_hex` factories; * since we're only using 64 bit keys in production, smaller keys were changed to reflect real-world usage; Co-authored-by: Hodlinator <172445034+hodlinator@users.noreply.github.com>
2025-08-25 20:11:35 +02:00 · 2025-07-16 14:09:59 -07:00
parent a5141cd39e
commit 618a30e326
1 changed files with 74 additions and 25 deletions
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -14,22 +14,79 @@
 #include <boost/test/unit_test.hpp>

 using namespace std::string_literals;
+using namespace util::hex_literals;

 BOOST_FIXTURE_TEST_SUITE(streams_tests, BasicTestingSetup)

+// Test that obfuscation can be properly reverted even with random chunk sizes.
+BOOST_AUTO_TEST_CASE(xor_roundtrip_random_chunks)
+{
+    auto apply_random_xor_chunks{[&](std::span<std::byte> target, std::span<const std::byte, Obfuscation::KEY_SIZE> obfuscation) {
+        for (size_t offset{0}; offset < target.size();) {
+            const size_t chunk_size{1 + m_rng.randrange(target.size() - offset)};
+            util::Xor(target.subspan(offset, chunk_size), obfuscation, offset);
+            offset += chunk_size;
+        }
+    }};
+
+    for (size_t test{0}; test < 100; ++test) {
+        const size_t write_size{1 + m_rng.randrange(100U)};
+        const std::vector original{m_rng.randbytes<std::byte>(write_size)};
+        std::vector roundtrip{original};
+
+        const auto key_bytes{m_rng.randbool() ? m_rng.randbytes<Obfuscation::KEY_SIZE>() : std::array<std::byte, Obfuscation::KEY_SIZE>{}};
+        apply_random_xor_chunks(roundtrip, key_bytes);
+
+        const bool key_all_zeros{std::ranges::all_of(
+            std::span{key_bytes}.first(std::min(write_size, Obfuscation::KEY_SIZE)), [](auto b) { return b == std::byte{0}; })};
+        BOOST_CHECK(key_all_zeros ? original == roundtrip : original != roundtrip);
+
+        apply_random_xor_chunks(roundtrip, key_bytes);
+        BOOST_CHECK(original == roundtrip);
+    }
+}
+
+// Compares optimized obfuscation against a trivial, byte-by-byte reference implementation
+// with random offsets to ensure proper handling of key wrapping.
+BOOST_AUTO_TEST_CASE(xor_bytes_reference)
+{
+    auto expected_xor{[](std::span<std::byte> target, std::span<const std::byte, Obfuscation::KEY_SIZE> obfuscation, size_t key_offset) {
+        for (auto& b : target) {
+            b ^= obfuscation[key_offset++ % obfuscation.size()];
+        }
+    }};
+
+    for (size_t test{0}; test < 100; ++test) {
+        const size_t write_size{1 + m_rng.randrange(100U)};
+        const size_t key_offset{m_rng.randrange(3 * Obfuscation::KEY_SIZE)}; // Make sure the key can wrap around
+        const size_t write_offset{std::min(write_size, m_rng.randrange(Obfuscation::KEY_SIZE * 2))}; // Write unaligned data
+
+        const auto key_bytes{m_rng.randbool() ? m_rng.randbytes<Obfuscation::KEY_SIZE>() : std::array<std::byte, Obfuscation::KEY_SIZE>{}};
+        const std::vector obfuscation{key_bytes.begin(), key_bytes.end()};
+        std::vector expected{m_rng.randbytes<std::byte>(write_size)};
+        std::vector actual{expected};
+
+        expected_xor(std::span{expected}.subspan(write_offset), key_bytes, key_offset);
+        util::Xor(std::span{actual}.subspan(write_offset), key_bytes, key_offset);
+
+        BOOST_CHECK_EQUAL_COLLECTIONS(expected.begin(), expected.end(), actual.begin(), actual.end());
+    }
+}
+
 BOOST_AUTO_TEST_CASE(xor_file)
 {
    fs::path xor_path{m_args.GetDataDirBase() / "test_xor.bin"};
    auto raw_file{[&](const auto& mode) { return fsbridge::fopen(xor_path, mode); }};
    const std::vector<uint8_t> test1{1, 2, 3};
    const std::vector<uint8_t> test2{4, 5};
-    const std::vector<std::byte> xor_pat{std::byte{0xff}, std::byte{0x00}};
+    const auto xor_pat{"ff00ff00ff00ff00"_hex_v};
+
    {
        // Check errors for missing file
        AutoFile xor_file{raw_file("rb"), xor_pat};
-        BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, HasReason{"AutoFile::write: file handle is nullpt"});
-        BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: file handle is nullpt"});
-        BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: file handle is nullpt"});
+        BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, HasReason{"AutoFile::write: file handle is nullptr"});
+        BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: file handle is nullptr"});
+        BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: file handle is nullptr"});
    }
    {
 #ifdef __MINGW64__
@@ -77,7 +134,7 @@ BOOST_AUTO_TEST_CASE(streams_vector_writer)
 {
    unsigned char a(1);
    unsigned char b(2);
-    unsigned char bytes[] = { 3, 4, 5, 6 };
+    unsigned char bytes[] = {3, 4, 5, 6};
    std::vector<unsigned char> vch;

    // Each test runs twice. Serializing a second time at the same starting
@@ -224,34 +281,26 @@ BOOST_AUTO_TEST_CASE(bitstream_reader_writer)

 BOOST_AUTO_TEST_CASE(streams_serializedata_xor)
 {
-    std::vector<std::byte> in;
-
    // Degenerate case
    {
-        DataStream ds{in};
-        ds.Xor({0x00, 0x00});
+        DataStream ds{};
+        ds.Xor("0000000000000000"_hex_v_u8);
        BOOST_CHECK_EQUAL(""s, ds.str());
    }

-    in.push_back(std::byte{0x0f});
-    in.push_back(std::byte{0xf0});
-
-    // Single character key
    {
-        DataStream ds{in};
-        ds.Xor({0xff});
+        const auto obfuscation{"ffffffffffffffff"_hex_v_u8};
+
+        DataStream ds{"0ff0"_hex};
+        ds.Xor(obfuscation);
        BOOST_CHECK_EQUAL("\xf0\x0f"s, ds.str());
    }

-    // Multi character key
-
-    in.clear();
-    in.push_back(std::byte{0xf0});
-    in.push_back(std::byte{0x0f});
-
    {
-        DataStream ds{in};
-        ds.Xor({0xff, 0x0f});
+        const auto obfuscation{"ff0fff0fff0fff0f"_hex_v_u8};
+
+        DataStream ds{"f00f"_hex};
+        ds.Xor(obfuscation);
        BOOST_CHECK_EQUAL("\x0f\x00"s, ds.str());
    }
 }
@@ -564,7 +613,7 @@ BOOST_AUTO_TEST_CASE(buffered_reader_matches_autofile_random_content)
    const FlatFilePos pos{0, 0};

    const FlatFileSeq test_file{m_args.GetDataDirBase(), "buffered_file_test_random", node::BLOCKFILE_CHUNK_SIZE};
-    const std::vector obfuscation{m_rng.randbytes<std::byte>(Obfuscation::KEY_SIZE)};
+    const auto obfuscation{m_rng.randbytes<std::byte>(Obfuscation::KEY_SIZE)};

    // Write out the file with random content
    {
@@ -619,7 +668,7 @@ BOOST_AUTO_TEST_CASE(buffered_writer_matches_autofile_random_content)

    const FlatFileSeq test_buffered{m_args.GetDataDirBase(), "buffered_write_test", node::BLOCKFILE_CHUNK_SIZE};
    const FlatFileSeq test_direct{m_args.GetDataDirBase(), "direct_write_test", node::BLOCKFILE_CHUNK_SIZE};
-    const std::vector obfuscation{m_rng.randbytes<std::byte>(Obfuscation::KEY_SIZE)};
+    const auto obfuscation{m_rng.randbytes<std::byte>(Obfuscation::KEY_SIZE)};

    {
        DataBuffer test_data{m_rng.randbytes<std::byte>(file_size)};