From d5104cfbaeb82081e4b00a5084516555e446dcdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 27 Mar 2025 18:40:08 +0100 Subject: [PATCH] prevector: store `P2WSH`/`P2TR`/`P2PK` scripts inline The current `prevector` size of 28 bytes (chosen to fill the `sizeof(CScript)` aligned size) was introduced in 2015 (https://github.com/bitcoin/bitcoin/pull/6914) before SegWit and Taproot. However, the increasingly common `P2WSH` and `P2TR` scripts are both 34 bytes, and are forced to use heap (re)allocation rather than efficient inline storage. The core trade-off of this change is to eliminate heap allocations for common 34-36 byte scripts at the cost of increasing the base memory footprint of all `CScript` objects by 8 bytes (while still respecting peak memory usage defined by `-dbcache`). Increasing the `prevector` size allows these scripts to be stored inline, avoiding extra heap allocations, reducing potential memory fragmentation, and improving performance during cache flushes. Massif analysis confirms a lower stable memory usage after flushing, suggesting the elimination of heap allocations outweighs the larger base size for common workloads. Due to memory alignment, increasing the `prevector` size to 36 bytes doesn't change the overall `sizeof(CScript)` compared to an increase to 34 bytes, allowing us to include compressed `P2PK` scripts as well at no additional memory cost. Performance benchmarks for AssumeUTXO load and flush show: * Small dbcache (450MB): ~1-3% performance improvement (despite more frequent flushes) * Large dbcache (4500MB): ~6-8% performance improvement due to fewer heap allocations (and basically the same number of flushes) * Very large dbcache (4500MB): ~5-6% performance improvement due to fewer heap allocations (and memory limit not being reached, so there's no memory penalty) Full IBD and reindex-chainstate with larger `dbcache` values also show an overall ~3-4% speedup. 
Co-authored-by: Ava Chow Co-authored-by: Andrew Toth Co-authored-by: maflcko <6399679+maflcko@users.noreply.github.com> --- src/script/script.h | 4 +--- src/test/script_tests.cpp | 18 +++++++++--------- src/test/validation_flush_tests.cpp | 5 ++--- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/script/script.h b/src/script/script.h index cd19f09436f..b556aae6060 100644 --- a/src/script/script.h +++ b/src/script/script.h @@ -403,10 +403,8 @@ private: /** * We use a prevector for the script to reduce the considerable memory overhead * of vectors in cases where they normally contain a small number of small elements. - * Tests in October 2015 showed use of this reduced dbcache memory usage by 23% - * and made an initial sync 13% faster. */ -using CScriptBase = prevector<28, uint8_t>; +using CScriptBase = prevector<36, uint8_t>; bool GetScriptOp(CScriptBase::const_iterator& pc, CScriptBase::const_iterator end, opcodetype& opcodeRet, std::vector* pvchRet); diff --git a/src/test/script_tests.cpp b/src/test/script_tests.cpp index d5b0dfab189..5d7593020b4 100644 --- a/src/test/script_tests.cpp +++ b/src/test/script_tests.cpp @@ -1175,10 +1175,10 @@ static TxoutType GetTxoutType(const CScript& output_script) BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) { BOOST_CHECK_EQUAL(sizeof(CompressedScript), 40); - BOOST_CHECK_EQUAL(sizeof(CScriptBase), 32); + BOOST_CHECK_EQUAL(sizeof(CScriptBase), 40); BOOST_CHECK_NE(sizeof(CScriptBase), sizeof(prevector)); // CScriptBase size should be set to avoid wasting space in padding - BOOST_CHECK_EQUAL(sizeof(CScript), 32); - BOOST_CHECK_EQUAL(sizeof(CTxOut), 40); + BOOST_CHECK_EQUAL(sizeof(CScript), 40); + BOOST_CHECK_EQUAL(sizeof(CTxOut), 48); CKey dummy_key; dummy_key.MakeNewKey(/*fCompressed=*/true); @@ -1212,25 +1212,25 @@ BOOST_AUTO_TEST_CASE(script_size_and_capacity_test) CHECK_SCRIPT_STATIC_SIZE(script, 25); } - // P2WSH needs extra allocation + // P2WSH has direct allocation { const auto 
script{GetScriptForDestination(WitnessV0ScriptHash{CScript{} << OP_TRUE})}; BOOST_CHECK(script.IsPayToWitnessScriptHash()); - CHECK_SCRIPT_DYNAMIC_SIZE(script, 34, 34); + CHECK_SCRIPT_STATIC_SIZE(script, 34); } - // P2TR needs extra allocation + // P2TR has direct allocation { const auto script{GetScriptForDestination(WitnessV1Taproot{XOnlyPubKey{dummy_pubkey}})}; BOOST_CHECK_EQUAL(GetTxoutType(script), TxoutType::WITNESS_V1_TAPROOT); - CHECK_SCRIPT_DYNAMIC_SIZE(script, 34, 34); + CHECK_SCRIPT_STATIC_SIZE(script, 34); } - // Compressed P2PK needs extra allocation + // Compressed P2PK has direct allocation { const auto script{GetScriptForRawPubKey(dummy_pubkey)}; BOOST_CHECK_EQUAL(GetTxoutType(script), TxoutType::PUBKEY); - CHECK_SCRIPT_DYNAMIC_SIZE(script, 35, 35); + CHECK_SCRIPT_STATIC_SIZE(script, 35); } // Uncompressed P2PK needs extra allocation diff --git a/src/test/validation_flush_tests.cpp b/src/test/validation_flush_tests.cpp index c325f7deb2b..4d6017a0e30 100644 --- a/src/test/validation_flush_tests.cpp +++ b/src/test/validation_flush_tests.cpp @@ -26,9 +26,8 @@ BOOST_AUTO_TEST_CASE(getcoinscachesizestate) LOCK(::cs_main); auto& view = chainstate.CoinsTip(); - // The number of bytes consumed by coin's heap data, i.e. CScript - // (prevector<28, unsigned char>) when assigned 56 bytes of data per above. - // + // The number of bytes consumed by coin's heap data, i.e. + // CScript (prevector<36, unsigned char>) when assigned 56 bytes of data per above. // See also: Coin::DynamicMemoryUsage(). constexpr unsigned int COIN_SIZE = is_64_bit ? 80 : 64;