Merge bitcoin/bitcoin#29612: rpc: Optimize serialization and enhance metadata of dumptxoutset output

542e13b293 rpc: Enhance metadata of the dumptxoutset output (Fabian Jahr)
4d8e5edbaa assumeutxo: Add documentation on dumptxoutset serialization format (Fabian Jahr)
c14ed7f384 assumeutxo: Add test for changed coin size value (Fabian Jahr)
de95953d87 rpc: Optimize serialization disk space of dumptxoutset (Fabian Jahr)

Pull request description:

  This is the second attempt at implementing the `dumptxoutset` space optimization suggested in #25675. Closes #25675.

  This builds on the work done in #26045, addresses open feedback, and adds some further improvements (most importantly the use of CompactSize encoding), documentation, and an additional test.

  The [original snapshot at height 830,000](https://github.com/bitcoin/bitcoin/pull/29551) came in at 10.82 GB. With this change, the same snapshot is 8.94 GB, a reduction of 17.4%.

  This also enhances the metadata of the output file and adds the following data to allow for better error handling and make future upgrades easier:
  - A newly introduced UTXO set magic
  - A version number
  - The network magic
  - The block height

ACKs for top commit:
  achow101:
    ACK 542e13b293
  TheCharlatan:
    Re-ACK 542e13b293
  theStack:
    ACK 542e13b293

Tree-SHA512: 0825d30e5c3c364062db3c6cbca4e3c680e6e6d3e259fa70c0c2b2a7020f24a47406a623582040988d5c7745b08649c31110df4c10656aa25f3f27eb35843d99
This commit is contained in:
Ava Chow
2024-05-23 12:31:23 -04:00
9 changed files with 261 additions and 76 deletions

View File

@@ -34,6 +34,7 @@
#include <rpc/server_util.h>
#include <rpc/util.h>
#include <script/descriptor.h>
#include <serialize.h>
#include <streams.h>
#include <sync.h>
#include <txdb.h>
@@ -2696,29 +2697,60 @@ UniValue CreateUTXOSnapshot(
tip->nHeight, tip->GetBlockHash().ToString(),
fs::PathToString(path), fs::PathToString(temppath)));
SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count};
SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count};
afile << metadata;
COutPoint key;
Txid last_hash;
Coin coin;
unsigned int iter{0};
size_t written_coins_count{0};
std::vector<std::pair<uint32_t, Coin>> coins;
// To reduce space the serialization format of the snapshot avoids
// duplication of tx hashes. The code takes advantage of the guarantee by
// leveldb that keys are lexicographically sorted.
// In the coins vector we collect all coins that belong to a certain tx hash
// (key.hash) and when we have them all (key.hash != last_hash) we write
// them to file using the below lambda function.
// See also https://github.com/bitcoin/bitcoin/issues/25675
auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) {
afile << last_hash;
WriteCompactSize(afile, coins.size());
for (const auto& [n, coin] : coins) {
WriteCompactSize(afile, n);
afile << coin;
++written_coins_count;
}
};
pcursor->GetKey(key);
last_hash = key.hash;
while (pcursor->Valid()) {
if (iter % 5000 == 0) node.rpc_interruption_point();
++iter;
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
afile << key;
afile << coin;
if (key.hash != last_hash) {
write_coins_to_file(afile, last_hash, coins, written_coins_count);
last_hash = key.hash;
coins.clear();
}
coins.emplace_back(key.n, coin);
}
pcursor->Next();
}
if (!coins.empty()) {
write_coins_to_file(afile, last_hash, coins, written_coins_count);
}
CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count);
afile.fclose();
UniValue result(UniValue::VOBJ);
result.pushKV("coins_written", maybe_stats->coins_count);
result.pushKV("coins_written", written_coins_count);
result.pushKV("base_hash", tip->GetBlockHash().ToString());
result.pushKV("base_height", tip->nHeight);
result.pushKV("path", path.utf8string());
@@ -2778,12 +2810,22 @@ static RPCHelpMan loadtxoutset()
}
SnapshotMetadata metadata;
afile >> metadata;
try {
afile >> metadata;
} catch (const std::ios_base::failure& e) {
throw JSONRPCError(RPC_DESERIALIZATION_ERROR, strprintf("Unable to parse metadata: %s", e.what()));
}
uint256 base_blockhash = metadata.m_base_blockhash;
int base_blockheight = metadata.m_base_blockheight;
if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) {
auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights();
std::string heights_formatted = Join(available_heights, ", ", [&](const auto& i) { return ToString(i); });
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, "
"assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString()));
"assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.",
base_blockhash.ToString(),
base_blockheight,
heights_formatted));
}
CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main,
return chainman.m_blockman.LookupBlockIndex(base_blockhash));