From 586f2a6b1b53cb57342fa7c1135f1ea1e9c63705 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 21 Jan 2025 14:02:10 -0500 Subject: [PATCH 1/2] coins: keep track of number of dirty entries in cache --- src/coins.cpp | 27 +++++++++++++++++++++++---- src/coins.h | 7 ++++++- src/test/coins_tests.cpp | 9 +++++++-- src/test/fuzz/coins_view.cpp | 4 +++- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/coins.cpp b/src/coins.cpp index 24a102b0bc1..30d82e1d08c 100644 --- a/src/coins.cpp +++ b/src/coins.cpp @@ -78,6 +78,7 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi bool fresh = false; if (!inserted) { cachedCoinsUsage -= it->second.coin.DynamicMemoryUsage(); + m_dirty_count -= it->second.IsDirty(); } if (!possible_overwrite) { if (!it->second.coin.IsSpent()) { @@ -100,6 +101,7 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi } it->second.coin = std::move(coin); CCoinsCacheEntry::SetDirty(*it, m_sentinel); + ++m_dirty_count; if (fresh) CCoinsCacheEntry::SetFresh(*it, m_sentinel); cachedCoinsUsage += it->second.coin.DynamicMemoryUsage(); TRACEPOINT(utxocache, add, @@ -113,7 +115,10 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin) { cachedCoinsUsage += coin.DynamicMemoryUsage(); auto [it, inserted] = cacheCoins.try_emplace(std::move(outpoint), std::move(coin)); - if (inserted) CCoinsCacheEntry::SetDirty(*it, m_sentinel); + if (inserted) { + CCoinsCacheEntry::SetDirty(*it, m_sentinel); + ++m_dirty_count; + } } void AddCoins(CCoinsViewCache& cache, const CTransaction &tx, int nHeight, bool check_for_overwrite) { @@ -131,6 +136,7 @@ bool CCoinsViewCache::SpendCoin(const COutPoint &outpoint, Coin* moveout) { CCoinsMap::iterator it = FetchCoin(outpoint); if (it == cacheCoins.end()) return false; cachedCoinsUsage -= it->second.coin.DynamicMemoryUsage(); + m_dirty_count 
-= it->second.IsDirty(); TRACEPOINT(utxocache, spent, outpoint.hash.data(), (uint32_t)outpoint.n, @@ -144,6 +150,7 @@ bool CCoinsViewCache::SpendCoin(const COutPoint &outpoint, Coin* moveout) { cacheCoins.erase(it); } else { CCoinsCacheEntry::SetDirty(*it, m_sentinel); + ++m_dirty_count; it->second.coin.Clear(); } return true; @@ -204,6 +211,7 @@ bool CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &ha } cachedCoinsUsage += entry.coin.DynamicMemoryUsage(); CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); + ++m_dirty_count; // We can mark it FRESH in the parent if it was FRESH in the child // Otherwise it might have just been flushed from the parent's cache // and already exist in the grandparent @@ -223,6 +231,7 @@ bool CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &ha // The grandparent cache does not have an entry, and the coin // has been spent. We can just delete it from the parent cache. cachedCoinsUsage -= itUs->second.coin.DynamicMemoryUsage(); + m_dirty_count -= itUs->second.IsDirty(); cacheCoins.erase(itUs); } else { // A normal modification. @@ -235,7 +244,10 @@ bool CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &ha itUs->second.coin = it->second.coin; } cachedCoinsUsage += itUs->second.coin.DynamicMemoryUsage(); - CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); + if (!itUs->second.IsDirty()) { + ++m_dirty_count; + CCoinsCacheEntry::SetDirty(*itUs, m_sentinel); + } // NOTE: It isn't safe to mark the coin as FRESH in the parent // cache. 
If it already existed and was spent in the parent // cache then marking it FRESH would prevent that spentness @@ -248,19 +260,20 @@ bool CCoinsViewCache::BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &ha } bool CCoinsViewCache::Flush() { - auto cursor{CoinsViewCacheCursor(cachedCoinsUsage, m_sentinel, cacheCoins, /*will_erase=*/true)}; + auto cursor{CoinsViewCacheCursor(cachedCoinsUsage, m_dirty_count, m_sentinel, cacheCoins, /*will_erase=*/true)}; bool fOk = base->BatchWrite(cursor, hashBlock); if (fOk) { cacheCoins.clear(); ReallocateCache(); } cachedCoinsUsage = 0; + m_dirty_count = 0; return fOk; } bool CCoinsViewCache::Sync() { - auto cursor{CoinsViewCacheCursor(cachedCoinsUsage, m_sentinel, cacheCoins, /*will_erase=*/false)}; + auto cursor{CoinsViewCacheCursor(cachedCoinsUsage, m_dirty_count, m_sentinel, cacheCoins, /*will_erase=*/false)}; bool fOk = base->BatchWrite(cursor, hashBlock); if (fOk) { if (m_sentinel.second.Next() != &m_sentinel) { @@ -306,6 +319,7 @@ void CCoinsViewCache::ReallocateCache() { // Cache should be empty when we're calling this. assert(cacheCoins.size() == 0); + Assume(m_dirty_count == 0); cacheCoins.~CCoinsMap(); m_cache_coins_memory_resource.~CCoinsMapMemoryResource(); ::new (&m_cache_coins_memory_resource) CCoinsMapMemoryResource{}; @@ -316,6 +330,7 @@ void CCoinsViewCache::SanityCheck() const { size_t recomputed_usage = 0; size_t count_flagged = 0; + size_t dirty_count = 0; for (const auto& [_, entry] : cacheCoins) { unsigned attr = 0; if (entry.IsDirty()) attr |= 1; @@ -327,6 +342,9 @@ void CCoinsViewCache::SanityCheck() const // Recompute cachedCoinsUsage. recomputed_usage += entry.coin.DynamicMemoryUsage(); + // Recompute m_dirty_count. + dirty_count += entry.IsDirty(); + // Count the number of entries we expect in the linked list. 
if (entry.IsDirty() || entry.IsFresh()) ++count_flagged; } @@ -343,6 +361,7 @@ void CCoinsViewCache::SanityCheck() const } assert(count_linked == count_flagged); assert(recomputed_usage == cachedCoinsUsage); + assert(dirty_count == m_dirty_count); } static const size_t MIN_TRANSACTION_OUTPUT_WEIGHT = WITNESS_SCALE_FACTOR * ::GetSerializeSize(CTxOut()); diff --git a/src/coins.h b/src/coins.h index 61fb4af6420..15d9b479f79 100644 --- a/src/coins.h +++ b/src/coins.h @@ -272,10 +272,11 @@ struct CoinsViewCacheCursor //! Calling CCoinsMap::clear() afterwards is faster because a CoinsCachePair cannot be coerced back into a //! CCoinsMap::iterator to be erased, and must therefore be looked up again by key in the CCoinsMap before being erased. CoinsViewCacheCursor(size_t& usage LIFETIMEBOUND, + size_t& dirty LIFETIMEBOUND, CoinsCachePair& sentinel LIFETIMEBOUND, CCoinsMap& map LIFETIMEBOUND, bool will_erase) noexcept - : m_usage(usage), m_sentinel(sentinel), m_map(map), m_will_erase(will_erase) {} + : m_usage(usage), m_dirty(dirty), m_sentinel(sentinel), m_map(map), m_will_erase(will_erase) {} inline CoinsCachePair* Begin() const noexcept { return m_sentinel.second.Next(); } inline CoinsCachePair* End() const noexcept { return &m_sentinel; } @@ -284,6 +285,7 @@ struct CoinsViewCacheCursor inline CoinsCachePair* NextAndMaybeErase(CoinsCachePair& current) noexcept { const auto next_entry{current.second.Next()}; + m_dirty -= current.second.IsDirty(); // If we are not going to erase the cache, we must still erase spent entries. // Otherwise, clear the state of the entry. if (!m_will_erase) { @@ -300,6 +302,7 @@ struct CoinsViewCacheCursor inline bool WillErase(CoinsCachePair& current) const noexcept { return m_will_erase || current.second.coin.IsSpent(); } private: size_t& m_usage; + size_t& m_dirty; CoinsCachePair& m_sentinel; CCoinsMap& m_map; bool m_will_erase; @@ -377,6 +380,8 @@ protected: /* Cached dynamic memory usage for the inner Coin objects. 
*/ mutable size_t cachedCoinsUsage{0}; + /* Running count of dirty Coin cache entries. */ + mutable size_t m_dirty_count{0}; public: CCoinsViewCache(CCoinsView *baseIn, bool deterministic = false); diff --git a/src/test/coins_tests.cpp b/src/test/coins_tests.cpp index c46144b34b4..261859ff9a9 100644 --- a/src/test/coins_tests.cpp +++ b/src/test/coins_tests.cpp @@ -100,6 +100,7 @@ public: CCoinsMap& map() const { return cacheCoins; } CoinsCachePair& sentinel() const { return m_sentinel; } size_t& usage() const { return cachedCoinsUsage; } + size_t& GetDirtyCount() const { return m_dirty_count; } }; } // namespace @@ -652,7 +653,8 @@ static void WriteCoinsViewEntry(CCoinsView& view, const MaybeCoin& cache_coin) CCoinsMapMemoryResource resource; CCoinsMap map{0, CCoinsMap::hasher{}, CCoinsMap::key_equal{}, &resource}; auto usage{cache_coin ? InsertCoinsMapEntry(map, sentinel, *cache_coin) : 0}; - auto cursor{CoinsViewCacheCursor(usage, sentinel, map, /*will_erase=*/true)}; + size_t dirty = cache_coin ? cache_coin->IsDirty() : 0; + auto cursor{CoinsViewCacheCursor(usage, dirty, sentinel, map, /*will_erase=*/true)}; BOOST_CHECK(view.BatchWrite(cursor, {})); } @@ -663,7 +665,10 @@ public: { auto base_cache_coin{base_value == ABSENT ? 
MISSING : CoinEntry{base_value, CoinEntry::State::DIRTY}}; WriteCoinsViewEntry(base, base_cache_coin); - if (cache_coin) cache.usage() += InsertCoinsMapEntry(cache.map(), cache.sentinel(), *cache_coin); + if (cache_coin) { + cache.usage() += InsertCoinsMapEntry(cache.map(), cache.sentinel(), *cache_coin); + cache.GetDirtyCount() += cache_coin->IsDirty(); + } } CCoinsView root; diff --git a/src/test/fuzz/coins_view.cpp b/src/test/fuzz/coins_view.cpp index 9c6aa6e7a1e..a9b91b9efe1 100644 --- a/src/test/fuzz/coins_view.cpp +++ b/src/test/fuzz/coins_view.cpp @@ -123,6 +123,7 @@ FUZZ_TARGET(coins_view, .init = initialize_coins_view) CoinsCachePair sentinel{}; sentinel.second.SelfRef(sentinel); size_t usage{0}; + size_t num_dirty{0}; CCoinsMapMemoryResource resource; CCoinsMap coins_map{0, SaltedOutpointHasher{/*deterministic=*/true}, CCoinsMap::key_equal{}, &resource}; LIMITED_WHILE(good_data && fuzzed_data_provider.ConsumeBool(), 10'000) @@ -144,10 +145,11 @@ FUZZ_TARGET(coins_view, .init = initialize_coins_view) if (dirty) CCoinsCacheEntry::SetDirty(*it, sentinel); if (fresh) CCoinsCacheEntry::SetFresh(*it, sentinel); usage += it->second.coin.DynamicMemoryUsage(); + num_dirty += dirty; } bool expected_code_path = false; try { - auto cursor{CoinsViewCacheCursor(usage, sentinel, coins_map, /*will_erase=*/true)}; + auto cursor{CoinsViewCacheCursor(usage, num_dirty, sentinel, coins_map, /*will_erase=*/true)}; coins_view_cache.BatchWrite(cursor, fuzzed_data_provider.ConsumeBool() ? 
ConsumeUInt256(fuzzed_data_provider) : coins_view_cache.GetBestBlock()); expected_code_path = true; } catch (const std::logic_error& e) { From 15619a1c99f6cc815291adf150bb8049c75a91cc Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Tue, 21 Jan 2025 14:20:54 -0500 Subject: [PATCH 2/2] validation: use dirty coins count in flush warnings --- src/coins.h | 3 +++ src/validation.cpp | 15 ++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/coins.h b/src/coins.h index 15d9b479f79..f897bce7495 100644 --- a/src/coins.h +++ b/src/coins.h @@ -468,6 +468,9 @@ public: //! Calculate the size of the cache (in number of transaction outputs) unsigned int GetCacheSize() const; + //! Calculate the number of dirty cache entries (transaction outputs) + size_t GetDirtyCount() const noexcept { return m_dirty_count; } + //! Calculate the size of the cache (in bytes) size_t DynamicMemoryUsage() const; diff --git a/src/validation.cpp b/src/validation.cpp index 64588e802d7..730cc4556cd 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -88,8 +88,8 @@ using node::CBlockIndexHeightOnlyComparator; using node::CBlockIndexWorkComparator; using node::SnapshotMetadata; -/** Size threshold for warning about slow UTXO set flush to disk. */ -static constexpr size_t WARN_FLUSH_COINS_SIZE = 1 << 30; // 1 GiB +/** Threshold for warning when writing this many dirty cache entries to disk. */ +static constexpr size_t WARN_FLUSH_COINS_COUNT = 10'000'000; /** Time to wait between writing blocks/block index to disk. */ static constexpr std::chrono::hours DATABASE_WRITE_INTERVAL{1}; /** Time to wait between flushing chainstate to disk. 
*/ @@ -2828,7 +2828,8 @@ bool Chainstate::FlushStateToDisk( bool full_flush_completed = false; const size_t coins_count = CoinsTip().GetCacheSize(); - const size_t coins_mem_usage = CoinsTip().DynamicMemoryUsage(); + [[maybe_unused]] const size_t coins_mem_usage = CoinsTip().DynamicMemoryUsage(); + const size_t coins_dirty_count = CoinsTip().GetDirtyCount(); try { { @@ -2931,16 +2932,16 @@ bool Chainstate::FlushStateToDisk( } // Flush best chain related state. This can only be done if the blocks / block index write was also done. if (fDoFullFlush && !CoinsTip().GetBestBlock().IsNull()) { - if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); - LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)", - coins_count, coins_mem_usage >> 10), BCLog::BENCH); + if (coins_dirty_count >= WARN_FLUSH_COINS_COUNT) LogWarning("Flushing large (%d entries) UTXO set to disk, it may take several minutes", coins_dirty_count); + LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d out of %d cached coins)", + coins_dirty_count, coins_count), BCLog::BENCH); // Typical Coin structures on disk are around 48 bytes in size. // Pushing a new one to the database can cause it to be written // twice (once in the log, and once in the tables). This is already // an overestimation, as most will delete an existing entry or // overwrite one. Still, use a conservative safety factor of 2. - if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) { + if (!CheckDiskSpace(m_chainman.m_options.datadir, 2 * 2 * 48 * coins_dirty_count)) { return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); } // Flush the chainstate (which may refer to block index entries).