From f0207e00303a1030eca795ede231e3c0d94df061 Mon Sep 17 00:00:00 2001 From: TheCharlatan Date: Tue, 25 Jul 2023 11:32:09 +0200 Subject: [PATCH] blockstorage: Return on fatal block file flush error By returning an error code if `FlushBlockFile` fails, the caller now has to explicitly handle block file flushing errors. Before this change such errors were non-explicitly ignored without a clear rationale. Prior to this patch `FlushBlockFile` may have failed silently in `Chainstate::FlushStateToDisk`. Improve this with a log line. Also add a TODO comment to flesh out whether returning early in the case of an error is appropriate or not. Returning early might be appropriate to prohibit `WriteBlockIndexDB` from writing a block index entry that does not refer to a fully flushed block. Besides `Chainstate::FlushStateToDisk`, `FlushBlockFile` is also called by `FindBlockPos`. Don't change the abort behavior there, since we don't want to fail the function if the flushing of already written blocks fails. Instead, just document it. --- src/node/blockstorage.cpp | 21 ++++++++++++++++++--- src/node/blockstorage.h | 5 ++++- src/validation.cpp | 6 +++++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 0d25c798ce3..d5f4fed995e 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -546,8 +546,9 @@ void BlockManager::FlushUndoFile(int block_file, bool finalize) } } -void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) +bool BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) { + bool success = true; LOCK(cs_LastBlockFile); if (m_blockfile_info.size() < 1) { @@ -555,17 +556,19 @@ void BlockManager::FlushBlockFile(bool fFinalize, bool finalize_undo) // chainstate init, when we call ChainstateManager::MaybeRebalanceCaches() (which // then calls FlushStateToDisk()), resulting in a call to this function before we // have populated `m_blockfile_info` via LoadBlockIndexDB(). - return; + return true; } assert(static_cast(m_blockfile_info.size()) > m_last_blockfile); FlatFilePos block_pos_old(m_last_blockfile, m_blockfile_info[m_last_blockfile].nSize); if (!BlockFileSeq().Flush(block_pos_old, fFinalize)) { m_opts.notifications.flushError("Flushing block file to disk failed. This is likely the result of an I/O error."); + success = false; } // we do not always flush the undo file, as the chain tip may be lagging behind the incoming blocks, // e.g. during IBD or a sync after a node going offline if (!fFinalize || finalize_undo) FlushUndoFile(m_last_blockfile, finalize_undo); + return success; } uint64_t BlockManager::CalculateCurrentUsage() @@ -658,7 +661,19 @@ bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigne if (!fKnown) { LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s\n", m_last_blockfile, m_blockfile_info[m_last_blockfile].ToString()); } - FlushBlockFile(!fKnown, finalize_undo); + + // Do not propagate the return code. The flush concerns a previous block + // and undo file that has already been written to. If a flush fails + // here, and we crash, there is no expected additional block data + // inconsistency arising from the flush failure here. However, the undo + // data may be inconsistent after a crash if the flush is called during + // a reindex. A flush error might also leave some of the data files + // untrimmed. + if (!FlushBlockFile(!fKnown, finalize_undo)) { + LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning, + "Failed to flush previous block file %05i (finalize=%i, finalize_undo=%i) before opening new block file %05i\n", + m_last_blockfile, !fKnown, finalize_undo, nFile); + } m_last_blockfile = nFile; m_undo_height_in_last_blockfile = 0; // No undo data yet in the new file, so reset our undo-height tracking. } diff --git a/src/node/blockstorage.h b/src/node/blockstorage.h index 0e8b9abce97..3e0962386f7 100644 --- a/src/node/blockstorage.h +++ b/src/node/blockstorage.h @@ -91,7 +91,10 @@ private: */ bool LoadBlockIndex() EXCLUSIVE_LOCKS_REQUIRED(cs_main); - void FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); + + /** Return false if block file flushing fails. */ + [[nodiscard]] bool FlushBlockFile(bool fFinalize = false, bool finalize_undo = false); + void FlushUndoFile(int block_file, bool finalize = false); [[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown); bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize); diff --git a/src/validation.cpp b/src/validation.cpp index cd6654abe48..a72e5159eff 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -2511,7 +2511,11 @@ bool Chainstate::FlushStateToDisk( LOG_TIME_MILLIS_WITH_CATEGORY("write block and undo data to disk", BCLog::BENCH); // First make sure all block and undo data is flushed to disk. - m_blockman.FlushBlockFile(); + // TODO: Handle return error, or add detailed comment why it is + // safe to not return an error upon failure. + if (!m_blockman.FlushBlockFile()) { + LogPrintLevel(BCLog::VALIDATION, BCLog::Level::Warning, "%s: Failed to flush block file.\n", __func__); + } } // Then update all block file information (which may refer to block and undo files).