diff --git a/src/net_processing.cpp b/src/net_processing.cpp index b39412158f8..10311a10be1 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -110,8 +110,11 @@ static constexpr auto GETDATA_TX_INTERVAL{60s}; static const unsigned int MAX_GETDATA_SZ = 1000; /** Number of blocks that can be requested at any given time from a single peer. */ static const int MAX_BLOCKS_IN_TRANSIT_PER_PEER = 16; -/** Time during which a peer must stall block download progress before being disconnected. */ -static constexpr auto BLOCK_STALLING_TIMEOUT{2s}; +/** Default time during which a peer must stall block download progress before being disconnected. + * the actual timeout is increased temporarily if peers are disconnected for hitting the timeout */ +static constexpr auto BLOCK_STALLING_TIMEOUT_DEFAULT{2s}; +/** Maximum timeout for stalling block download. */ +static constexpr auto BLOCK_STALLING_TIMEOUT_MAX{64s}; /** Number of headers sent in one getheaders result. We rely on the assumption that if a peer sends * less than this number, we reached its tip. Changing this value is a protocol upgrade. */ static const unsigned int MAX_HEADERS_RESULTS = 2000; @@ -705,6 +708,9 @@ private: /** Number of preferable block download peers. */ int m_num_preferred_download_peers GUARDED_BY(cs_main){0}; + /** Stalling timeout for blocks in IBD */ + std::atomic m_block_stalling_timeout{BLOCK_STALLING_TIMEOUT_DEFAULT}; + bool AlreadyHaveTx(const GenTxid& gtxid) EXCLUSIVE_LOCKS_REQUIRED(cs_main, !m_recent_confirmed_transactions_mutex); @@ -1700,7 +1706,8 @@ void PeerManagerImpl::StartScheduledTasks(CScheduler& scheduler) /** * Evict orphan txn pool entries based on a newly connected * block, remember the recently confirmed transactions, and delete tracked - * announcements for them. Also save the time of the last tip update. + * announcements for them. Also save the time of the last tip update and + * possibly reduce dynamic block stalling timeout. */ void PeerManagerImpl::BlockConnected(const std::shared_ptr& pblock, const CBlockIndex* pindex) { @@ -1723,6 +1730,16 @@ void PeerManagerImpl::BlockConnected(const std::shared_ptr& pblock m_txrequest.ForgetTxHash(ptx->GetWitnessHash()); } } + + // In case the dynamic timeout was doubled once or more, reduce it slowly back to its default value + auto stalling_timeout = m_block_stalling_timeout.load(); + Assume(stalling_timeout >= BLOCK_STALLING_TIMEOUT_DEFAULT); + if (stalling_timeout != BLOCK_STALLING_TIMEOUT_DEFAULT) { + const auto new_timeout = std::max(std::chrono::duration_cast(stalling_timeout * 0.85), BLOCK_STALLING_TIMEOUT_DEFAULT); + if (m_block_stalling_timeout.compare_exchange_strong(stalling_timeout, new_timeout)) { + LogPrint(BCLog::NET, "Decreased stalling timeout to %d seconds\n", new_timeout.count()); + } + } } void PeerManagerImpl::BlockDisconnected(const std::shared_ptr &block, const CBlockIndex* pindex) @@ -5225,12 +5242,19 @@ bool PeerManagerImpl::SendMessages(CNode* pto) m_connman.PushMessage(pto, msgMaker.Make(NetMsgType::INV, vInv)); // Detect whether we're stalling - if (state.m_stalling_since.count() && state.m_stalling_since < current_time - BLOCK_STALLING_TIMEOUT) { + auto stalling_timeout = m_block_stalling_timeout.load(); + if (state.m_stalling_since.count() && state.m_stalling_since < current_time - stalling_timeout) { // Stalling only triggers when the block download window cannot move. During normal steady state, // the download window should be much larger than the to-be-downloaded set of blocks, so disconnection // should only happen during initial block download. LogPrintf("Peer=%d is stalling block download, disconnecting\n", pto->GetId()); pto->fDisconnect = true; + // Increase timeout for the next peer so that we don't disconnect multiple peers if our own + // bandwidth is insufficient. + const auto new_timeout = std::min(2 * stalling_timeout, BLOCK_STALLING_TIMEOUT_MAX); + if (stalling_timeout != new_timeout && m_block_stalling_timeout.compare_exchange_strong(stalling_timeout, new_timeout)) { + LogPrint(BCLog::NET, "Increased stalling timeout temporarily to %d seconds\n", m_block_stalling_timeout.load().count()); + } return true; } // In case there is a block that has been in flight from this peer for block_interval * (1 + 0.5 * N)