Merge bitcoin/bitcoin#30807: Fix peers abruptly disconnecting from AssumeUTXO nodes during IBD

992f83bb6f test: add coverage for assumeUTXO honest peers disconnection (furszy)
6d5812e5c8 assumeUTXO: fix peers disconnection during sync (furszy)

Pull request description:

  Because AssumeUTXO nodes prioritize tip synchronization, they relay their local
  address through the network before completing the background chain sync.
  This, combined with the advertising of full-node service (`NODE_NETWORK`), can
  result in an honest peer in IBD connecting to the AssumeUTXO node (while syncing)
  and requesting an historical block the node does not have. This behavior leads to
  an abrupt disconnection due to perceived unresponsiveness from the AssumeUTXO
  node.

  This lack of response occurs because nodes ignore `getdata` requests when they do
  not have the block data available (further discussion can be found in #30385).

  Fix this by refraining from signaling full-node service support while the
  background chain is being synced. During this period, the node will only
  signal `NODE_NETWORK_LIMITED` support. Then, full-node (`NODE_NETWORK`)
  support will be re-enabled once the background chain sync is completed.

  Thanks mzumsande for a post-#30385 convo too.

  Testing notes:
  Just cherry-pick the second commit (bb08c22) on master.
  It will fail there, due to the IBD node requesting historical blocks to the snapshot
  node - which is bad because the snapshot node will ignore the requests and
  stall + disconnect after some time.

ACKs for top commit:
  achow101:
    ACK 992f83bb6f
  naumenkogs:
    ACK 992f83bb6f
  mzumsande:
    ACK 992f83bb6f

Tree-SHA512: fef525d1cf3200c2dd89a346be9c82d77f2e28ddaaea1f490a435e180d1a47a371cadea508349777d740ab56e94be536ad8f7d61cc81f6550c58b609b3779ed3
This commit is contained in:
Ava Chow
2024-09-11 13:37:40 -04:00
7 changed files with 147 additions and 11 deletions

View File

@@ -1579,7 +1579,12 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
// This is defined and set here instead of inline in validation.h to avoid a hard
// dependency between validation and index/base, since the latter is not in
// libbitcoinkernel.
chainman.restart_indexes = [&node]() {
chainman.snapshot_download_completed = [&node]() {
if (!node.chainman->m_blockman.IsPruneMode()) {
LogPrintf("[snapshot] re-enabling NODE_NETWORK services\n");
node.connman->AddLocalServices(NODE_NETWORK);
}
LogPrintf("[snapshot] restarting indexes\n");
// Drain the validation interface queue to ensure that the old indexes
@@ -1716,8 +1721,13 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
}
}
} else {
LogPrintf("Setting NODE_NETWORK on non-prune mode\n");
nLocalServices = ServiceFlags(nLocalServices | NODE_NETWORK);
// Prior to setting NODE_NETWORK, check if we can provide historical blocks.
if (!WITH_LOCK(chainman.GetMutex(), return chainman.BackgroundSyncInProgress())) {
LogPrintf("Setting NODE_NETWORK on non-prune mode\n");
nLocalServices = ServiceFlags(nLocalServices | NODE_NETWORK);
} else {
LogPrintf("Running node in NODE_NETWORK_LIMITED mode until snapshot background sync completes\n");
}
}
// ********************************************************* Step 11: import blocks

View File

@@ -1791,7 +1791,8 @@ void CConnman::CreateNodeFromAcceptedSocket(std::unique_ptr<Sock>&& sock,
const bool inbound_onion = std::find(m_onion_binds.begin(), m_onion_binds.end(), addr_bind) != m_onion_binds.end();
// The V2Transport transparently falls back to V1 behavior when an incoming V1 connection is
// detected, so use it whenever we signal NODE_P2P_V2.
const bool use_v2transport(nLocalServices & NODE_P2P_V2);
ServiceFlags local_services = GetLocalServices();
const bool use_v2transport(local_services & NODE_P2P_V2);
CNode* pnode = new CNode(id,
std::move(sock),
@@ -1809,7 +1810,7 @@ void CConnman::CreateNodeFromAcceptedSocket(std::unique_ptr<Sock>&& sock,
.use_v2transport = use_v2transport,
});
pnode->AddRef();
m_msgproc->InitializeNode(*pnode, nLocalServices);
m_msgproc->InitializeNode(*pnode, local_services);
{
LOCK(m_nodes_mutex);
m_nodes.push_back(pnode);

View File

@@ -1221,6 +1221,11 @@ public:
//! that peer during `net_processing.cpp:PushNodeVersion()`.
ServiceFlags GetLocalServices() const;
//! Updates the local services that this node advertises to other peers
//! during connection handshake.
void AddLocalServices(ServiceFlags services) { nLocalServices = ServiceFlags(nLocalServices | services); };
void RemoveLocalServices(ServiceFlags services) { nLocalServices = ServiceFlags(nLocalServices & ~services); }
uint64_t GetMaxOutboundTarget() const EXCLUSIVE_LOCKS_REQUIRED(!m_total_bytes_sent_mutex);
std::chrono::seconds GetMaxOutboundTimeframe() const;
@@ -1460,11 +1465,12 @@ private:
* This data is replicated in each Peer instance we create.
*
* This data is not marked const, but after being set it should not
* change.
* change. Unless AssumeUTXO is started, in which case, the peer
* will be limited until the background chain sync finishes.
*
* \sa Peer::our_services
*/
ServiceFlags nLocalServices;
std::atomic<ServiceFlags> nLocalServices;
std::unique_ptr<CSemaphore> semOutbound;
std::unique_ptr<CSemaphore> semAddnode;

View File

@@ -3042,6 +3042,13 @@ static RPCHelpMan loadtxoutset()
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot: %s. (%s)", util::ErrorString(activation_result).original, path.utf8string()));
}
// Because we can't provide historical blocks during tip or background sync.
// Update local services to reflect we are a limited peer until we are fully sync.
node.connman->RemoveLocalServices(NODE_NETWORK);
// Setting the limited state is usually redundant because the node can always
// provide the last 288 blocks, but it doesn't hurt to set it.
node.connman->AddLocalServices(NODE_NETWORK_LIMITED);
CBlockIndex& snapshot_index{*CHECK_NONFATAL(*activation_result)};
UniValue result(UniValue::VOBJ);

View File

@@ -3575,8 +3575,8 @@ bool Chainstate::ActivateBestChain(BlockValidationState& state, std::shared_ptr<
//
// This cannot be done while holding cs_main (within
// MaybeCompleteSnapshotValidation) or a cs_main deadlock will occur.
if (m_chainman.restart_indexes) {
m_chainman.restart_indexes();
if (m_chainman.snapshot_download_completed) {
m_chainman.snapshot_download_completed();
}
break;
}

View File

@@ -977,7 +977,7 @@ public:
//! Function to restart active indexes; set dynamically to avoid a circular
//! dependency on `base/index.cpp`.
std::function<void()> restart_indexes = std::function<void()>();
std::function<void()> snapshot_download_completed = std::function<void()>();
const CChainParams& GetParams() const { return m_options.chainparams; }
const Consensus::Params& GetConsensus() const { return m_options.chainparams.GetConsensus(); }