From 3069d66dcac0e1eeca2142a2d72d3d010335346f Mon Sep 17 00:00:00 2001
From: Martin Zumsande
Date: Fri, 28 Mar 2025 15:46:59 -0400
Subject: [PATCH 1/3] p2p: During block download, adjust pindexLastCommonBlock
 better

Simplify and improve the logic for calculating pindexLastCommonBlock, so
that nWindowEnd is calculated more accurately.

The previous logic did not take into account that the chain tip may have
moved forward, so FindNextBlocks could iterate over many blocks that were
already downloaded and connected. As a result, blocks that should have
been requested for download were not requested, and peers could be
wrongly marked as stallers.

Also remove the extra logic added in commit
49d569cb1fdd62a9da8dff51dccaf4680fe3d0eb for the situation right after a
snapshot was loaded: after snapshot loading, our tip becomes the snapshot
block. For peers that have the most-work chain, which includes the
snapshot, our tip is an ancestor of the peer's best block, so the general
advancement logic will move pindexLastCommonBlock from any pre-snapshot
position to the snapshot height automatically.

Co-authored-by: stringintech
Co-authored-by: Pieter Wuille
---
 src/net_processing.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/net_processing.cpp b/src/net_processing.cpp
index 10fbae5e7ed..3ac8b13266b 100644
--- a/src/net_processing.cpp
+++ b/src/net_processing.cpp
@@ -1397,17 +1397,16 @@ void PeerManagerImpl::FindNextBlocksToDownload(const Peer& peer, unsigned int co
         return;
     }
 
-    // Bootstrap quickly by guessing a parent of our best tip is the forking point.
-    // Guessing wrong in either direction is not a problem.
-    // Also reset pindexLastCommonBlock after a snapshot was loaded, so that blocks after the snapshot will be prioritised for download.
+    // Determine the forking point between the peer's chain and our chain:
+    // pindexLastCommonBlock is required to be an ancestor of pindexBestKnownBlock, and will be used as a starting point.
+    // It is being set to the fork point between the peer's best known block and the current tip, unless it is already set to
+    // an ancestor with more work than the fork point.
+    auto fork_point = LastCommonAncestor(state->pindexBestKnownBlock, m_chainman.ActiveTip());
     if (state->pindexLastCommonBlock == nullptr ||
-        (snap_base && state->pindexLastCommonBlock->nHeight < snap_base->nHeight)) {
-        state->pindexLastCommonBlock = m_chainman.ActiveChain()[std::min(state->pindexBestKnownBlock->nHeight, m_chainman.ActiveChain().Height())];
+        fork_point->nChainWork > state->pindexLastCommonBlock->nChainWork ||
+        state->pindexBestKnownBlock->GetAncestor(state->pindexLastCommonBlock->nHeight) != state->pindexLastCommonBlock) {
+        state->pindexLastCommonBlock = fork_point;
     }
-
-    // If the peer reorganized, our previous pindexLastCommonBlock may not be an ancestor
-    // of its current tip anymore. Go back enough to fix that.
-    state->pindexLastCommonBlock = LastCommonAncestor(state->pindexLastCommonBlock, state->pindexBestKnownBlock);
 
     if (state->pindexLastCommonBlock == state->pindexBestKnownBlock)
         return;

From 9af6daf07ed0a82386c1930e67683af5f2090e8b Mon Sep 17 00:00:00 2001
From: Martin Zumsande
Date: Wed, 2 Apr 2025 11:18:58 -0400
Subject: [PATCH 2/3] test: remove magic number when checking for blocks that
 have arrived

getpeerinfo provides a list of blocks that are inflight, which can be
used instead.
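For illustration, a minimal sketch of the check the test switches to (plain
Python, outside the patch; it assumes only a test-framework node handle and
relies on getpeerinfo's per-peer "inflight" field, the heights of blocks
currently requested from that peer):

    # Count block requests still outstanding across all peers, using
    # getpeerinfo's "inflight" field instead of a hard-coded byte count.
    def blocks_in_flight(node):
        return sum(len(peer["inflight"]) for peer in node.getpeerinfo())

    # The updated test waits until only the withheld stall block remains:
    #   self.wait_until(lambda: blocks_in_flight(node) == 1)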
---
 test/functional/p2p_ibd_stalling.py | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/test/functional/p2p_ibd_stalling.py b/test/functional/p2p_ibd_stalling.py
index 542d767ccee..e7066ea776e 100755
--- a/test/functional/p2p_ibd_stalling.py
+++ b/test/functional/p2p_ibd_stalling.py
@@ -80,10 +80,8 @@ class P2PIBDStallingTest(BitcoinTestFramework):
             peers[-1].block_store = block_dict
             peers[-1].send_and_ping(headers_message)
 
-        # Need to wait until 1023 blocks are received - the magic total bytes number is a workaround in lack of an rpc
-        # returning the number of downloaded (but not connected) blocks.
-        bytes_recv = 172761 if not self.options.v2transport else 169692
-        self.wait_until(lambda: self.total_bytes_recv_for_blocks() == bytes_recv)
+        # Wait until all blocks are received (except for stall_block), so that no other blocks are in flight.
+        self.wait_until(lambda: sum(len(peer['inflight']) for peer in node.getpeerinfo()) == 1)
         self.all_sync_send_with_ping(peers)
         # If there was a peer marked for stalling, it would get disconnected
@@ -144,12 +142,6 @@ class P2PIBDStallingTest(BitcoinTestFramework):
         self.log.info("Check that all outstanding blocks get connected")
         self.wait_until(lambda: node.getblockcount() == NUM_BLOCKS)
 
-    def total_bytes_recv_for_blocks(self):
-        total = 0
-        for info in self.nodes[0].getpeerinfo():
-            if ("block" in info["bytesrecv_per_msg"].keys()):
-                total += info["bytesrecv_per_msg"]["block"]
-        return total
 
     def all_sync_send_with_ping(self, peers):
         for p in peers:

From 01cc20f3307c532f402cdf5dc17f2f14920b725b Mon Sep 17 00:00:00 2001
From: Martin Zumsande
Date: Wed, 2 Apr 2025 11:29:05 -0400
Subject: [PATCH 3/3] test: improve coverage for a resolved stalling situation

This test (which would fail without the previous commit) checks that after
the stalling block was received, we don't incorrectly mark another peer as
a staller immediately.
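To see why a second withheld block exercises the fix, here is a small
self-contained sketch (plain Python, not part of the test; it assumes only
the 1024-block download window, BLOCK_DOWNLOAD_WINDOW, on which the first
patch's nWindowEnd is based, and simplifies the stalling mechanics):

    # Simplified model: blocks are only requested up to
    # nWindowEnd = last_common_height + BLOCK_DOWNLOAD_WINDOW.
    BLOCK_DOWNLOAD_WINDOW = 1024

    def window_end(last_common_height):
        return last_common_height + BLOCK_DOWNLOAD_WINDOW

    # If the tip has connected up to height 500 but pindexLastCommonBlock
    # is still stuck at height 0, the window still ends at 1024 instead of
    # 1524, so blocks that should become requestable are not, and the
    # stalling logic can fire against the wrong peer.
    assert window_end(0) == 1024    # stale fork point: window does not move
    assert window_end(500) == 1524  # fork point advanced with the tip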
---
 test/functional/p2p_ibd_stalling.py | 37 ++++++++++++++++-------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/test/functional/p2p_ibd_stalling.py b/test/functional/p2p_ibd_stalling.py
index e7066ea776e..c09b2e503c8 100755
--- a/test/functional/p2p_ibd_stalling.py
+++ b/test/functional/p2p_ibd_stalling.py
@@ -29,15 +29,15 @@ from test_framework.util import (
 
 
 class P2PStaller(P2PDataStore):
-    def __init__(self, stall_block):
-        self.stall_block = stall_block
+    def __init__(self, stall_blocks):
+        self.stall_blocks = stall_blocks
         super().__init__()
 
     def on_getdata(self, message):
         for inv in message.inv:
             self.getdata_requests.append(inv.hash)
             if (inv.type & MSG_TYPE_MASK) == MSG_BLOCK:
-                if (inv.hash != self.stall_block):
+                if (inv.hash not in self.stall_blocks):
                     self.send_without_ping(msg_block(self.block_store[inv.hash]))
 
     def on_getheaders(self, message):
@@ -51,7 +51,7 @@ class P2PIBDStallingTest(BitcoinTestFramework):
 
     def run_test(self):
         NUM_BLOCKS = 1025
-        NUM_PEERS = 4
+        NUM_PEERS = 5
         node = self.nodes[0]
         tip = int(node.getbestblockhash(), 16)
         blocks = []
@@ -66,7 +66,9 @@ class P2PIBDStallingTest(BitcoinTestFramework):
             block_time += 1
             height += 1
         block_dict[blocks[-1].hash_int] = blocks[-1]
-        stall_block = blocks[0].hash_int
+        stall_index = 0
+        second_stall_index = 500
+        stall_blocks = [blocks[stall_index].hash_int, blocks[second_stall_index].hash_int]
 
         headers_message = msg_headers()
         headers_message.headers = [CBlockHeader(b) for b in blocks[:NUM_BLOCKS-1]]
@@ -76,12 +78,12 @@ class P2PIBDStallingTest(BitcoinTestFramework):
         self.mocktime = int(time.time()) + 1
         node.setmocktime(self.mocktime)
         for id in range(NUM_PEERS):
-            peers.append(node.add_outbound_p2p_connection(P2PStaller(stall_block), p2p_idx=id, connection_type="outbound-full-relay"))
+            peers.append(node.add_outbound_p2p_connection(P2PStaller(stall_blocks), p2p_idx=id, connection_type="outbound-full-relay"))
             peers[-1].block_store = block_dict
             peers[-1].send_and_ping(headers_message)
 
-        # Wait until all blocks are received (except for stall_block), so that no other blocks are in flight.
-        self.wait_until(lambda: sum(len(peer['inflight']) for peer in node.getpeerinfo()) == 1)
+        # Wait until all blocks are received (except for the stall blocks), so that no other blocks are in flight.
+        self.wait_until(lambda: sum(len(peer['inflight']) for peer in node.getpeerinfo()) == len(stall_blocks))
         self.all_sync_send_with_ping(peers)
         # If there was a peer marked for stalling, it would get disconnected
@@ -102,7 +104,7 @@ class P2PIBDStallingTest(BitcoinTestFramework):
         node.setmocktime(self.mocktime)
         peers[0].wait_for_disconnect()
         assert_equal(node.num_test_p2p_connections(), NUM_PEERS - 1)
-        self.wait_until(lambda: self.is_block_requested(peers, stall_block))
+        self.wait_until(lambda: self.is_block_requested(peers, stall_blocks[0]))
         # Make sure that SendMessages() is invoked, which assigns the missing block
         # to another peer and starts the stalling logic for them
         self.all_sync_send_with_ping(peers)
@@ -117,7 +119,7 @@ class P2PIBDStallingTest(BitcoinTestFramework):
         self.mocktime += 2
         node.setmocktime(self.mocktime)
         self.wait_until(lambda: sum(x.is_connected for x in node.p2ps) == NUM_PEERS - 2)
-        self.wait_until(lambda: self.is_block_requested(peers, stall_block))
+        self.wait_until(lambda: self.is_block_requested(peers, stall_blocks[0]))
         self.all_sync_send_with_ping(peers)
 
         self.log.info("Check that the stalling timeout gets doubled to 8 seconds for the next staller")
@@ -130,17 +132,18 @@ class P2PIBDStallingTest(BitcoinTestFramework):
         self.mocktime += 2
         node.setmocktime(self.mocktime)
         self.wait_until(lambda: sum(x.is_connected for x in node.p2ps) == NUM_PEERS - 3)
-        self.wait_until(lambda: self.is_block_requested(peers, stall_block))
+        self.wait_until(lambda: self.is_block_requested(peers, stall_blocks[0]))
         self.all_sync_send_with_ping(peers)
 
-        self.log.info("Provide the withheld block and check that stalling timeout gets reduced back to 2 seconds")
-        with node.assert_debug_log(expected_msgs=['Decreased stalling timeout to 2 seconds']):
+        self.log.info("Provide the first withheld block and check that stalling timeout gets reduced back to 2 seconds")
+        with node.assert_debug_log(expected_msgs=['Decreased stalling timeout to 2 seconds'], unexpected_msgs=['Stall started']):
             for p in peers:
-                if p.is_connected and (stall_block in p.getdata_requests):
-                    p.send_without_ping(msg_block(block_dict[stall_block]))
+                if p.is_connected and (stall_blocks[0] in p.getdata_requests):
+                    p.send_without_ping(msg_block(block_dict[stall_blocks[0]]))
+            self.all_sync_send_with_ping(peers)
 
-        self.log.info("Check that all outstanding blocks get connected")
-        self.wait_until(lambda: node.getblockcount() == NUM_BLOCKS)
+        self.log.info("Check that all outstanding blocks up to the second stall block get connected")
+        self.wait_until(lambda: node.getblockcount() == second_stall_index)
 
     def all_sync_send_with_ping(self, peers):