Merge bitcoin/bitcoin#34425: test: Fix all races after a socket is closed gracefully

fae807ed25 test: Remove unused, confusing and brittle connect_nodes.wait_for_connect (MarcoFalke)
fab2772647 test: Fix all races after a socket is closed gracefully (MarcoFalke)
fa21edddb2 test: Stricter checks in rpc_setban.py (MarcoFalke)
faa404e119 test: Add is_connected_to helper (MarcoFalke)

Pull request description:

  Currently, functional tests may intermittently fail, due to a lack of synchronization after a node connection socket was closed gracefully: If node A is connected to node B, and node B closes the connection, node A *must* wait for the connection to be closed before continuing the test. Otherwise, subsequent re-connections may not work while a stale connection is still alive.

  This can be reproduced locally via something like:

  ```diff
  diff --git a/src/net.cpp b/src/net.cpp
  index 6b79e913e8..32bd061500 100644
  --- a/src/net.cpp
  +++ b/src/net.cpp
  @@ -2200,2 +2200,3 @@ void CConnman::SocketHandlerConnected(const std::vector<CNode*>& nodes,
                   }
  +            UninterruptibleSleep(599ms);
                   pnode->CloseSocketDisconnect();
  ```

  With this diff, the tests should fail on master, and pass after this fix.

  The fix is placed inside the `connect_nodes` helper.

ACKs for top commit:
  rkrux:
    ACK fae807ed25
  w0xlt:
    ACK fae807ed25
  davidgumberg:
    (re-ish) crACK fae807ed25
  sedited:
    ACK fae807ed25

Tree-SHA512: 053825bbd319d7c49e08810bbabbf9c3a43d89897d697c0ca9232fd8a88ae475b4f9fbd79dff549b4e0a8941cf926ca4567e7fd43dc1749bf023d8ee7dd49608
This commit is contained in:
merge-script
2026-04-19 11:06:17 +02:00
3 changed files with 32 additions and 24 deletions

View File

@@ -26,19 +26,28 @@ class SetBanTests(BitcoinTestFramework):
peerinfo = self.nodes[1].getpeerinfo()[0]
assert "noban" not in peerinfo["permissions"]
# Node 0 get banned by Node 1
# Node 0 gets banned by Node 1
self.nodes[1].setban("127.0.0.1", "add")
self.wait_until(lambda: not self.nodes[0].is_connected_to(self.nodes[1]))
# Node 0 should not be able to reconnect
self.restart_node(1, [])
context = ExitStack()
context.enter_context(self.nodes[1].assert_debug_log(expected_msgs=['dropped (banned)\n'], timeout=50))
context.enter_context(self.nodes[1].assert_debug_log(expected_msgs=["dropped (banned)\n"]))
# When disconnected right after connecting, a v2 node will attempt to reconnect with v1.
# Wait for that to happen so that it cannot mess with later tests.
if self.options.v2transport:
context.enter_context(self.nodes[0].assert_debug_log(expected_msgs=['trying v1 connection'], timeout=50))
# Wait for all disconnects on node0, so that it cannot mess with later tests.
context.enter_context(self.nodes[0].assert_debug_log(
expected_msgs=[
"retrying with v1 transport protocol for peer=2",
"Cleared nodestate for peer=2",
"Cleared nodestate for peer=3",
] if self.options.v2transport else [
"Cleared nodestate for peer=2", # Just one v1 disconnect to wait for
],
timeout=8))
with context:
self.restart_node(1, [])
self.nodes[0].addnode("127.0.0.1:" + str(p2p_port(1)), "onetry")
assert not self.nodes[0].is_connected_to(self.nodes[1])
# However, node 0 should be able to reconnect if it has noban permission
self.restart_node(1, ['-whitelist=127.0.0.1'])

View File

@@ -549,16 +549,20 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
def wait_for_node_exit(self, i, timeout):
self.nodes[i].process.wait(timeout)
def connect_nodes(self, a, b, *, peer_advertises_v2=None, wait_for_connect: bool = True):
"""
Kwargs:
wait_for_connect: if True, block until the nodes are verified as connected. You might
want to disable this when using -stopatheight with one of the connected nodes,
since there will be a race between the actual connection and performing
the assertions before one node shuts down.
"""
def connect_nodes(self, a, b, *, peer_advertises_v2=None):
from_connection = self.nodes[a]
to_connection = self.nodes[b]
# Use subversion as peer id. Test nodes have their node number appended to the user agent string
from_connection_subver = from_connection.getnetworkinfo()['subversion']
to_connection_subver = to_connection.getnetworkinfo()['subversion']
def find_conn(node, peer_subversion, inbound):
return next(filter(lambda peer: peer['subver'] == peer_subversion and peer['inbound'] == inbound, node.getpeerinfo()), None)
self.wait_until(lambda: not find_conn(from_connection, to_connection_subver, inbound=False))
self.wait_until(lambda: not find_conn(to_connection, from_connection_subver, inbound=True))
ip_port = "127.0.0.1:" + str(p2p_port(b))
if peer_advertises_v2 is None:
@@ -571,16 +575,6 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
# compatibility with older clients
from_connection.addnode(ip_port, "onetry")
if not wait_for_connect:
return
# Use subversion as peer id. Test nodes have their node number appended to the user agent string
from_connection_subver = from_connection.getnetworkinfo()['subversion']
to_connection_subver = to_connection.getnetworkinfo()['subversion']
def find_conn(node, peer_subversion, inbound):
return next(filter(lambda peer: peer['subver'] == peer_subversion and peer['inbound'] == inbound, node.getpeerinfo()), None)
self.wait_until(lambda: find_conn(from_connection, to_connection_subver, inbound=False) is not None)
self.wait_until(lambda: find_conn(to_connection, from_connection_subver, inbound=True) is not None)

View File

@@ -914,6 +914,11 @@ class TestNode():
self.wait_until(lambda: self.num_test_p2p_connections() == 0)
def is_connected_to(self, other):
assert isinstance(other, TestNode)
other_subver = other.getnetworkinfo()["subversion"]
return any(peer["subver"] == other_subver for peer in self.getpeerinfo())
def bumpmocktime(self, seconds):
"""Fast forward using setmocktime to self.mocktime + seconds. Requires setmocktime to have
been called at some point in the past."""