Compare commits

...

9 Commits

Author SHA1 Message Date
hodlinator
5b3c8ef31a
Merge 24558c2cf18210f46d6e2fadf0c5c5912f4b8e10 into 5f4422d68dc3530c353af1f87499de1c864b60ad 2025-03-17 03:52:10 +01:00
merge-script
5f4422d68d
Merge bitcoin/bitcoin#32010: qa: Fix TxIndex race conditions
3301d2cbe8c3b76c97285d75fa59637cb6952d0b qa: Wait for txindex to avoid race condition (Hodlinator)
9bfb0d75ba10591cc6c9620f9fd1ecc0e55e7a48 qa: Remove unnecessary -txindex args (Hodlinator)
7ac281c19cd3d11f316dbbb3308eabf1ad4f26d6 qa: Add missing coverage of corrupt indexes (Hodlinator)

Pull request description:

  - Add synchronization in 3 places where if the Transaction Index happens to be slow, we get rare test failures when querying it for transactions (one such case experienced on Windows, prompting investigation).
  - Remove unnecessary TxIndex initialization in some tests.
  - Add some test coverage where TxIndex aspect could be tested in feature_init.py.

ACKs for top commit:
  fjahr:
    re-ACK 3301d2cbe8c3b76c97285d75fa59637cb6952d0b
  mzumsande:
    Code Review ACK 3301d2cbe8c3b76c97285d75fa59637cb6952d0b
  furszy:
    Code review ACK 3301d2cbe8c3b76c97285d75fa59637cb6952d0b
  Prabhat1308:
    Concept ACK [`3301d2c`](3301d2cbe8)

Tree-SHA512: 7c2019e38455f344856aaf6b381faafbd88d53dc88d13309deb718c1dcfbee4ccca7c7f1b66917395503a6f94c3b216a007ad432cc8b93d0309db9805f38d602
2025-03-17 10:28:14 +08:00
Hodlinator
3301d2cbe8
qa: Wait for txindex to avoid race condition
Can be verified to be necessary through adding std::this_thread::sleep_for(0.5s) at the beginning of TxIndex::CustomAppend.
2025-03-10 15:24:16 +01:00
Hodlinator
9bfb0d75ba
qa: Remove unnecessary -txindex args
(Parent commit ensured indexes in feature_init.py are actually used, otherwise they would be removed here as well).
2025-03-07 22:22:31 +01:00
Hodlinator
7ac281c19c
qa: Add missing coverage of corrupt indexes 2025-03-07 22:22:31 +01:00
Hodlinator
24558c2cf1
http: Add logged request ids
Could aid debugging when an HTTP request gets stuck at shutdown.
2025-02-22 00:31:56 +01:00
Hodlinator
a00dfd2046
http: Abort with clear error instead of hanging forever
Could happen when connection doesn't complete for some reason.

Output early warning to give clue about what's up.

Prior check+log message before even starting to wait was a bit too eager to hint at something being wrong.
2025-02-21 16:23:23 +01:00
Hodlinator
757de96db5
http: Ensure eventHTTP is always freed
There is a race between ThreadHTTP exiting and our queuing of the freeing-event (event_base_once).

Additional free was useful in 709 of 962 runs.
2025-02-21 15:20:04 +01:00
Hodlinator
1350087f9a
doc http: Improve documentation around (libevent) HTTP shutdown behavior 2025-02-21 15:20:03 +01:00
9 changed files with 77 additions and 30 deletions

View File

@ -197,10 +197,10 @@ public:
return WITH_LOCK(m_mutex, return m_tracker.size());
}
//! Wait until there are no more connections with active requests in the tracker
void WaitUntilEmpty() const EXCLUSIVE_LOCKS_REQUIRED(!m_mutex)
void WaitUntilEmpty(std::chrono::seconds timeout) const EXCLUSIVE_LOCKS_REQUIRED(!m_mutex)
{
WAIT_LOCK(m_mutex, lock);
m_cv.wait(lock, [this]() EXCLUSIVE_LOCKS_REQUIRED(m_mutex) { return m_tracker.empty(); });
m_cv.wait_for(lock, timeout, [this]() EXCLUSIVE_LOCKS_REQUIRED(m_mutex) { return m_tracker.empty(); });
}
};
//! Track active requests
@ -258,6 +258,8 @@ std::string RequestMethodString(HTTPRequest::RequestMethod m)
assert(false);
}
static std::atomic_uint32_t g_request_id{0};
/** HTTP request callback */
static void http_request_cb(struct evhttp_request* req, void* arg)
{
@ -284,7 +286,8 @@ static void http_request_cb(struct evhttp_request* req, void* arg)
}
}
}
auto hreq{std::make_unique<HTTPRequest>(req, *static_cast<const util::SignalInterrupt*>(arg))};
const uint32_t request_id = g_request_id++;
auto hreq{std::make_unique<HTTPRequest>(req, *static_cast<const util::SignalInterrupt*>(arg), request_id, /*replySent=*/false)};
// Early address-based allow check
if (!ClientAllowed(hreq->GetPeer())) {
@ -302,8 +305,8 @@ static void http_request_cb(struct evhttp_request* req, void* arg)
return;
}
LogDebug(BCLog::HTTP, "Received a %s request for %s from %s\n",
RequestMethodString(hreq->GetRequestMethod()), SanitizeString(hreq->GetURI(), SAFE_CHARS_URI).substr(0, 100), hreq->GetPeer().ToStringAddrPort());
LogDebug(BCLog::HTTP, "Received a %s request for %s from %s, id: %d\n",
RequestMethodString(hreq->GetRequestMethod()), SanitizeString(hreq->GetURI(), SAFE_CHARS_URI).substr(0, 100), hreq->GetPeer().ToStringAddrPort(), request_id);
// Find registered handler for prefix
std::string strURI = hreq->GetURI();
@ -529,32 +532,50 @@ void StopHTTPServer()
}
g_thread_http_workers.clear();
}
// Unlisten sockets, these are what make the event loop running, which means
// that after this and all connections are closed the event loop will quit.
// Unlisten sockets which would otherwise keep the event loop running. Doing
// so avoids new connections being created, while existing ones continue
// being processed in parallel with this thread until they complete.
for (evhttp_bound_socket *socket : boundSockets) {
evhttp_del_accept_socket(eventHTTP, socket);
}
boundSockets.clear();
{
if (const auto n_connections{g_requests.CountActiveConnections()}; n_connections != 0) {
LogDebug(BCLog::HTTP, "Waiting for %d connections to stop HTTP server\n", n_connections);
// Give clients time to close down TCP connections from their side before we
// free eventHTTP, this avoids issues like RemoteDisconnected exceptions in
// the test framework.
g_requests.WaitUntilEmpty(/*timeout=*/30s);
if (auto connections{g_requests.CountActiveConnections()}) {
LogWarning("%d remaining HTTP connection(s) after 30s timeout, waiting for longer.", connections);
g_requests.WaitUntilEmpty(/*timeout=*/10min);
if ((connections = g_requests.CountActiveConnections())) {
LogError("%d remaining HTTP connection(s) after long timeout. "
"Aborting to avoid potential use-after-frees from "
"connections still running after freeing eventHTTP.", connections);
std::abort();
}
g_requests.WaitUntilEmpty();
}
if (eventHTTP) {
// Schedule a callback to call evhttp_free in the event base thread, so
// that evhttp_free does not need to be called again after the handling
// of unfinished request connections that follows.
event_base_once(eventBase, -1, EV_TIMEOUT, [](evutil_socket_t, short, void*) {
// Schedule a callback to call evhttp_free in the event base thread, as
// otherwise eventHTTP often keeps internal events alive, meaning
// aforementioned thread would never run out of events and exit.
if (event_base_once(eventBase, -1, EV_TIMEOUT, [](evutil_socket_t, short, void*) {
evhttp_free(eventHTTP);
eventHTTP = nullptr;
}, nullptr, nullptr);
}, nullptr, nullptr) != 0) {
LogDebug(BCLog::HTTP, "event_base_once failed");
}
}
if (eventBase) {
LogDebug(BCLog::HTTP, "Waiting for HTTP event thread to exit\n");
if (g_thread_http.joinable()) g_thread_http.join();
if (eventHTTP) {
LogDebug(BCLog::HTTP, "Freeing eventHTTP-event was not picked up by ThreadHTTP before it exited.");
evhttp_free(eventHTTP);
eventHTTP = nullptr;
}
event_base_free(eventBase);
eventBase = nullptr;
} else {
Assume(!g_thread_http.joinable());
}
g_work_queue.reset();
LogDebug(BCLog::HTTP, "Stopped HTTP server\n");
@ -591,8 +612,8 @@ void HTTPEvent::trigger(struct timeval* tv)
else
evtimer_add(ev, tv); // trigger after timeval passed
}
HTTPRequest::HTTPRequest(struct evhttp_request* _req, const util::SignalInterrupt& interrupt, bool _replySent)
: req(_req), m_interrupt(interrupt), replySent(_replySent)
HTTPRequest::HTTPRequest(struct evhttp_request* _req, const util::SignalInterrupt& interrupt, uint32_t id, bool _replySent)
: req(_req), m_interrupt(interrupt), m_id(id), replySent(_replySent)
{
}
@ -653,6 +674,7 @@ void HTTPRequest::WriteReply(int nStatus, std::span<const std::byte> reply)
{
assert(!replySent && req);
if (m_interrupt) {
LogDebug(BCLog::HTTP, "Instructing client to close their TCP connection to us (request id %d).", m_id);
WriteHeader("Connection", "close");
}
// Send event to main http thread to send reply message
@ -660,8 +682,9 @@ void HTTPRequest::WriteReply(int nStatus, std::span<const std::byte> reply)
assert(evb);
evbuffer_add(evb, reply.data(), reply.size());
auto req_copy = req;
HTTPEvent* ev = new HTTPEvent(eventBase, true, [req_copy, nStatus]{
HTTPEvent* ev = new HTTPEvent(eventBase, true, [req_copy, nStatus, id = m_id](){
evhttp_send_reply(req_copy, nStatus, nullptr, nullptr);
LogDebug(BCLog::HTTP, "Replied to request: %d", id);
// Re-enable reading from the socket. This is the second part of the libevent
// workaround above.
if (event_get_version_number() >= 0x02010600 && event_get_version_number() < 0x02010900) {

View File

@ -5,6 +5,7 @@
#ifndef BITCOIN_HTTPSERVER_H
#define BITCOIN_HTTPSERVER_H
#include <cstdint>
#include <functional>
#include <optional>
#include <span>
@ -72,10 +73,11 @@ class HTTPRequest
private:
struct evhttp_request* req;
const util::SignalInterrupt& m_interrupt;
const uint32_t m_id;
bool replySent;
public:
explicit HTTPRequest(struct evhttp_request* req, const util::SignalInterrupt& interrupt, bool replySent = false);
explicit HTTPRequest(struct evhttp_request* req, const util::SignalInterrupt& interrupt, uint32_t id, bool replySent);
~HTTPRequest();
enum RequestMethod {

View File

@ -49,7 +49,7 @@ FUZZ_TARGET(http_request)
}
util::SignalInterrupt interrupt;
HTTPRequest http_request{evreq, interrupt, true};
HTTPRequest http_request{evreq, interrupt, /*id=*/0, /*replySent=*/true};
const HTTPRequest::RequestMethod request_method = http_request.GetRequestMethod();
(void)RequestMethodString(request_method);
(void)http_request.GetURI();

View File

@ -88,7 +88,7 @@ class InitTest(BitcoinTestFramework):
args = ['-txindex=1', '-blockfilterindex=1', '-coinstatsindex=1']
for terminate_line in lines_to_terminate_after:
self.log.info(f"Starting node and will exit after line {terminate_line}")
self.log.info(f"Starting node and will terminate after line {terminate_line}")
with node.busy_wait_for_debug_log([terminate_line]):
if platform.system() == 'Windows':
# CREATE_NEW_PROCESS_GROUP is required in order to be able
@ -108,12 +108,22 @@ class InitTest(BitcoinTestFramework):
'blocks/index/*.ldb': 'Error opening block database.',
'chainstate/*.ldb': 'Error opening coins database.',
'blocks/blk*.dat': 'Error loading block database.',
'indexes/txindex/MANIFEST*': 'LevelDB error: Corruption: CURRENT points to a non-existent file',
# Removing these files does not result in a startup error:
# 'indexes/blockfilter/basic/*.dat', 'indexes/blockfilter/basic/db/*.*', 'indexes/coinstats/db/*.*',
# 'indexes/txindex/*.log', 'indexes/txindex/CURRENT', 'indexes/txindex/LOCK'
}
files_to_perturb = {
'blocks/index/*.ldb': 'Error loading block database.',
'chainstate/*.ldb': 'Error opening coins database.',
'blocks/blk*.dat': 'Corrupted block database detected.',
'indexes/blockfilter/basic/db/*.*': 'LevelDB error: Corruption',
'indexes/coinstats/db/*.*': 'LevelDB error: Corruption',
'indexes/txindex/*.log': 'LevelDB error: Corruption',
'indexes/txindex/CURRENT': 'LevelDB error: Corruption',
# Perturbing these files does not result in a startup error:
# 'indexes/blockfilter/basic/*.dat', 'indexes/txindex/MANIFEST*', 'indexes/txindex/LOCK'
}
for file_patt, err_fragment in files_to_delete.items():
@ -135,9 +145,10 @@ class InitTest(BitcoinTestFramework):
self.stop_node(0)
self.log.info("Test startup errors after perturbing certain essential files")
dirs = ["blocks", "chainstate", "indexes"]
for file_patt, err_fragment in files_to_perturb.items():
shutil.copytree(node.chain_path / "blocks", node.chain_path / "blocks_bak")
shutil.copytree(node.chain_path / "chainstate", node.chain_path / "chainstate_bak")
for dir in dirs:
shutil.copytree(node.chain_path / dir, node.chain_path / f"{dir}_bak")
target_files = list(node.chain_path.glob(file_patt))
for target_file in target_files:
@ -151,10 +162,9 @@ class InitTest(BitcoinTestFramework):
start_expecting_error(err_fragment)
shutil.rmtree(node.chain_path / "blocks")
shutil.rmtree(node.chain_path / "chainstate")
shutil.move(node.chain_path / "blocks_bak", node.chain_path / "blocks")
shutil.move(node.chain_path / "chainstate_bak", node.chain_path / "chainstate")
for dir in dirs:
shutil.rmtree(node.chain_path / dir)
shutil.move(node.chain_path / f"{dir}_bak", node.chain_path / dir)
def init_pid_test(self):
BITCOIN_PID_FILENAME_CUSTOM = "my_fancy_bitcoin_pid_file.foobar"

View File

@ -45,6 +45,7 @@ from test_framework.util import (
assert_equal,
assert_greater_than,
assert_raises_rpc_error,
sync_txindex,
)
from test_framework.wallet import MiniWallet
from test_framework.wallet_util import generate_keypair
@ -270,6 +271,7 @@ class MempoolAcceptanceTest(BitcoinTestFramework):
self.log.info('A coinbase transaction')
# Pick the input of the first tx we created, so it has to be a coinbase tx
sync_txindex(self, node)
raw_tx_coinbase_spent = node.getrawtransaction(txid=node.decoderawtransaction(hexstring=raw_tx_in_block)['vin'][0]['txid'])
tx = tx_from_hex(raw_tx_coinbase_spent)
self.check_mempool_result(

View File

@ -34,6 +34,7 @@ from test_framework.util import (
assert_equal,
assert_greater_than,
assert_raises_rpc_error,
sync_txindex,
)
from test_framework.wallet import (
getnewdestination,
@ -70,7 +71,7 @@ class RawTransactionsTest(BitcoinTestFramework):
self.num_nodes = 3
self.extra_args = [
["-txindex"],
["-txindex"],
[],
["-fastprune", "-prune=1"],
]
# whitelist peers to speed up tx relay / mempool sync
@ -109,6 +110,7 @@ class RawTransactionsTest(BitcoinTestFramework):
self.log.info(f"Test getrawtransaction {'with' if n == 0 else 'without'} -txindex")
if n == 0:
sync_txindex(self, self.nodes[n])
# With -txindex.
# 1. valid parameters - only supply txid
assert_equal(self.nodes[n].getrawtransaction(txId), tx['hex'])

View File

@ -12,6 +12,7 @@ from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
assert_equal,
assert_raises_rpc_error,
sync_txindex,
)
from test_framework.wallet import MiniWallet
@ -77,6 +78,7 @@ class MerkleBlockTest(BitcoinTestFramework):
assert_equal(sorted(self.nodes[0].verifytxoutproof(self.nodes[0].gettxoutproof([txid1, txid2]))), sorted(txlist))
assert_equal(sorted(self.nodes[0].verifytxoutproof(self.nodes[0].gettxoutproof([txid2, txid1]))), sorted(txlist))
# We can always get a proof if we have a -txindex
sync_txindex(self, self.nodes[1])
assert_equal(self.nodes[0].verifytxoutproof(self.nodes[1].gettxoutproof([txid_spent])), [txid_spent])
# We can't get a proof if we specify transactions from different blocks
assert_raises_rpc_error(-5, "Not all transactions found in specified or retrieved block", self.nodes[0].gettxoutproof, [txid1, txid3])

View File

@ -592,3 +592,10 @@ def find_vout_for_address(node, txid, addr):
if addr == tx["vout"][i]["scriptPubKey"]["address"]:
return i
raise RuntimeError("Vout not found for address: txid=%s, addr=%s" % (txid, addr))
def sync_txindex(test_framework, node):
test_framework.log.debug("Waiting for node txindex to sync")
sync_start = int(time.time())
test_framework.wait_until(lambda: node.getindexinfo("txindex")["txindex"]["synced"])
test_framework.log.debug(f"Synced in {time.time() - sync_start} seconds")

View File

@ -117,7 +117,6 @@ class AddressInputTypeGrouping(BitcoinTestFramework):
self.extra_args = [
[
"-addresstype=bech32",
"-txindex",
],
[
"-addresstype=p2sh-segwit",