Merge bitcoin/bitcoin#33689: http: replace WorkQueue and single threads handling for ThreadPool

38fd85c676 http: replace WorkQueue and threads handling for ThreadPool (furszy)
c323f882ed fuzz: add test case for threadpool (TheCharlatan)
c528dd5f8c util: introduce general purpose thread pool (furszy)
6354b4fd7f tests: log node JSON-RPC errors during test setup (furszy)
45930a7941 http-server: guard against crashes from unhandled exceptions (furszy)

Pull request description:

  This has been a recent discovery; the general thread pool class created for #26966, cleanly
  integrates into the HTTP server. It simplifies init, shutdown and requests execution logic.
  Replacing code that was never unit tested for code that is properly unit and fuzz tested.
  Although our functional test framework extensively uses this RPC interface (that’s how
  we’ve been ensuring its correct behavior so far - which is not the best).

  This clearly separates the responsibilities:
  The HTTP server now focuses solely on receiving and dispatching requests, while ThreadPool handles
  concurrency, queuing, and execution.

  This will also allows us to experiment with further performance improvements at the task queuing and
  execution level, such as a lock-free structure or task prioritization or any other implementation detail
  like coroutines in the future, without having to deal with HTTP code that lives on a different layer.

  Note:
  The rationale behind introducing the ThreadPool first is to be able to easily cherry-pick it across different
  working paths. Some of the ones that are benefited from it are #26966 for the parallelization of the indexes
  initial sync, #31132 for the parallelization of the inputs fetching procedure, #32061 for the libevent replacement,
  the kernel API #30595 (https://github.com/bitcoin/bitcoin/pull/30595#discussion_r2413702370) to avoid blocking validation among others use cases not publicly available.

  Note 2:
  I could have created a wrapper around the existing code and replaced the `WorkQueue` in a subsequent
  commit, but it didn’t seem worth the extra commits and review effort. The `ThreadPool` implements
  essentially the same functionality in a more modern and cleaner way.

ACKs for top commit:
  Eunovo:
    ReACK 38fd85c676
  sedited:
    Re-ACK 38fd85c676
  pinheadmz:
    ACK 38fd85c676

Tree-SHA512: a0330e54ed504330ca874c42d4e318a909f548b2fb9ac46db8badf5935b9eec47dc4ed503d1b6f98574418e3473420ea45f60498be05545c4325cfa89dcca689
This commit is contained in:
merge-script
2026-02-11 18:04:17 +01:00
8 changed files with 694 additions and 120 deletions

View File

@@ -17,6 +17,7 @@
#include <util/signalinterrupt.h>
#include <util/strencodings.h>
#include <util/threadnames.h>
#include <util/threadpool.h>
#include <util/translation.h>
#include <condition_variable>
@@ -49,83 +50,6 @@ using common::InvalidPortErrMsg;
/** Maximum size of http request (request line + headers) */
static const size_t MAX_HEADERS_SIZE = 8192;
/** HTTP request work item */
class HTTPWorkItem final : public HTTPClosure
{
public:
HTTPWorkItem(std::unique_ptr<HTTPRequest> _req, const std::string &_path, const HTTPRequestHandler& _func):
req(std::move(_req)), path(_path), func(_func)
{
}
void operator()() override
{
func(req.get(), path);
}
std::unique_ptr<HTTPRequest> req;
private:
std::string path;
HTTPRequestHandler func;
};
/** Simple work queue for distributing work over multiple threads.
* Work items are simply callable objects.
*/
template <typename WorkItem>
class WorkQueue
{
private:
Mutex cs;
std::condition_variable cond GUARDED_BY(cs);
std::deque<std::unique_ptr<WorkItem>> queue GUARDED_BY(cs);
bool running GUARDED_BY(cs){true};
const size_t maxDepth;
public:
explicit WorkQueue(size_t _maxDepth) : maxDepth(_maxDepth)
{
}
/** Precondition: worker threads have all stopped (they have been joined).
*/
~WorkQueue() = default;
/** Enqueue a work item */
bool Enqueue(WorkItem* item) EXCLUSIVE_LOCKS_REQUIRED(!cs)
{
LOCK(cs);
if (!running || queue.size() >= maxDepth) {
return false;
}
queue.emplace_back(std::unique_ptr<WorkItem>(item));
cond.notify_one();
return true;
}
/** Thread function */
void Run() EXCLUSIVE_LOCKS_REQUIRED(!cs)
{
while (true) {
std::unique_ptr<WorkItem> i;
{
WAIT_LOCK(cs, lock);
while (running && queue.empty())
cond.wait(lock);
if (!running && queue.empty())
break;
i = std::move(queue.front());
queue.pop_front();
}
(*i)();
}
}
/** Interrupt and exit loops */
void Interrupt() EXCLUSIVE_LOCKS_REQUIRED(!cs)
{
LOCK(cs);
running = false;
cond.notify_all();
}
};
struct HTTPPathHandler
{
HTTPPathHandler(std::string _prefix, bool _exactMatch, HTTPRequestHandler _handler):
@@ -145,13 +69,14 @@ static struct event_base* eventBase = nullptr;
static struct evhttp* eventHTTP = nullptr;
//! List of subnets to allow RPC connections from
static std::vector<CSubNet> rpc_allow_subnets;
//! Work queue for handling longer requests off the event loop thread
static std::unique_ptr<WorkQueue<HTTPClosure>> g_work_queue{nullptr};
//! Handlers for (sub)paths
static GlobalMutex g_httppathhandlers_mutex;
static std::vector<HTTPPathHandler> pathHandlers GUARDED_BY(g_httppathhandlers_mutex);
//! Bound listening sockets
static std::vector<evhttp_bound_socket *> boundSockets;
//! Http thread pool - future: encapsulate in HttpContext
static ThreadPool g_threadpool_http("http");
static int g_max_queue_depth{100};
/**
* @brief Helps keep track of open `evhttp_connection`s with active `evhttp_requests`
@@ -327,14 +252,31 @@ static void http_request_cb(struct evhttp_request* req, void* arg)
// Dispatch to worker thread
if (i != iend) {
std::unique_ptr<HTTPWorkItem> item(new HTTPWorkItem(std::move(hreq), path, i->handler));
assert(g_work_queue);
if (g_work_queue->Enqueue(item.get())) {
(void)item.release(); /* if true, queue took ownership */
} else {
if (static_cast<int>(g_threadpool_http.WorkQueueSize()) >= g_max_queue_depth) {
LogWarning("Request rejected because http work queue depth exceeded, it can be increased with the -rpcworkqueue= setting");
item->req->WriteReply(HTTP_SERVICE_UNAVAILABLE, "Work queue depth exceeded");
hreq->WriteReply(HTTP_SERVICE_UNAVAILABLE, "Work queue depth exceeded");
return;
}
auto item = [req = std::move(hreq), in_path = std::move(path), fn = i->handler]() {
std::string err_msg;
try {
fn(req.get(), in_path);
return;
} catch (const std::exception& e) {
LogWarning("Unexpected error while processing request for '%s'. Error msg: '%s'", req->GetURI(), e.what());
err_msg = e.what();
} catch (...) {
LogWarning("Unknown error while processing request for '%s'", req->GetURI());
err_msg = "unknown error";
}
// Reply so the client doesn't hang waiting for the response.
req->WriteHeader("Connection", "close");
// TODO: Implement specific error formatting for the REST and JSON-RPC servers responses.
req->WriteReply(HTTP_INTERNAL_SERVER_ERROR, err_msg);
};
[[maybe_unused]] auto _{g_threadpool_http.Submit(std::move(item))};
} else {
hreq->WriteReply(HTTP_NOT_FOUND);
}
@@ -412,13 +354,6 @@ static bool HTTPBindAddresses(struct evhttp* http)
return !boundSockets.empty();
}
/** Simple wrapper to set thread name and run work queue */
static void HTTPWorkQueueRun(WorkQueue<HTTPClosure>* queue, int worker_num)
{
util::ThreadRename(strprintf("httpworker.%i", worker_num));
queue->Run();
}
/** libevent event log callback */
static void libevent_log_cb(int severity, const char *msg)
{
@@ -475,10 +410,9 @@ bool InitHTTPServer(const util::SignalInterrupt& interrupt)
}
LogDebug(BCLog::HTTP, "Initialized HTTP server\n");
int workQueueDepth = std::max((long)gArgs.GetIntArg("-rpcworkqueue", DEFAULT_HTTP_WORKQUEUE), 1L);
LogDebug(BCLog::HTTP, "creating work queue of depth %d\n", workQueueDepth);
g_max_queue_depth = std::max((long)gArgs.GetIntArg("-rpcworkqueue", DEFAULT_HTTP_WORKQUEUE), 1L);
LogDebug(BCLog::HTTP, "set work queue of depth %d\n", g_max_queue_depth);
g_work_queue = std::make_unique<WorkQueue<HTTPClosure>>(workQueueDepth);
// transfer ownership to eventBase/HTTP via .release()
eventBase = base_ctr.release();
eventHTTP = http_ctr.release();
@@ -494,17 +428,13 @@ void UpdateHTTPServerLogging(bool enable) {
}
static std::thread g_thread_http;
static std::vector<std::thread> g_thread_http_workers;
void StartHTTPServer()
{
int rpcThreads = std::max((long)gArgs.GetIntArg("-rpcthreads", DEFAULT_HTTP_THREADS), 1L);
LogInfo("Starting HTTP server with %d worker threads\n", rpcThreads);
g_threadpool_http.Start(rpcThreads);
g_thread_http = std::thread(ThreadHTTP, eventBase);
for (int i = 0; i < rpcThreads; i++) {
g_thread_http_workers.emplace_back(HTTPWorkQueueRun, g_work_queue.get(), i);
}
}
void InterruptHTTPServer()
@@ -514,21 +444,17 @@ void InterruptHTTPServer()
// Reject requests on current connections
evhttp_set_gencb(eventHTTP, http_reject_request_cb, nullptr);
}
if (g_work_queue) {
g_work_queue->Interrupt();
}
// Interrupt pool after disabling requests
g_threadpool_http.Interrupt();
}
void StopHTTPServer()
{
LogDebug(BCLog::HTTP, "Stopping HTTP server\n");
if (g_work_queue) {
LogDebug(BCLog::HTTP, "Waiting for HTTP worker threads to exit\n");
for (auto& thread : g_thread_http_workers) {
thread.join();
}
g_thread_http_workers.clear();
}
LogDebug(BCLog::HTTP, "Waiting for HTTP worker threads to exit\n");
g_threadpool_http.Stop();
// Unlisten sockets, these are what make the event loop running, which means
// that after this and all connections are closed the event loop will quit.
for (evhttp_bound_socket *socket : boundSockets) {
@@ -556,7 +482,6 @@ void StopHTTPServer()
event_base_free(eventBase);
eventBase = nullptr;
}
g_work_queue.reset();
LogDebug(BCLog::HTTP, "Stopped HTTP server\n");
}