Merge bitcoin/bitcoin#34905: Update string and net utils for future HTTP operations

0e712b3812 Make DynSock accepted sockets queue optional, with precise lifetime (Matthew Zipkin)
3de02abf3f util/test: Add string_view constructor to LineReader and remove StringToBuffer (Matthew Zipkin)
b0ca400612 string: replace AsciiCaseInsensitiveKeyEqual with CaseInsensitiveEqual (Matthew Zipkin)
8172099293 util: get number of bytes consumed from buffer by LineReader (Matthew Zipkin)

Pull request description:

  This is a follow-up to #34242 and is the first few commits of #32061

  As review and refinement of the replacement HTTP server progresses, some new utilities were needed and added. This PR updates those utilities as work continues on #32061.

  ### LineReader

  In order to enforce strict limits on the total size of headers in HTTPRequest, we add a method to `LineReader` to give us the total amount of data that has been read from the buffer so far. See https://github.com/bitcoin/bitcoin/pull/32061#discussion_r2949287329

  ### CaseInsensitiveEqual

  HTTP headers are case-insensitive. An early version of #32061 used an unordered_map for this and therefore we needed a comparator struct. However that unordered_map was replaced by a simpler `std::vector` of `std::pair` so we can remove the struct and use methods that already exist in the codebase.

  ### StringToBytes

  `StringToBuffer` was introduced in #34242 to test LineReader but review of #32061 indicated that it would be more optimal to return a span of bytes instead of a vector. See https://github.com/bitcoin/bitcoin/pull/32061#discussion_r2892431378

  ### Split DynSock constructor for two use cases: listening / accepting sockets

  See https://github.com/bitcoin/bitcoin/pull/32061#discussion_r2895891437. DynSock was introduced in #30988 and is not used anywhere in master yet. If it's used as a listening socket, it provides connected sockets. If it's used as a connected socket, it provides I/O pipes. By making the queue of connected sockets optional we can clean up the ownership / lifetime of the class members.

ACKs for top commit:
  fjahr:
    Code review ACK 0e712b3812
  vasild:
    ACK 0e712b3812

Tree-SHA512: 234c79a00c03cb3952dce2a3c5e59859bd0cbfc5f0a552ad2065e998320a12b533b06adbe294745c690a9e19c2f5f79bca3aa5a44342ee1820037342799566f2
This commit is contained in:
merge-script
2026-04-09 13:43:44 +08:00
12 changed files with 84 additions and 88 deletions

View File

@@ -4,7 +4,7 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <bech32.h>
#include <test/util/str.h>
#include <util/strencodings.h>
#include <boost/test/unit_test.hpp>

View File

@@ -5,7 +5,6 @@
#include <bech32.h>
#include <test/fuzz/fuzz.h>
#include <test/fuzz/FuzzedDataProvider.h>
#include <test/util/str.h>
#include <util/strencodings.h>
#include <cassert>

View File

@@ -13,7 +13,6 @@ add_library(test_util STATIC EXCLUDE_FROM_ALL
random.cpp
script.cpp
setup_common.cpp
str.cpp
time.cpp
transaction_utils.cpp
txmempool.cpp

View File

@@ -346,11 +346,16 @@ void DynSock::Pipe::WaitForDataOrEof(UniqueLock<Mutex>& lock)
});
}
DynSock::DynSock(std::shared_ptr<Pipes> pipes, std::shared_ptr<Queue> accept_sockets)
DynSock::DynSock(std::shared_ptr<Pipes> pipes, Queue* accept_sockets)
: m_pipes{pipes}, m_accept_sockets{accept_sockets}
{
}
// Connected-socket flavour: only the send/recv pipes are supplied, so the
// accept queue pointer is left null. Accept() asserts on a null queue and
// WaitMany() checks for it before dereferencing.
DynSock::DynSock(std::shared_ptr<Pipes> pipes)
    : m_pipes{pipes},
      m_accept_sockets{nullptr}
{
}
DynSock::~DynSock()
{
m_pipes->send.Eof();
@@ -369,6 +374,7 @@ ssize_t DynSock::Send(const void* buf, size_t len, int) const
/**
 * Return the next socket from the accept queue of a listening DynSock.
 * @param[out] addr     Forwarded unchanged to ZeroSock::Accept().
 * @param[in,out] addr_len Forwarded unchanged to ZeroSock::Accept().
 * @returns the popped socket, or nullptr when Pop() yields no value.
 */
std::unique_ptr<Sock> DynSock::Accept(sockaddr* addr, socklen_t* addr_len) const
{
// The queue pointer is null for a DynSock built with the pipes-only constructor;
// calling Accept() on such a socket is a programming error.
assert(m_accept_sockets && "Accept() called on non-listening DynSock");
// Let ZeroSock handle addr/addr_len; its return value is deliberately discarded.
// NOTE(review): presumably this fills in a placeholder peer address - confirm in ZeroSock::Accept().
ZeroSock::Accept(addr, addr_len);
// Pop() returns an optional; an empty one is mapped to nullptr for the caller.
return m_accept_sockets->Pop().value_or(nullptr);
}
@@ -403,7 +409,7 @@ bool DynSock::WaitMany(std::chrono::milliseconds timeout, EventsPerSock& events_
if ((events.requested & Sock::RECV) != 0) {
auto dyn_sock = reinterpret_cast<const DynSock*>(sock.get());
uint8_t b;
if (dyn_sock->m_pipes->recv.GetBytes(&b, 1, MSG_PEEK) == 1 || !dyn_sock->m_accept_sockets->Empty()) {
if (dyn_sock->m_pipes->recv.GetBytes(&b, 1, MSG_PEEK) == 1 || (dyn_sock->m_accept_sockets && !dyn_sock->m_accept_sockets->Empty())) {
events.occurred |= Sock::RECV;
at_least_one_event_occurred = true;
}

View File

@@ -5,6 +5,7 @@
#ifndef BITCOIN_TEST_UTIL_NET_H
#define BITCOIN_TEST_UTIL_NET_H
#include <attributes.h>
#include <compat/compat.h>
#include <netmessagemaker.h>
#include <net.h>
@@ -336,7 +337,15 @@ public:
* @param[in] pipes Send/recv pipes used by the Send() and Recv() methods.
* @param[in] accept_sockets Sockets to return by the Accept() method.
*/
explicit DynSock(std::shared_ptr<Pipes> pipes, std::shared_ptr<Queue> accept_sockets);
explicit DynSock(std::shared_ptr<Pipes> pipes, Queue* accept_sockets LIFETIMEBOUND);
/**
* Create a new mocked sock that represents a connected socket. It has pipes
* for data transport but there is no queue because connected sockets do
* not introduce new connected sockets.
* @param[in] pipes Send/recv pipes used by the Send() and Recv() methods.
*/
explicit DynSock(std::shared_ptr<Pipes> pipes);
~DynSock();
@@ -356,7 +365,7 @@ private:
DynSock& operator=(Sock&&) override;
std::shared_ptr<Pipes> m_pipes;
std::shared_ptr<Queue> m_accept_sockets;
Queue* const m_accept_sockets;
};
template <typename... Args>

View File

@@ -1,21 +0,0 @@
// Copyright (c) 2019-present The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <test/util/str.h>
#include <cstdint>
#include <string>
bool CaseInsensitiveEqual(const std::string& s1, const std::string& s2)
{
if (s1.size() != s2.size()) return false;
for (size_t i = 0; i < s1.size(); ++i) {
char c1 = s1[i];
if (c1 >= 'A' && c1 <= 'Z') c1 -= ('A' - 'a');
char c2 = s2[i];
if (c2 >= 'A' && c2 <= 'Z') c2 -= ('A' - 'a');
if (c1 != c2) return false;
}
return true;
}

View File

@@ -7,8 +7,6 @@
#include <string>
bool CaseInsensitiveEqual(const std::string& s1, const std::string& s2);
/**
* Increment a string. Useful to enumerate all fixed length strings with
* characters in [min_char, max_char].

View File

@@ -4,7 +4,6 @@
#include <util/strencodings.h>
#include <util/string.h>
#include <vector>
#include <boost/test/unit_test.hpp>
#include <test/util/common.h>
@@ -43,12 +42,6 @@ void FailFmtWithError(const char* wrong_fmt, std::string_view error)
BOOST_CHECK_EXCEPTION(CheckNumFormatSpecifiers<WrongNumArgs>(wrong_fmt), const char*, HasReason{error});
}
std::vector<std::byte> StringToBuffer(const std::string& str)
{
auto span = std::as_bytes(std::span(str));
return {span.begin(), span.end()};
}
BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
{
PassFmt<0>("");
@@ -155,49 +148,42 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
HasReason{"tinyformat: Too many conversion specifiers in format string"});
}
BOOST_AUTO_TEST_CASE(ascii_case_insensitive_key_equal_test)
BOOST_AUTO_TEST_CASE(case_insensitive_equal_test)
{
AsciiCaseInsensitiveKeyEqual cmp;
BOOST_CHECK(!cmp("A", "B"));
BOOST_CHECK(!cmp("A", "b"));
BOOST_CHECK(!cmp("a", "B"));
BOOST_CHECK(!cmp("B", "A"));
BOOST_CHECK(!cmp("B", "a"));
BOOST_CHECK(!cmp("b", "A"));
BOOST_CHECK(!cmp("A", "AA"));
BOOST_CHECK(cmp("A-A", "a-a"));
BOOST_CHECK(cmp("A", "A"));
BOOST_CHECK(cmp("A", "a"));
BOOST_CHECK(cmp("a", "a"));
BOOST_CHECK(cmp("B", "b"));
BOOST_CHECK(cmp("ab", "aB"));
BOOST_CHECK(cmp("Ab", "aB"));
BOOST_CHECK(cmp("AB", "ab"));
BOOST_CHECK(!CaseInsensitiveEqual("A", "B"));
BOOST_CHECK(!CaseInsensitiveEqual("A", "b"));
BOOST_CHECK(!CaseInsensitiveEqual("a", "B"));
BOOST_CHECK(!CaseInsensitiveEqual("B", "A"));
BOOST_CHECK(!CaseInsensitiveEqual("B", "a"));
BOOST_CHECK(!CaseInsensitiveEqual("b", "A"));
BOOST_CHECK(!CaseInsensitiveEqual("A", "AA"));
BOOST_CHECK(CaseInsensitiveEqual("A-A", "a-a"));
BOOST_CHECK(CaseInsensitiveEqual("A", "A"));
BOOST_CHECK(CaseInsensitiveEqual("A", "a"));
BOOST_CHECK(CaseInsensitiveEqual("a", "a"));
BOOST_CHECK(CaseInsensitiveEqual("B", "b"));
BOOST_CHECK(CaseInsensitiveEqual("ab", "aB"));
BOOST_CHECK(CaseInsensitiveEqual("Ab", "aB"));
BOOST_CHECK(CaseInsensitiveEqual("AB", "ab"));
// Use a character with value > 127
// to ensure we don't trigger implicit-integer-sign-change
BOOST_CHECK(!cmp("a", "\xe4"));
}
BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test)
{
AsciiCaseInsensitiveHash hsh;
BOOST_CHECK_NE(hsh("A"), hsh("B"));
BOOST_CHECK_NE(hsh("AA"), hsh("A"));
BOOST_CHECK_EQUAL(hsh("A"), hsh("a"));
BOOST_CHECK_EQUAL(hsh("Ab"), hsh("aB"));
BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe"));
BOOST_CHECK(!CaseInsensitiveEqual("a", "\xe4"));
}
BOOST_AUTO_TEST_CASE(line_reader_test)
{
{
// Check three lines terminated by \n and \r\n, trimming whitespace
const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food\n")};
std::string_view input = "once upon a time\n there was a dog \r\nwho liked food\n";
LineReader reader(input, /*max_line_length=*/128);
BOOST_CHECK_EQUAL(reader.Consumed(), 0);
BOOST_CHECK_EQUAL(reader.Remaining(), 51);
std::optional<std::string> line1{reader.ReadLine()};
BOOST_CHECK_EQUAL(reader.Consumed(), 17);
BOOST_CHECK_EQUAL(reader.Remaining(), 34);
std::optional<std::string> line2{reader.ReadLine()};
BOOST_CHECK_EQUAL(reader.Consumed(), 36);
BOOST_CHECK_EQUAL(reader.Remaining(), 15);
std::optional<std::string> line3{reader.ReadLine()};
std::optional<std::string> line4{reader.ReadLine()};
@@ -208,11 +194,13 @@ BOOST_AUTO_TEST_CASE(line_reader_test)
BOOST_CHECK_EQUAL(line1.value(), "once upon a time");
BOOST_CHECK_EQUAL(line2.value(), "there was a dog");
BOOST_CHECK_EQUAL(line3.value(), "who liked food");
BOOST_CHECK_EQUAL(reader.Consumed(), 51);
BOOST_CHECK_EQUAL(reader.Remaining(), 0);
}
{
// Do not exceed max_line_length + 1 while searching for \n
// Test with 22-character line + \n + 23-character line + \n
const std::vector<std::byte> input{StringToBuffer("once upon a time there\nwas a dog who liked tea\n")};
std::string_view input = "once upon a time there\nwas a dog who liked tea\n";
LineReader reader1(input, /*max_line_length=*/22);
// First line is exactly the length of max_line_length
@@ -230,26 +218,26 @@ BOOST_AUTO_TEST_CASE(line_reader_test)
}
{
// Empty lines are empty
const std::vector<std::byte> input{StringToBuffer("\n")};
std::string_view input = "\n";
LineReader reader(input, /*max_line_length=*/1024);
BOOST_CHECK_EQUAL(reader.ReadLine(), "");
BOOST_CHECK(!reader.ReadLine());
}
{
// Empty buffers are null
const std::vector<std::byte> input{StringToBuffer("")};
std::string_view input;
LineReader reader(input, /*max_line_length=*/1024);
BOOST_CHECK(!reader.ReadLine());
}
{
// Even one character is too long, if it's not \n
const std::vector<std::byte> input{StringToBuffer("ab\n")};
std::string_view input = "ab\n";
LineReader reader(input, /*max_line_length=*/1);
// First line is +1 character too long
BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
}
{
const std::vector<std::byte> input{StringToBuffer("a\nb\n")};
std::string_view input = "a\nb\n";
LineReader reader(input, /*max_line_length=*/1);
BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
BOOST_CHECK_EQUAL(reader.ReadLine(), "b");
@@ -257,7 +245,7 @@ BOOST_AUTO_TEST_CASE(line_reader_test)
}
{
// If ReadLine fails, the iterator is reset and we can ReadLength instead
const std::vector<std::byte> input{StringToBuffer("a\nbaboon\n")};
std::string_view input = "a\nbaboon\n";
LineReader reader(input, /*max_line_length=*/1);
BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
// "baboon" is too long
@@ -273,7 +261,7 @@ BOOST_AUTO_TEST_CASE(line_reader_test)
}
{
// The end of the buffer (EOB) does not count as end of line \n
const std::vector<std::byte> input{StringToBuffer("once upon a time there")};
std::string_view input = "once upon a time there";
LineReader reader(input, /*max_line_length=*/22);
// First line is exactly the length of max_line_length, but that doesn't matter because \n is missing
@@ -285,7 +273,7 @@ BOOST_AUTO_TEST_CASE(line_reader_test)
}
{
// Read specific number of bytes regardless of max_line_length or \n unless buffer is too short
const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
std::string_view input = "once upon a time\n there was a dog \r\nwho liked food";
LineReader reader(input, /*max_line_length=*/1);
BOOST_CHECK_EQUAL(reader.ReadLength(0), "");
BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc");

View File

@@ -427,3 +427,16 @@ std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_mu
}
return *parsed_num * unit_amount;
}
/**
 * Compare two strings for equality, ignoring the case of ASCII letters only.
 * Bytes outside 'A'..'Z' (including values above 127) are compared verbatim,
 * so the result does not depend on the current locale.
 */
bool CaseInsensitiveEqual(std::string_view s1, std::string_view s2)
{
    // Strings of different length can never match.
    if (s1.size() != s2.size()) return false;

    // Distance between an ASCII uppercase letter and its lowercase form.
    constexpr char case_offset = 'a' - 'A';

    auto other = s2.begin();
    for (char left : s1) {
        char right = *other++;
        if (left >= 'A' && left <= 'Z') left += case_offset;
        if (right >= 'A' && right <= 'Z') right += case_offset;
        if (left != right) return false;
    }
    return true;
}

View File

@@ -325,6 +325,14 @@ std::string Capitalize(std::string str);
*/
std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
/**
* Locale-independent, ASCII-only comparator
* @param[in] s1 a string to compare
* @param[in] s2 another string to compare
* @returns true if s1 == s2 when both strings are converted to lowercase
*/
bool CaseInsensitiveEqual(std::string_view s1, std::string_view s2);
namespace util {
/** consteval version of HexDigit() without the lookup table. */
consteval uint8_t ConstevalHexDigit(const char c)
@@ -353,20 +361,6 @@ struct Hex {
};
} // namespace detail
struct AsciiCaseInsensitiveKeyEqual {
bool operator()(std::string_view s1, std::string_view s2) const
{
return ToLower(s1) == ToLower(s2);
}
};
struct AsciiCaseInsensitiveHash {
size_t operator()(std::string_view s) const
{
return std::hash<std::string>{}(ToLower(s));
}
};
/**
* ""_hex is a compile-time user-defined literal returning a
* `std::array<std::byte>`, equivalent to ParseHex(). Variants provided:

View File

@@ -70,4 +70,9 @@ size_t LineReader::Remaining() const
{
return std::distance(it, end);
}
// Number of bytes between `start` and the current read iterator `it`.
size_t LineReader::Consumed() const
{
    const auto bytes_read{std::distance(start, it)};
    return bytes_read;
}
} // namespace util

View File

@@ -270,6 +270,7 @@ struct LineReader {
std::span<const std::byte>::iterator it;
explicit LineReader(std::span<const std::byte> buffer, size_t max_line_length);
explicit LineReader(std::string_view str, size_t max_line_length) : LineReader{std::as_bytes(std::span{str}), max_line_length} {}
/**
* Returns a string from current iterator position up to (but not including) next \n
@@ -295,6 +296,11 @@ struct LineReader {
* Returns remaining size of bytes in buffer
*/
size_t Remaining() const;
/**
* Returns number of bytes already read from buffer
*/
size_t Consumed() const;
};
} // namespace util