mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-06-16 17:48:59 +02:00
string: add LineReader
This is a helper struct to parse HTTP messages from data in buffers from sockets. HTTP messages begin with headers, which are lines terminated by CRLF (\r\n; a bare \n is accepted as well), followed by an arbitrary amount of body data. Whitespace is trimmed from the field lines but not from the body. https://httpwg.org/specs/rfc9110.html#rfc.section.5
This commit is contained in:
committed by
Matthew Zipkin
parent
ee62405cce
commit
1911db8c6d
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <util/strencodings.h>
|
||||
#include <util/string.h>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <test/util/setup_common.h>
|
||||
@@ -41,6 +42,12 @@ void FailFmtWithError(const char* wrong_fmt, std::string_view error)
|
||||
BOOST_CHECK_EXCEPTION(CheckNumFormatSpecifiers<WrongNumArgs>(wrong_fmt), const char*, HasReason{error});
|
||||
}
|
||||
|
||||
/**
 * Test helper: copy the characters of a string into a byte vector, so that
 * string literals can be used as input buffers.
 *
 * @param[in] str The text to convert.
 * @returns a vector holding one std::byte per character of str, in order.
 */
std::vector<std::byte> StringToBuffer(const std::string& str)
{
    std::vector<std::byte> buffer;
    buffer.reserve(str.size());
    for (const char ch : str) {
        // The cast keeps the bit pattern of the character, including values above 0x7f
        buffer.push_back(static_cast<std::byte>(ch));
    }
    return buffer;
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
|
||||
{
|
||||
PassFmt<0>("");
|
||||
@@ -181,4 +188,114 @@ BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test)
|
||||
BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe"));
|
||||
}
|
||||
|
||||
// Exercises LineReader::ReadLine(), ReadLength() and Remaining() on small
// in-memory buffers: line termination and trimming, the max_line_length
// limit, recovery after a failed read, and end-of-buffer handling.
BOOST_AUTO_TEST_CASE(line_reader_test)
{
    {
        // Check three lines terminated by \n and \r\n, trimming whitespace
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food\n")};
        LineReader reader(input, /*max_line_length=*/128);
        std::optional<std::string> line1{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Remaining(), 34);
        std::optional<std::string> line2{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Remaining(), 15);
        std::optional<std::string> line3{reader.ReadLine()};
        std::optional<std::string> line4{reader.ReadLine()};
        // These must be fatal: .value() is called on the results below, and
        // with a non-fatal check a missing line would surface as a
        // std::bad_optional_access instead of a readable assertion failure.
        BOOST_REQUIRE(line1);
        BOOST_REQUIRE(line2);
        BOOST_REQUIRE(line3);
        BOOST_CHECK(!line4);
        BOOST_CHECK_EQUAL(line1.value(), "once upon a time");
        BOOST_CHECK_EQUAL(line2.value(), "there was a dog");
        BOOST_CHECK_EQUAL(line3.value(), "who liked food");
    }
    {
        // Do not exceed max_line_length + 1 while searching for \n
        // Test with 22-character line + \n + 23-character line + \n
        const std::vector<std::byte> input{StringToBuffer("once upon a time there\nwas a dog who liked tea\n")};

        LineReader reader1(input, /*max_line_length=*/22);
        // First line is exactly the length of max_line_length
        BOOST_CHECK_EQUAL(reader1.ReadLine(), "once upon a time there");
        // Second line is +1 character too long
        BOOST_CHECK_EXCEPTION(reader1.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});

        // Increase max_line_length by 1
        LineReader reader2(input, /*max_line_length=*/23);
        // Both lines fit within limit
        BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon a time there");
        BOOST_CHECK_EQUAL(reader2.ReadLine(), "was a dog who liked tea");
        // End of buffer reached
        BOOST_CHECK(!reader2.ReadLine());
    }
    {
        // Empty lines are empty
        const std::vector<std::byte> input{StringToBuffer("\n")};
        LineReader reader(input, /*max_line_length=*/1024);
        BOOST_CHECK_EQUAL(reader.ReadLine(), "");
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // Empty buffers are null
        const std::vector<std::byte> input{StringToBuffer("")};
        LineReader reader(input, /*max_line_length=*/1024);
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // Even a single character beyond max_line_length is too long, if it's not \n
        const std::vector<std::byte> input{StringToBuffer("ab\n")};
        LineReader reader(input, /*max_line_length=*/1);
        // First line is +1 character too long
        BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
    }
    {
        // Consecutive lines of exactly max_line_length are all accepted
        const std::vector<std::byte> input{StringToBuffer("a\nb\n")};
        LineReader reader(input, /*max_line_length=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
        BOOST_CHECK_EQUAL(reader.ReadLine(), "b");
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // If ReadLine fails, the iterator is reset and we can ReadLength instead
        const std::vector<std::byte> input{StringToBuffer("a\nbaboon\n")};
        LineReader reader(input, /*max_line_length=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
        // "baboon" is too long
        BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
        BOOST_CHECK_EQUAL(reader.ReadLength(1), "b");
        BOOST_CHECK_EQUAL(reader.ReadLength(1), "a");
        BOOST_CHECK_EQUAL(reader.ReadLength(2), "bo");
        // "on" is too long
        BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
        BOOST_CHECK_EQUAL(reader.ReadLength(1), "o");
        BOOST_CHECK_EQUAL(reader.ReadLine(), "n"); // now the remainder of the buffer fits in one line
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // The end of the buffer (EOB) does not count as end of line \n
        const std::vector<std::byte> input{StringToBuffer("once upon a time there")};

        LineReader reader(input, /*max_line_length=*/22);
        // First line is exactly the length of max_line_length, but that doesn't matter because \n is missing
        BOOST_CHECK(!reader.ReadLine());
        // Data can still be read using ReadLength
        BOOST_CHECK_EQUAL(reader.ReadLength(22), "once upon a time there");
        // End of buffer reached
        BOOST_CHECK_EQUAL(reader.Remaining(), 0);
    }
    {
        // Read specific number of bytes regardless of max_line_length or \n unless buffer is too short
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader reader(input, /*max_line_length=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLength(0), "");
        BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t");
        BOOST_CHECK_EXCEPTION(reader.ReadLength(128), std::runtime_error, HasReason{"Not enough data in buffer"});
        // After the error the iterator is reset so we can try again
        BOOST_CHECK_EQUAL(reader.ReadLength(31), "here was a dog \r\nwho liked food");
        // End of buffer reached
        BOOST_CHECK_EQUAL(reader.Remaining(), 0);
    }
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
||||
@@ -13,4 +13,58 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin
|
||||
if (search.empty()) return;
|
||||
in_out = std::regex_replace(in_out, std::regex(search), substitute);
|
||||
}
|
||||
|
||||
LineReader::LineReader(std::span<const std::byte> buffer, size_t max_line_length)
|
||||
: start(buffer.begin()), end(buffer.end()), max_line_length(max_line_length), it(buffer.begin()) {}
|
||||
|
||||
std::optional<std::string> LineReader::ReadLine()
|
||||
{
|
||||
if (it == end) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto line_start = it;
|
||||
size_t count = 0;
|
||||
while (it != end) {
|
||||
// Read a character from the incoming buffer and increment the iterator
|
||||
auto c = static_cast<char>(*it);
|
||||
++it;
|
||||
++count;
|
||||
// If the character we just consumed was \n, the line is terminated.
|
||||
// The \n itself does not count against max_line_length.
|
||||
if (c == '\n') {
|
||||
const std::string_view untrimmed_line(reinterpret_cast<const char*>(std::to_address(line_start)), count);
|
||||
const std::string_view line = TrimStringView(untrimmed_line); // delete leading and trailing whitespace including \r and \n
|
||||
return std::string(line);
|
||||
}
|
||||
// If the character we just consumed gives us a line length greater
|
||||
// than max_line_length, and we are not at the end of the line (or buffer) yet,
|
||||
// that means the line we are currently reading is too long, and we throw.
|
||||
if (count > max_line_length) {
|
||||
// Reset iterator
|
||||
it = line_start;
|
||||
throw std::runtime_error("max_line_length exceeded by LineReader");
|
||||
}
|
||||
}
|
||||
// End of buffer reached without finding a \n or exceeding max_line_length.
|
||||
// Reset the iterator so the rest of the buffer can be read granularly
|
||||
// with ReadLength() and return null to indicate a line was not found.
|
||||
it = line_start;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Ignores max_line_length but won't overflow
|
||||
std::string LineReader::ReadLength(size_t len)
|
||||
{
|
||||
if (len == 0) return "";
|
||||
if (Remaining() < len) throw std::runtime_error("Not enough data in buffer");
|
||||
std::string out(reinterpret_cast<const char*>(std::to_address(it)), len);
|
||||
it += len;
|
||||
return out;
|
||||
}
|
||||
|
||||
size_t LineReader::Remaining() const
|
||||
{
|
||||
return std::distance(it, end);
|
||||
}
|
||||
} // namespace util
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <locale>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string> // IWYU pragma: export
|
||||
#include <string_view> // IWYU pragma: export
|
||||
@@ -260,6 +261,40 @@ template <typename T1, size_t PREFIX_LEN>
|
||||
return obj.size() >= PREFIX_LEN &&
|
||||
std::equal(std::begin(prefix), std::end(prefix), std::begin(obj));
|
||||
}
|
||||
|
||||
struct LineReader {
|
||||
const std::span<const std::byte>::iterator start;
|
||||
const std::span<const std::byte>::iterator end;
|
||||
const size_t max_line_length;
|
||||
std::span<const std::byte>::iterator it;
|
||||
|
||||
explicit LineReader(std::span<const std::byte> buffer, size_t max_line_length);
|
||||
|
||||
/**
|
||||
* Returns a string from current iterator position up to (but not including) next \n
|
||||
* and advances iterator to the character following the \n on success.
|
||||
* Will not return a line longer than max_line_length.
|
||||
* @returns the next string from the buffer.
|
||||
* std::nullopt if end of buffer is reached without finding a \n.
|
||||
* @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n.
|
||||
*/
|
||||
std::optional<std::string> ReadLine();
|
||||
|
||||
/**
|
||||
* Returns string from current iterator position of specified length
|
||||
* if possible and advances iterator on success.
|
||||
* May exceed max_line_length but will not read past end of buffer.
|
||||
* @param[in] len The number of bytes to read from the buffer
|
||||
* @returns a string of the expected length.
|
||||
* @throws a std::runtime_error if there is not enough data in the buffer.
|
||||
*/
|
||||
std::string ReadLength(size_t len);
|
||||
|
||||
/**
|
||||
* Returns remaining size of bytes in buffer
|
||||
*/
|
||||
size_t Remaining() const;
|
||||
};
|
||||
} // namespace util
|
||||
|
||||
#endif // BITCOIN_UTIL_STRING_H
|
||||
|
||||
Reference in New Issue
Block a user