string: add LineReader

This is a helper struct to parse HTTP messages from data in buffers
from sockets. HTTP messages begin with headers, which are lines
terminated by LF or CRLF (\n or \r\n), followed by an arbitrary amount of
body data. Whitespace is trimmed from the field lines but not the body.

https://httpwg.org/specs/rfc9110.html#rfc.section.5
This commit is contained in:
Matthew Zipkin
2024-09-27 15:22:17 -04:00
committed by Matthew Zipkin
parent ee62405cce
commit 1911db8c6d
3 changed files with 206 additions and 0 deletions

View File

@@ -4,6 +4,7 @@
#include <util/strencodings.h>
#include <util/string.h>
#include <vector>
#include <boost/test/unit_test.hpp>
#include <test/util/setup_common.h>
@@ -41,6 +42,12 @@ void FailFmtWithError(const char* wrong_fmt, std::string_view error)
BOOST_CHECK_EXCEPTION(CheckNumFormatSpecifiers<WrongNumArgs>(wrong_fmt), const char*, HasReason{error});
}
/**
 * Copy the characters of a string into a freshly allocated byte buffer.
 *
 * @param[in] str  The text to convert; embedded NUL bytes are preserved
 *                 because the full str.size() range is copied.
 * @returns a vector holding one std::byte per character of str, in order.
 */
std::vector<std::byte> StringToBuffer(const std::string& str)
{
    std::vector<std::byte> bytes;
    bytes.reserve(str.size());
    for (const char ch : str) {
        // Same bit pattern as viewing the char through std::as_bytes.
        bytes.push_back(static_cast<std::byte>(ch));
    }
    return bytes;
}
BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
{
PassFmt<0>("");
@@ -181,4 +188,114 @@ BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test)
BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe"));
}
// Exercises LineReader: line splitting and whitespace trimming, enforcement of
// max_line_length, behavior at the end of the buffer, and fixed-length reads.
BOOST_AUTO_TEST_CASE(line_reader_test)
{
    {
        // Check three lines terminated by \n and \r\n, trimming whitespace
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food\n")};
        LineReader reader(input, /*max_line_length=*/128);
        std::optional<std::string> line1{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Remaining(), 34);
        std::optional<std::string> line2{reader.ReadLine()};
        BOOST_CHECK_EQUAL(reader.Remaining(), 15);
        std::optional<std::string> line3{reader.ReadLine()};
        std::optional<std::string> line4{reader.ReadLine()};
        // The whole buffer was consumed by the three lines
        BOOST_CHECK_EQUAL(reader.Remaining(), 0);
        // REQUIRE (not CHECK) because the values are dereferenced below: a
        // missing line should stop here with a clear assertion failure rather
        // than surface as std::bad_optional_access from value().
        BOOST_REQUIRE(line1.has_value());
        BOOST_REQUIRE(line2.has_value());
        BOOST_REQUIRE(line3.has_value());
        BOOST_CHECK(!line4);
        BOOST_CHECK_EQUAL(line1.value(), "once upon a time");
        BOOST_CHECK_EQUAL(line2.value(), "there was a dog");
        BOOST_CHECK_EQUAL(line3.value(), "who liked food");
    }
    {
        // Do not exceed max_line_length + 1 while searching for \n
        // Test with 22-character line + \n + 23-character line + \n
        const std::vector<std::byte> input{StringToBuffer("once upon a time there\nwas a dog who liked tea\n")};
        LineReader reader1(input, /*max_line_length=*/22);
        // First line is exactly the length of max_line_length
        BOOST_CHECK_EQUAL(reader1.ReadLine(), "once upon a time there");
        // Second line is +1 character too long
        BOOST_CHECK_EXCEPTION(reader1.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});

        // Increase max_line_length by 1
        LineReader reader2(input, /*max_line_length=*/23);
        // Both lines fit within limit
        BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon a time there");
        BOOST_CHECK_EQUAL(reader2.ReadLine(), "was a dog who liked tea");
        // End of buffer reached
        BOOST_CHECK(!reader2.ReadLine());
    }
    {
        // Empty lines are empty
        const std::vector<std::byte> input{StringToBuffer("\n")};
        LineReader reader(input, /*max_line_length=*/1024);
        BOOST_CHECK_EQUAL(reader.ReadLine(), "");
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // Empty buffers are null
        const std::vector<std::byte> input{StringToBuffer("")};
        LineReader reader(input, /*max_line_length=*/1024);
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // Even one character is too long, if it's not \n
        const std::vector<std::byte> input{StringToBuffer("ab\n")};
        LineReader reader(input, /*max_line_length=*/1);
        // First line is +1 character too long
        BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
    }
    {
        // Lines of exactly max_line_length (here a single character) are
        // accepted; the terminating \n does not count against the limit
        const std::vector<std::byte> input{StringToBuffer("a\nb\n")};
        LineReader reader(input, /*max_line_length=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
        BOOST_CHECK_EQUAL(reader.ReadLine(), "b");
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // If ReadLine fails, the iterator is reset and we can ReadLength instead
        const std::vector<std::byte> input{StringToBuffer("a\nbaboon\n")};
        LineReader reader(input, /*max_line_length=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
        // "baboon" is too long
        BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
        BOOST_CHECK_EQUAL(reader.ReadLength(1), "b");
        BOOST_CHECK_EQUAL(reader.ReadLength(1), "a");
        BOOST_CHECK_EQUAL(reader.ReadLength(2), "bo");
        // "on" is too long
        BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
        BOOST_CHECK_EQUAL(reader.ReadLength(1), "o");
        BOOST_CHECK_EQUAL(reader.ReadLine(), "n"); // now the remainder of the buffer fits in one line
        BOOST_CHECK(!reader.ReadLine());
    }
    {
        // The end of the buffer (EOB) does not count as end of line \n
        const std::vector<std::byte> input{StringToBuffer("once upon a time there")};
        LineReader reader(input, /*max_line_length=*/22);
        // First line is exactly the length of max_line_length, but that doesn't matter because \n is missing
        BOOST_CHECK(!reader.ReadLine());
        // Data can still be read using ReadLength
        BOOST_CHECK_EQUAL(reader.ReadLength(22), "once upon a time there");
        // End of buffer reached
        BOOST_CHECK_EQUAL(reader.Remaining(), 0);
    }
    {
        // Read specific number of bytes regardless of max_line_length or \n unless buffer is too short
        const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
        LineReader reader(input, /*max_line_length=*/1);
        BOOST_CHECK_EQUAL(reader.ReadLength(0), "");
        BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a");
        BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t");
        BOOST_CHECK_EXCEPTION(reader.ReadLength(128), std::runtime_error, HasReason{"Not enough data in buffer"});
        // After the error the iterator is reset so we can try again
        BOOST_CHECK_EQUAL(reader.ReadLength(31), "here was a dog \r\nwho liked food");
        // End of buffer reached
        BOOST_CHECK_EQUAL(reader.Remaining(), 0);
    }
}
BOOST_AUTO_TEST_SUITE_END()

View File

@@ -13,4 +13,58 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin
if (search.empty()) return;
in_out = std::regex_replace(in_out, std::regex(search), substitute);
}
LineReader::LineReader(std::span<const std::byte> buffer, size_t max_line_length)
: start(buffer.begin()), end(buffer.end()), max_line_length(max_line_length), it(buffer.begin()) {}
std::optional<std::string> LineReader::ReadLine()
{
if (it == end) {
return std::nullopt;
}
auto line_start = it;
size_t count = 0;
while (it != end) {
// Read a character from the incoming buffer and increment the iterator
auto c = static_cast<char>(*it);
++it;
++count;
// If the character we just consumed was \n, the line is terminated.
// The \n itself does not count against max_line_length.
if (c == '\n') {
const std::string_view untrimmed_line(reinterpret_cast<const char*>(std::to_address(line_start)), count);
const std::string_view line = TrimStringView(untrimmed_line); // delete leading and trailing whitespace including \r and \n
return std::string(line);
}
// If the character we just consumed gives us a line length greater
// than max_line_length, and we are not at the end of the line (or buffer) yet,
// that means the line we are currently reading is too long, and we throw.
if (count > max_line_length) {
// Reset iterator
it = line_start;
throw std::runtime_error("max_line_length exceeded by LineReader");
}
}
// End of buffer reached without finding a \n or exceeding max_line_length.
// Reset the iterator so the rest of the buffer can be read granularly
// with ReadLength() and return null to indicate a line was not found.
it = line_start;
return std::nullopt;
}
/**
 * Copy exactly len bytes out of the buffer and move the read position past them.
 *
 * max_line_length plays no role here, and \n is treated like any other byte.
 *
 * @param[in] len  Number of bytes to read.
 * @returns a string of len bytes (empty when len is 0).
 * @throws std::runtime_error if fewer than len bytes remain; the read
 *         position is left where it was.
 */
std::string LineReader::ReadLength(size_t len)
{
    if (len == 0) {
        return {};
    }
    if (len > Remaining()) {
        throw std::runtime_error("Not enough data in buffer");
    }
    const std::string_view chunk(reinterpret_cast<const char*>(std::to_address(it)), len);
    std::string result(chunk);
    it += len;
    return result;
}
/** Number of bytes between the current read position and the end of the buffer. */
size_t LineReader::Remaining() const
{
    const auto unread = end - it;
    return unread;
}
} // namespace util

View File

@@ -11,6 +11,7 @@
#include <cstdint>
#include <cstring>
#include <locale>
#include <optional>
#include <sstream>
#include <string> // IWYU pragma: export
#include <string_view> // IWYU pragma: export
@@ -260,6 +261,40 @@ template <typename T1, size_t PREFIX_LEN>
return obj.size() >= PREFIX_LEN &&
std::equal(std::begin(prefix), std::end(prefix), std::begin(obj));
}
/**
* Forward-only reader over a byte buffer. Data can be consumed one
* \n-terminated line at a time with ReadLine(), or in chunks of a
* caller-chosen size with ReadLength(); both share the same read position.
* Only iterators into the buffer are stored, the bytes are not copied.
*/
struct LineReader {
//! Iterator to the first byte of the buffer passed to the constructor.
const std::span<const std::byte>::iterator start;
//! Iterator one past the last byte of the buffer.
const std::span<const std::byte>::iterator end;
//! Limit enforced by ReadLine(); ReadLength() ignores it.
const size_t max_line_length;
//! Current read position, advanced by successful ReadLine()/ReadLength() calls.
std::span<const std::byte>::iterator it;
/**
* @param[in] buffer The bytes to read from; reading starts at its first byte.
* @param[in] max_line_length Limit applied by ReadLine().
*/
explicit LineReader(std::span<const std::byte> buffer, size_t max_line_length);
/**
* Returns a string from current iterator position up to (but not including) next \n
* and advances iterator to the character following the \n on success.
* Leading and trailing whitespace (including a \r preceding the \n) is
* removed from the returned string by TrimStringView().
* Will not return a line longer than max_line_length. Every byte before
* the \n counts toward that limit, including whitespace that would be
* trimmed; the \n itself does not.
* If no line is returned (std::nullopt or exception) the iterator is left
* at the position it had before the call, so the data can still be read
* with ReadLength().
* @returns the next string from the buffer.
* std::nullopt if end of buffer is reached without finding a \n.
* @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n.
*/
std::optional<std::string> ReadLine();
/**
* Returns string from current iterator position of specified length
* if possible and advances iterator on success.
* May exceed max_line_length but will not read past end of buffer.
* The bytes are returned as-is: \n is not treated specially and no
* whitespace is trimmed.
* @param[in] len The number of bytes to read from the buffer
* @returns a string of the expected length (empty if len is 0).
* @throws a std::runtime_error if there is not enough data in the buffer;
* the iterator is not moved in that case.
*/
std::string ReadLength(size_t len);
/**
* Returns the number of bytes between the current iterator position and
* the end of the buffer.
*/
size_t Remaining() const;
};
} // namespace util
#endif // BITCOIN_UTIL_STRING_H