From 1911db8c6dc6b32c8971b14b2b271ec39d9f3ab9 Mon Sep 17 00:00:00 2001 From: Matthew Zipkin Date: Fri, 27 Sep 2024 15:22:17 -0400 Subject: [PATCH] string: add LineReader This is a helper struct to parse HTTP messages from data in buffers from sockets. HTTP messages begin with headers which are CRLF-terminated lines (\n or \r\n) followed by an arbitrary amount of body data. Whitespace is trimmed from the field lines but not the body. https://httpwg.org/specs/rfc9110.html#rfc.section.5 --- src/test/util_string_tests.cpp | 117 +++++++++++++++++++++++++++++++++ src/util/string.cpp | 54 +++++++++++++++ src/util/string.h | 35 ++++++++++ 3 files changed, 206 insertions(+) diff --git a/src/test/util_string_tests.cpp b/src/test/util_string_tests.cpp index 4a49e5d0905..9f7513fbb30 100644 --- a/src/test/util_string_tests.cpp +++ b/src/test/util_string_tests.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -41,6 +42,12 @@ void FailFmtWithError(const char* wrong_fmt, std::string_view error) BOOST_CHECK_EXCEPTION(CheckNumFormatSpecifiers(wrong_fmt), const char*, HasReason{error}); } +std::vector StringToBuffer(const std::string& str) +{ + auto span = std::as_bytes(std::span(str)); + return {span.begin(), span.end()}; +} + BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) { PassFmt<0>(""); @@ -181,4 +188,114 @@ BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test) BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe")); } +BOOST_AUTO_TEST_CASE(line_reader_test) +{ + { + // Check three lines terminated by \n and \r\n, trimming whitespace + const std::vector input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food\n")}; + LineReader reader(input, /*max_line_length=*/128); + std::optional line1{reader.ReadLine()}; + BOOST_CHECK_EQUAL(reader.Remaining(), 34); + std::optional line2{reader.ReadLine()}; + BOOST_CHECK_EQUAL(reader.Remaining(), 15); + std::optional line3{reader.ReadLine()}; + std::optional line4{reader.ReadLine()}; + BOOST_CHECK(line1); +
BOOST_CHECK(line2); + BOOST_CHECK(line3); + BOOST_CHECK(!line4); + BOOST_CHECK_EQUAL(line1.value(), "once upon a time"); + BOOST_CHECK_EQUAL(line2.value(), "there was a dog"); + BOOST_CHECK_EQUAL(line3.value(), "who liked food"); + } + { + // Do not exceed max_line_length + 1 while searching for \n + // Test with 22-character line + \n + 23-character line + \n + const std::vector input{StringToBuffer("once upon a time there\nwas a dog who liked tea\n")}; + + LineReader reader1(input, /*max_line_length=*/22); + // First line is exactly the length of max_line_length + BOOST_CHECK_EQUAL(reader1.ReadLine(), "once upon a time there"); + // Second line is +1 character too long + BOOST_CHECK_EXCEPTION(reader1.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); + + // Increase max_line_length by 1 + LineReader reader2(input, /*max_line_length=*/23); + // Both lines fit within limit + BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon a time there"); + BOOST_CHECK_EQUAL(reader2.ReadLine(), "was a dog who liked tea"); + // End of buffer reached + BOOST_CHECK(!reader2.ReadLine()); + } + { + // Empty lines are empty + const std::vector input{StringToBuffer("\n")}; + LineReader reader(input, /*max_line_length=*/1024); + BOOST_CHECK_EQUAL(reader.ReadLine(), ""); + BOOST_CHECK(!reader.ReadLine()); + } + { + // Empty buffers are null + const std::vector input{StringToBuffer("")}; + LineReader reader(input, /*max_line_length=*/1024); + BOOST_CHECK(!reader.ReadLine()); + } + { + // Even one character is too long, if it's not \n + const std::vector input{StringToBuffer("ab\n")}; + LineReader reader(input, /*max_line_length=*/1); + // First line is +1 character too long + BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); + } + { + const std::vector input{StringToBuffer("a\nb\n")}; + LineReader reader(input, /*max_line_length=*/1); + BOOST_CHECK_EQUAL(reader.ReadLine(), "a"); +
BOOST_CHECK_EQUAL(reader.ReadLine(), "b"); + BOOST_CHECK(!reader.ReadLine()); + } + { + // If ReadLine fails, the iterator is reset and we can ReadLength instead + const std::vector input{StringToBuffer("a\nbaboon\n")}; + LineReader reader(input, /*max_line_length=*/1); + BOOST_CHECK_EQUAL(reader.ReadLine(), "a"); + // "baboon" is too long + BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); + BOOST_CHECK_EQUAL(reader.ReadLength(1), "b"); + BOOST_CHECK_EQUAL(reader.ReadLength(1), "a"); + BOOST_CHECK_EQUAL(reader.ReadLength(2), "bo"); + // "on" is too long + BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); + BOOST_CHECK_EQUAL(reader.ReadLength(1), "o"); + BOOST_CHECK_EQUAL(reader.ReadLine(), "n"); // now the remainder of the buffer fits in one line + BOOST_CHECK(!reader.ReadLine()); + } + { + // The end of the buffer (EOB) does not count as end of line \n + const std::vector input{StringToBuffer("once upon a time there")}; + + LineReader reader(input, /*max_line_length=*/22); + // First line is exactly the length of max_line_length, but that doesn't matter because \n is missing + BOOST_CHECK(!reader.ReadLine()); + // Data can still be read using ReadLength + BOOST_CHECK_EQUAL(reader.ReadLength(22), "once upon a time there"); + // End of buffer reached + BOOST_CHECK_EQUAL(reader.Remaining(), 0); + } + { + // Read specific number of bytes regardless of max_line_length or \n unless buffer is too short + const std::vector input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")}; + LineReader reader(input, /*max_line_length=*/1); + BOOST_CHECK_EQUAL(reader.ReadLength(0), ""); + BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc"); + BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a"); + BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t"); + BOOST_CHECK_EXCEPTION(reader.ReadLength(128), std::runtime_error, HasReason{"Not enough
data in buffer"}); + // After the error the iterator is reset so we can try again + BOOST_CHECK_EQUAL(reader.ReadLength(31), "here was a dog \r\nwho liked food"); + // End of buffer reached + BOOST_CHECK_EQUAL(reader.Remaining(), 0); + } +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/string.cpp b/src/util/string.cpp index 507d9d31718..c3b4b474d34 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -13,4 +13,58 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin if (search.empty()) return; in_out = std::regex_replace(in_out, std::regex(search), substitute); } + +LineReader::LineReader(std::span buffer, size_t max_line_length) + : start(buffer.begin()), end(buffer.end()), max_line_length(max_line_length), it(buffer.begin()) {} + +std::optional LineReader::ReadLine() +{ + if (it == end) { + return std::nullopt; + } + + auto line_start = it; + size_t count = 0; + while (it != end) { + // Read a character from the incoming buffer and increment the iterator + auto c = static_cast(*it); + ++it; + ++count; + // If the character we just consumed was \n, the line is terminated. + // The \n itself does not count against max_line_length. + if (c == '\n') { + const std::string_view untrimmed_line(reinterpret_cast(std::to_address(line_start)), count); + const std::string_view line = TrimStringView(untrimmed_line); // delete leading and trailing whitespace including \r and \n + return std::string(line); + } + // If the character we just consumed gives us a line length greater + // than max_line_length, and we are not at the end of the line (or buffer) yet, + // that means the line we are currently reading is too long, and we throw. + if (count > max_line_length) { + // Reset iterator + it = line_start; + throw std::runtime_error("max_line_length exceeded by LineReader"); + } + } + // End of buffer reached without finding a \n or exceeding max_line_length.
+ // Reset the iterator so the rest of the buffer can be read granularly + // with ReadLength() and return null to indicate a line was not found. + it = line_start; + return std::nullopt; +} + +// Ignores max_line_length but won't overflow: throws rather than reading past the end of the buffer +std::string LineReader::ReadLength(size_t len) +{ + if (len == 0) return ""; + if (Remaining() < len) throw std::runtime_error("Not enough data in buffer"); + std::string out(reinterpret_cast(std::to_address(it)), len); + it += len; + return out; +} + +size_t LineReader::Remaining() const +{ + return std::distance(it, end); +} } // namespace util diff --git a/src/util/string.h b/src/util/string.h index 330c2a2a61e..048e83ba6fa 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include // IWYU pragma: export #include // IWYU pragma: export @@ -260,6 +261,40 @@ template return obj.size() >= PREFIX_LEN && std::equal(std::begin(prefix), std::end(prefix), std::begin(obj)); } + +struct LineReader { + const std::span::iterator start; + const std::span::iterator end; + const size_t max_line_length; + std::span::iterator it; + + explicit LineReader(std::span buffer, size_t max_line_length); + + /** + * Returns a string from current iterator position up to (but not including) next \n + * and advances iterator to the character following the \n on success. + * Will not return a line longer than max_line_length. The limit is checked against the raw bytes before the \n, so whitespace that is later trimmed (including \r) counts toward it; the returned line has leading and trailing whitespace removed. + * @returns the next string from the buffer. + * std::nullopt if end of buffer is reached without finding a \n. In that case the iterator is left where it was before the call. + * @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n. The iterator is reset to where it was before the call. + */ + std::optional ReadLine(); + + /** + * Returns string from current iterator position of specified length + * if possible and advances iterator on success. If the read fails the iterator is not moved. + * May exceed max_line_length but will not read past end of buffer. + * @param[in] len The number of bytes to read from the buffer + * @returns a string of the expected length.
+ * @throws a std::runtime_error if there is not enough data in the buffer. + */ + std::string ReadLength(size_t len); + + /** + * Returns the number of bytes remaining in the buffer, counted from the current iterator position to the end + */ + size_t Remaining() const; +}; } // namespace util #endif // BITCOIN_UTIL_STRING_H