string: add LineReader

This is a helper struct to parse HTTP messages from data in buffers
from sockets. HTTP messages begin with headers, which are lines
terminated by CRLF (\r\n) or by a bare LF (\n), followed by an arbitrary
amount of body data. Whitespace is trimmed from the field lines but not
from the body.

https://httpwg.org/specs/rfc9110.html#rfc.section.5
This commit is contained in:
Matthew Zipkin
2024-09-27 15:22:17 -04:00
committed by Matthew Zipkin
parent ee62405cce
commit 1911db8c6d
3 changed files with 206 additions and 0 deletions

View File

@@ -13,4 +13,58 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin
if (search.empty()) return;
in_out = std::regex_replace(in_out, std::regex(search), substitute);
}
LineReader::LineReader(std::span<const std::byte> buffer, size_t max_line_length)
: start(buffer.begin()), end(buffer.end()), max_line_length(max_line_length), it(buffer.begin()) {}
// Extracts the next \n-terminated line, trimmed with TrimStringView(), and
// moves the read position just past that \n.
// Returns std::nullopt (position unchanged) when the buffer is exhausted or
// ends before a \n is seen; throws std::runtime_error (position unchanged)
// when more than max_line_length bytes precede the \n.
std::optional<std::string> LineReader::ReadLine()
{
    const auto available = static_cast<size_t>(std::distance(it, end));
    if (available == 0) return std::nullopt;

    // Only the first max_line_length + 1 bytes can decide the outcome: a \n
    // anywhere inside that window terminates an acceptable line (the \n itself
    // is not counted against the limit), whereas a window holding no \n at all
    // means the line is too long. Comparing before adding 1 keeps the sum
    // from wrapping around.
    const bool buffer_exceeds_limit = available > max_line_length;
    const size_t window = buffer_exceeds_limit ? max_line_length + 1 : available;
    const auto window_end = std::next(it, static_cast<std::ptrdiff_t>(window));

    const auto newline = std::find(it, window_end, std::byte{'\n'});
    if (newline == window_end) {
        // No terminator found. The read position was never moved, so the
        // remaining bytes can still be consumed with ReadLength().
        if (buffer_exceeds_limit) {
            throw std::runtime_error("max_line_length exceeded by LineReader");
        }
        return std::nullopt;
    }

    // View the raw line including its trailing \n, then strip leading and
    // trailing whitespace (which removes the \r and \n as well).
    const auto raw_length = static_cast<size_t>(std::distance(it, newline)) + 1;
    const std::string_view raw_line(reinterpret_cast<const char*>(std::to_address(it)), raw_length);
    it = std::next(newline);
    return std::string(TrimStringView(raw_line));
}
// Copies exactly `len` bytes starting at the read position and advances past
// them. Not limited by max_line_length; bounded only by what is left in the
// buffer, so it throws std::runtime_error rather than reading past the end.
std::string LineReader::ReadLength(size_t len)
{
    // A zero-length read always succeeds and leaves the position untouched.
    if (len == 0) return std::string{};

    if (len > Remaining()) {
        throw std::runtime_error("Not enough data in buffer");
    }

    // Build the result first and only then advance, so the position is
    // unchanged if the string construction fails.
    const char* const chunk = reinterpret_cast<const char*>(std::to_address(it));
    std::string result(chunk, chunk + len);
    it += len;
    return result;
}
// Number of bytes between the current read position and the end of the buffer.
size_t LineReader::Remaining() const
{
    return static_cast<size_t>(end - it);
}
} // namespace util

View File

@@ -11,6 +11,7 @@
#include <cstdint>
#include <cstring>
#include <locale>
#include <optional>
#include <sstream>
#include <string> // IWYU pragma: export
#include <string_view> // IWYU pragma: export
@@ -260,6 +261,40 @@ template <typename T1, size_t PREFIX_LEN>
return obj.size() >= PREFIX_LEN &&
std::equal(std::begin(prefix), std::end(prefix), std::begin(obj));
}
/**
 * Forward-only reader over a read-only byte buffer that hands out either
 * \n-terminated lines (ReadLine) or fixed-length chunks (ReadLength).
 *
 * Only iterators into the span are stored, not a copy of the data, so the
 * underlying buffer must remain valid for as long as the reader is used.
 */
struct LineReader {
//! Iterator to the first byte of the buffer passed to the constructor.
const std::span<const std::byte>::iterator start;
//! Iterator one past the last byte of the buffer.
const std::span<const std::byte>::iterator end;
//! Maximum number of bytes ReadLine() accepts before the terminating \n.
const size_t max_line_length;
//! Current read position; advanced by successful ReadLine()/ReadLength() calls.
std::span<const std::byte>::iterator it;
/**
 * @param[in] buffer          The bytes to read from; the read position starts at its first byte.
 * @param[in] max_line_length Limit applied by ReadLine() (not by ReadLength()).
 */
explicit LineReader(std::span<const std::byte> buffer, size_t max_line_length);
/**
 * Reads from the current iterator position up to and including the next \n,
 * and on success advances the iterator to the character following that \n.
 * The returned string is passed through TrimStringView() first, so leading
 * and trailing whitespace (including the \r and \n terminators) is removed.
 * The \n itself does not count against max_line_length.
 * If no line is returned (std::nullopt or exception) the iterator is left
 * at the position it had before the call.
 * @returns the next trimmed line from the buffer.
 *          std::nullopt if the buffer is already exhausted, or if the end
 *          of the buffer is reached without finding a \n.
 * @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n.
 */
std::optional<std::string> ReadLine();
/**
 * Returns string from current iterator position of specified length
 * if possible and advances iterator on success.
 * May exceed max_line_length but will not read past end of buffer.
 * A length of 0 returns an empty string without moving the iterator.
 * @param[in] len The number of bytes to read from the buffer
 * @returns a string of the expected length.
 * @throws a std::runtime_error if there is not enough data in the buffer.
 */
std::string ReadLength(size_t len);
/**
 * Returns the number of bytes between the current iterator position and
 * the end of the buffer.
 */
size_t Remaining() const;
};
} // namespace util
#endif // BITCOIN_UTIL_STRING_H