bitcoin/src/util/strencodings.h
MarcoFalke 9394964f6b
Merge bitcoin/bitcoin#23451: span: Add std::byte helpers
faa3ec2304051be7cfbe301cfbfbda3faf7514fc span: Add std::byte helpers (MarcoFalke)
fa18038f519db76befb9a7bd0b1540143bfeb12b refactor: Use ignore helper when unserializing an invalid pubkey (MarcoFalke)
fabe18d0b39b4b918bf60e3a313eaa36fb4067f2 Use value_type in CDataStream where possible (MarcoFalke)

Pull request description:

  This adds (currently unused) span std::byte helpers, so that they can be used in new code.

  The refactors are also required for https://github.com/bitcoin/bitcoin/pull/23438, but they are split up because the other pull doesn't compile with msvc right now.

  The third commit is not needed for the other pull, but still nice.

ACKs for top commit:
  klementtan:
    reACK  faa3ec2. Verified that all the new `std::byte` helper functions are tested.
  laanwj:
    Code review ACK faa3ec2304051be7cfbe301cfbfbda3faf7514fc

Tree-SHA512: b1f6af39f03ea4dfebf20d4a8538fa993a6104e7fc92ddf0c4606a7efc3ca9a8c1a4741d98a1418569c11bb9ce9258bf0c0c06d93d85ed7e208902a2db04e407
2021-11-24 11:04:37 +01:00

341 lines
12 KiB
C++

// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2020 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
/**
* Utilities for converting data from/to strings.
*/
#ifndef BITCOIN_UTIL_STRENCODINGS_H
#define BITCOIN_UTIL_STRENCODINGS_H
#include <attributes.h>
#include <span.h>
#include <util/string.h>
#include <charconv>
#include <cstdint>
#include <iterator>
#include <optional>
#include <string>
#include <vector>
/** Used by SanitizeString() */
enum SafeChars
{
SAFE_CHARS_DEFAULT, //!< The full set of allowed chars
SAFE_CHARS_UA_COMMENT, //!< BIP-0014 subset
SAFE_CHARS_FILENAME, //!< Chars allowed in filenames
SAFE_CHARS_URI, //!< Chars allowed in URIs (RFC 3986)
};
/**
* Used by ParseByteUnits()
* Lowercase base 1000
* Uppercase base 1024
*/
enum class ByteUnit : uint64_t {
NOOP = 1ULL,
k = 1000ULL,
K = 1024ULL,
m = 1'000'000ULL,
M = 1ULL << 20,
g = 1'000'000'000ULL,
G = 1ULL << 30,
t = 1'000'000'000'000ULL,
T = 1ULL << 40,
};
/**
* Remove unsafe chars. Safe chars chosen to allow simple messages/URLs/email
* addresses, but avoid anything even possibly remotely dangerous like & or >
* @param[in] str The string to sanitize
* @param[in] rule The set of safe chars to choose (default: least restrictive)
* @return A new string without unsafe chars
*/
std::string SanitizeString(const std::string& str, int rule = SAFE_CHARS_DEFAULT);
std::vector<unsigned char> ParseHex(const char* psz);
std::vector<unsigned char> ParseHex(const std::string& str);
signed char HexDigit(char c);
/* Returns true if each character in str is a hex character, and has an even
* number of hex digits.*/
bool IsHex(const std::string& str);
/**
* Return true if the string is a hex number, optionally prefixed with "0x"
*/
bool IsHexNumber(const std::string& str);
std::vector<unsigned char> DecodeBase64(const char* p, bool* pf_invalid = nullptr);
std::string DecodeBase64(const std::string& str, bool* pf_invalid = nullptr);
std::string EncodeBase64(Span<const unsigned char> input);
inline std::string EncodeBase64(Span<const std::byte> input) { return EncodeBase64(MakeUCharSpan(input)); }
inline std::string EncodeBase64(const std::string& str) { return EncodeBase64(MakeUCharSpan(str)); }
std::vector<unsigned char> DecodeBase32(const char* p, bool* pf_invalid = nullptr);
std::string DecodeBase32(const std::string& str, bool* pf_invalid = nullptr);
/**
* Base32 encode.
* If `pad` is true, then the output will be padded with '=' so that its length
* is a multiple of 8.
*/
std::string EncodeBase32(Span<const unsigned char> input, bool pad = true);
/**
* Base32 encode.
* If `pad` is true, then the output will be padded with '=' so that its length
* is a multiple of 8.
*/
std::string EncodeBase32(const std::string& str, bool pad = true);
void SplitHostPort(std::string in, uint16_t& portOut, std::string& hostOut);
// LocaleIndependentAtoi is provided for backwards compatibility reasons.
//
// New code should use ToIntegral or the ParseInt* functions
// which provide parse error feedback.
//
// The goal of LocaleIndependentAtoi is to replicate the exact defined behaviour
// of atoi and atoi64 as they behave under the "C" locale.
template <typename T>
T LocaleIndependentAtoi(const std::string& str)
{
static_assert(std::is_integral<T>::value);
T result;
// Emulate atoi(...) handling of white space and leading +/-.
std::string s = TrimString(str);
if (!s.empty() && s[0] == '+') {
if (s.length() >= 2 && s[1] == '-') {
return 0;
}
s = s.substr(1);
}
auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
if (error_condition != std::errc{}) {
return 0;
}
return result;
}
/**
* Tests if the given character is a decimal digit.
* @param[in] c character to test
* @return true if the argument is a decimal digit; otherwise false.
*/
constexpr bool IsDigit(char c)
{
return c >= '0' && c <= '9';
}
/**
* Tests if the given character is a whitespace character. The whitespace characters
* are: space, form-feed ('\f'), newline ('\n'), carriage return ('\r'), horizontal
* tab ('\t'), and vertical tab ('\v').
*
* This function is locale independent. Under the C locale this function gives the
* same result as std::isspace.
*
* @param[in] c character to test
* @return true if the argument is a whitespace character; otherwise false
*/
constexpr inline bool IsSpace(char c) noexcept {
return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
}
/**
* Convert string to integral type T. Leading whitespace, a leading +, or any
* trailing character fail the parsing. The required format expressed as regex
* is `-?[0-9]+`. The minus sign is only permitted for signed integer types.
*
* @returns std::nullopt if the entire string could not be parsed, or if the
* parsed value is not in the range representable by the type T.
*/
template <typename T>
std::optional<T> ToIntegral(const std::string& str)
{
static_assert(std::is_integral<T>::value);
T result;
const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result);
if (first_nonmatching != str.data() + str.size() || error_condition != std::errc{}) {
return std::nullopt;
}
return result;
}
/**
* Convert string to signed 32-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if not the entire string could be parsed or when overflow or underflow occurred.
*/
[[nodiscard]] bool ParseInt32(const std::string& str, int32_t *out);
/**
* Convert string to signed 64-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if not the entire string could be parsed or when overflow or underflow occurred.
*/
[[nodiscard]] bool ParseInt64(const std::string& str, int64_t *out);
/**
* Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if not the entire string could be parsed or when overflow or underflow occurred.
*/
[[nodiscard]] bool ParseUInt8(const std::string& str, uint8_t *out);
/**
* Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if the entire string could not be parsed or if overflow or underflow occurred.
*/
[[nodiscard]] bool ParseUInt16(const std::string& str, uint16_t* out);
/**
* Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if not the entire string could be parsed or when overflow or underflow occurred.
*/
[[nodiscard]] bool ParseUInt32(const std::string& str, uint32_t *out);
/**
* Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if not the entire string could be parsed or when overflow or underflow occurred.
*/
[[nodiscard]] bool ParseUInt64(const std::string& str, uint64_t *out);
/**
* Convert a span of bytes to a lower-case hexadecimal string.
*/
std::string HexStr(const Span<const uint8_t> s);
inline std::string HexStr(const Span<const char> s) { return HexStr(MakeUCharSpan(s)); }
inline std::string HexStr(const Span<const std::byte> s) { return HexStr(MakeUCharSpan(s)); }
/**
* Format a paragraph of text to a fixed width, adding spaces for
* indentation to any added line.
*/
std::string FormatParagraph(const std::string& in, size_t width = 79, size_t indent = 0);
/**
* Timing-attack-resistant comparison.
* Takes time proportional to length
* of first argument.
*/
template <typename T>
bool TimingResistantEqual(const T& a, const T& b)
{
if (b.size() == 0) return a.size() == 0;
size_t accumulator = a.size() ^ b.size();
for (size_t i = 0; i < a.size(); i++)
accumulator |= a[i] ^ b[i%b.size()];
return accumulator == 0;
}
/** Parse number as fixed point according to JSON number syntax.
* See https://json.org/number.gif
* @returns true on success, false on error.
* @note The result must be in the range (-10^18,10^18), otherwise an overflow error will trigger.
*/
[[nodiscard]] bool ParseFixedPoint(const std::string &val, int decimals, int64_t *amount_out);
/** Convert from one power-of-2 number base to another. */
template<int frombits, int tobits, bool pad, typename O, typename I>
bool ConvertBits(const O& outfn, I it, I end) {
size_t acc = 0;
size_t bits = 0;
constexpr size_t maxv = (1 << tobits) - 1;
constexpr size_t max_acc = (1 << (frombits + tobits - 1)) - 1;
while (it != end) {
acc = ((acc << frombits) | *it) & max_acc;
bits += frombits;
while (bits >= tobits) {
bits -= tobits;
outfn((acc >> bits) & maxv);
}
++it;
}
if (pad) {
if (bits) outfn((acc << (tobits - bits)) & maxv);
} else if (bits >= frombits || ((acc << (tobits - bits)) & maxv)) {
return false;
}
return true;
}
/**
* Converts the given character to its lowercase equivalent.
* This function is locale independent. It only converts uppercase
* characters in the standard 7-bit ASCII range.
* This is a feature, not a limitation.
*
* @param[in] c the character to convert to lowercase.
* @return the lowercase equivalent of c; or the argument
* if no conversion is possible.
*/
constexpr char ToLower(char c)
{
return (c >= 'A' && c <= 'Z' ? (c - 'A') + 'a' : c);
}
/**
* Returns the lowercase equivalent of the given string.
* This function is locale independent. It only converts uppercase
* characters in the standard 7-bit ASCII range.
* This is a feature, not a limitation.
*
* @param[in] str the string to convert to lowercase.
* @returns lowercased equivalent of str
*/
std::string ToLower(const std::string& str);
/**
* Converts the given character to its uppercase equivalent.
* This function is locale independent. It only converts lowercase
* characters in the standard 7-bit ASCII range.
* This is a feature, not a limitation.
*
* @param[in] c the character to convert to uppercase.
* @return the uppercase equivalent of c; or the argument
* if no conversion is possible.
*/
constexpr char ToUpper(char c)
{
return (c >= 'a' && c <= 'z' ? (c - 'a') + 'A' : c);
}
/**
* Returns the uppercase equivalent of the given string.
* This function is locale independent. It only converts lowercase
* characters in the standard 7-bit ASCII range.
* This is a feature, not a limitation.
*
* @param[in] str the string to convert to uppercase.
* @returns UPPERCASED EQUIVALENT OF str
*/
std::string ToUpper(const std::string& str);
/**
* Capitalizes the first character of the given string.
* This function is locale independent. It only converts lowercase
* characters in the standard 7-bit ASCII range.
* This is a feature, not a limitation.
*
* @param[in] str the string to capitalize.
* @returns string with the first letter capitalized.
*/
std::string Capitalize(std::string str);
/**
* Parse a string with suffix unit [k|K|m|M|g|G|t|T].
* Must be a whole integer, fractions not allowed (0.5t), no whitespace or +-
* Lowercase units are 1000 base. Uppercase units are 1024 base.
* Examples: 2m,27M,19g,41T
*
* @param[in] str the string to convert into bytes
* @param[in] default_multiplier if no unit is found in str use this unit
* @returns optional uint64_t bytes from str or nullopt
* if ToIntegral is false, str is empty, trailing whitespace or overflow
*/
std::optional<uint64_t> ParseByteUnits(const std::string& str, ByteUnit default_multiplier);
#endif // BITCOIN_UTIL_STRENCODINGS_H