From 5676aec1e1a6d2c6fd3099e120e263a0a7def089 Mon Sep 17 00:00:00 2001
From: josibake <josibake@protonmail.com>
Date: Tue, 18 Jul 2023 22:22:26 +0200
Subject: [PATCH 1/2] refactor: Model the bech32 charlimit as an Enum

Bech32(m) was defined with a 90 character limit so that certain
guarantees for error detection could be made for segwit addresses.
However, there is nothing about the encoding scheme itself that requires
a limit and in practice bech32(m) has been used without the 90 char
limit (e.g. lightning invoices).

Further, increasing the character limit doesn't do away with error
detection, it simply lessons the guarantees.

Model charlimit as an Enum, so that if a different address scheme is
using bech32(m), the character limit for that address scheme can be
used, rather than always using the 90 charlimit defined for segwit
addresses.

upate comment
---
 src/bech32.cpp | 13 +++++++------
 src/bech32.h   | 12 ++++++++++--
 2 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/src/bech32.cpp b/src/bech32.cpp
index ba3c419d8b6..6a0956f1b68 100644
--- a/src/bech32.cpp
+++ b/src/bech32.cpp
@@ -370,11 +370,12 @@ std::string Encode(Encoding encoding, const std::string& hrp, const data& values
 }
 
 /** Decode a Bech32 or Bech32m string. */
-DecodeResult Decode(const std::string& str) {
+DecodeResult Decode(const std::string& str, CharLimit limit) {
     std::vector<int> errors;
     if (!CheckCharacters(str, errors)) return {};
     size_t pos = str.rfind('1');
-    if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) {
+    if (str.size() > limit) return {};
+    if (pos == str.npos || pos == 0 || pos + 7 > str.size()) {
         return {};
     }
     data values(str.size() - 1 - pos);
@@ -397,12 +398,12 @@ DecodeResult Decode(const std::string& str) {
 }
 
 /** Find index of an incorrect character in a Bech32 string. */
-std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
+std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit) {
     std::vector<int> error_locations{};
 
-    if (str.size() > 90) {
-        error_locations.resize(str.size() - 90);
-        std::iota(error_locations.begin(), error_locations.end(), 90);
+    if (str.size() > limit) {
+        error_locations.resize(str.size() - limit);
+        std::iota(error_locations.begin(), error_locations.end(), static_cast<int>(limit));
         return std::make_pair("Bech32 string too long", std::move(error_locations));
     }
 
diff --git a/src/bech32.h b/src/bech32.h
index 5e89e6efdaa..fe2a276ae07 100644
--- a/src/bech32.h
+++ b/src/bech32.h
@@ -28,6 +28,14 @@ enum class Encoding {
     BECH32M, //!< Bech32m encoding as defined in BIP350
 };
 
+/** Character limits for Bech32(m) encoded strings. Character limits are how we provide error location guarantees.
+ *  These values should never exceed 2^31 - 1 (max value for a 32-bit int), since there are places where we may need to
+ *  convert the CharLimit::VALUE to an int. In practice, this should never happen since this CharLimit applies to an address encoding
+ *  and we would never encode an address with such a massive value */
+enum CharLimit : size_t {
+    BECH32 = 90,            //!< BIP173/350 imposed character limit for Bech32(m) encoded addresses. This guarantees finding up to 4 errors.
+};
+
 /** Encode a Bech32 or Bech32m string. If hrp contains uppercase characters, this will cause an
  *  assertion error. Encoding must be one of BECH32 or BECH32M. */
 std::string Encode(Encoding encoding, const std::string& hrp, const std::vector<uint8_t>& values);
@@ -43,10 +51,10 @@ struct DecodeResult
 };
 
 /** Decode a Bech32 or Bech32m string. */
-DecodeResult Decode(const std::string& str);
+DecodeResult Decode(const std::string& str, CharLimit limit = CharLimit::BECH32);
 
 /** Return the positions of errors in a Bech32 string. */
-std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str);
+std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit = CharLimit::BECH32);
 
 } // namespace bech32
 

From 7f3f6c6dc80247e6dfb0d406dc53bc8198f029fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C5=91rinc?= <lorinc.pap@gmail.com>
Date: Sat, 9 Mar 2024 13:32:38 +0100
Subject: [PATCH 2/2] refactor: replace hardcoded numbers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace ExpandHRP with a PreparePolynomialCoefficients function. Instead
of using a hardcoded value for the size of the array (90 in this case)
and a hardcoded value for the checksum, use the actual values vector and
define checksum size as a constexpr. Use the new CHECKSUM_SIZE
throughout instead 6.

Co-authored-by: Lőrinc <pap.lorinc@gmail.com>
---
 src/bech32.cpp | 45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/bech32.cpp b/src/bech32.cpp
index 6a0956f1b68..c3c4ca8006f 100644
--- a/src/bech32.cpp
+++ b/src/bech32.cpp
@@ -19,6 +19,9 @@ namespace
 
 typedef std::vector<uint8_t> data;
 
+/** The Bech32 and Bech32m checksum size */
+constexpr size_t CHECKSUM_SIZE = 6;
+
 /** The Bech32 and Bech32m character set for encoding. */
 const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
 
@@ -308,18 +311,18 @@ bool CheckCharacters(const std::string& str, std::vector<int>& errors)
     return errors.empty();
 }
 
-/** Expand a HRP for use in checksum computation. */
-data ExpandHRP(const std::string& hrp)
+std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values)
 {
     data ret;
-    ret.reserve(hrp.size() + 90);
-    ret.resize(hrp.size() * 2 + 1);
-    for (size_t i = 0; i < hrp.size(); ++i) {
-        unsigned char c = hrp[i];
-        ret[i] = c >> 5;
-        ret[i + hrp.size() + 1] = c & 0x1f;
-    }
-    ret[hrp.size()] = 0;
+    ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + CHECKSUM_SIZE);
+
+    /** Expand a HRP for use in checksum computation. */
+    for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5);
+    ret.push_back(0);
+    for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f);
+
+    ret.insert(ret.end(), values.begin(), values.end());
+
     return ret;
 }
 
@@ -331,7 +334,8 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
     // list of values would result in a new valid list. For that reason, Bech32 requires the
     // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See
     // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
-    const uint32_t check = PolyMod(Cat(ExpandHRP(hrp), values));
+    auto enc = PreparePolynomialCoefficients(hrp, values);
+    const uint32_t check = PolyMod(enc);
     if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32;
     if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M;
     return Encoding::INVALID;
@@ -340,11 +344,11 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
 /** Create a checksum. */
 data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
 {
-    data enc = Cat(ExpandHRP(hrp), values);
-    enc.resize(enc.size() + 6); // Append 6 zeroes
+    auto enc = PreparePolynomialCoefficients(hrp, values);
+    enc.insert(enc.end(), CHECKSUM_SIZE, 0x00);
     uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes.
-    data ret(6);
-    for (size_t i = 0; i < 6; ++i) {
+    data ret(CHECKSUM_SIZE);
+    for (size_t i = 0; i < CHECKSUM_SIZE; ++i) {
         // Convert the 5-bit groups in mod to checksum values.
         ret[i] = (mod >> (5 * (5 - i))) & 31;
     }
@@ -375,7 +379,7 @@ DecodeResult Decode(const std::string& str, CharLimit limit) {
     if (!CheckCharacters(str, errors)) return {};
     size_t pos = str.rfind('1');
     if (str.size() > limit) return {};
-    if (pos == str.npos || pos == 0 || pos + 7 > str.size()) {
+    if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
         return {};
     }
     data values(str.size() - 1 - pos);
@@ -394,7 +398,7 @@ DecodeResult Decode(const std::string& str, CharLimit limit) {
     }
     Encoding result = VerifyChecksum(hrp, values);
     if (result == Encoding::INVALID) return {};
-    return {result, std::move(hrp), data(values.begin(), values.end() - 6)};
+    return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)};
 }
 
 /** Find index of an incorrect character in a Bech32 string. */
@@ -415,7 +419,7 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, Ch
     if (pos == str.npos) {
         return std::make_pair("Missing separator", std::vector<int>{});
     }
-    if (pos == 0 || pos + 7 > str.size()) {
+    if (pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
         error_locations.push_back(pos);
         return std::make_pair("Invalid separator position", std::move(error_locations));
     }
@@ -442,9 +446,10 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, Ch
     std::optional<Encoding> error_encoding;
     for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
         std::vector<int> possible_errors;
-        // Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial
+        // Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial
         // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
-        uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding);
+        auto enc = PreparePolynomialCoefficients(hrp, values);
+        uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding);
 
         // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding
         // constant) should be 0 - hence 0 indicates there are no errors present.