util/string: Allow Split to include the separator

When splitting a string, the separator sometimes needs to be kept.
Split can now optionally include the separator at the end of each
piece it terminates, i.e. every resulting piece except the last one
ends with the separator.

Specifically, for musig() descriptors, Split is used to separate a
musig() from any derivation path that follows it by splitting on the
closing parenthesis. Since that parenthesis is needed for Func() and
Expr(), Split() needs to preserve the closing parenthesis instead of
discarding it.
This commit is contained in:
Ava Chow
2025-04-14 13:31:31 -07:00
parent 8811312571
commit 12bc1d0b1e
2 changed files with 29 additions and 4 deletions

View File

@@ -1246,6 +1246,12 @@ BOOST_AUTO_TEST_CASE(test_script_parsing)
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "two");
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "three");
results = Split(input, '#', /*include_sep=*/true);
BOOST_CHECK_EQUAL(results.size(), 3U);
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "one#");
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "two#");
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "three");
input = "*foo*bar*";
results = Split(input, '*');
BOOST_CHECK_EQUAL(results.size(), 4U);
@@ -1253,6 +1259,13 @@ BOOST_AUTO_TEST_CASE(test_script_parsing)
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "foo");
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "bar");
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
results = Split(input, '*', /*include_sep=*/true);
BOOST_CHECK_EQUAL(results.size(), 4U);
BOOST_CHECK_EQUAL(SpanToStr(results[0]), "*");
BOOST_CHECK_EQUAL(SpanToStr(results[1]), "foo*");
BOOST_CHECK_EQUAL(SpanToStr(results[2]), "bar*");
BOOST_CHECK_EQUAL(SpanToStr(results[3]), "");
}
BOOST_AUTO_TEST_CASE(test_SplitString)

View File

@@ -100,18 +100,30 @@ void ReplaceAll(std::string& in_out, const std::string& search, const std::strin
*
* If sep does not occur in sp, a singleton with the entirety of sp is returned.
*
* @param[in] include_sep Whether to include the separator at the end of the left side of the splits.
*
* Note that this function does not care about braces, so splitting
* "foo(bar(1),2),3)" on ',' will return {"foo(bar(1)", "2)", "3)"}.
*
* If include_sep == true, splitting "foo(bar(1),2),3)" on ','
* will return:
* - foo(bar(1),
* - 2),
* - 3)
*/
template <typename T = std::span<const char>>
std::vector<T> Split(const std::span<const char>& sp, std::string_view separators)
std::vector<T> Split(const std::span<const char>& sp, std::string_view separators, bool include_sep = false)
{
std::vector<T> ret;
auto it = sp.begin();
auto start = it;
while (it != sp.end()) {
if (separators.find(*it) != std::string::npos) {
ret.emplace_back(start, it);
if (include_sep) {
ret.emplace_back(start, it + 1);
} else {
ret.emplace_back(start, it);
}
start = it + 1;
}
++it;
@@ -128,9 +140,9 @@ std::vector<T> Split(const std::span<const char>& sp, std::string_view separator
* "foo(bar(1),2),3)" on ',' will return {"foo(bar(1)", "2)", "3)"}.
*/
template <typename T = std::span<const char>>
std::vector<T> Split(const std::span<const char>& sp, char sep)
std::vector<T> Split(const std::span<const char>& sp, char sep, bool include_sep = false)
{
return Split<T>(sp, std::string_view{&sep, 1});
return Split<T>(sp, std::string_view{&sep, 1}, include_sep);
}
[[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, char sep)