clusterlin: support fixing linearizations (feature)

This also updates FixLinearization to just be a thin wrapper around Linearize.
In a future commit, FixLinearization will be removed entirely.
This commit is contained in:
Pieter Wuille
2025-10-23 19:16:50 -04:00
parent 755f0900a2
commit 01ffcf464a
3 changed files with 44 additions and 54 deletions

View File

@@ -1338,8 +1338,9 @@ public:
* @param[in] rng_seed A random number seed to control search order. This prevents peers
* from predicting exactly which clusters would be hard for us to
* linearize.
* @param[in] old_linearization An existing linearization for the cluster (which must be
* topologically valid), or empty.
* @param[in] old_linearization An existing linearization for the cluster, or empty.
* @param[in] is_topological (Only relevant if old_linearization is not empty) Whether
* old_linearization is topologically valid.
* @return A tuple of:
* - The resulting linearization. If old_linearization is topologically valid
* (whether or not is_topological says so), the result is guaranteed to be at
* least as good (in the feerate diagram sense) as old_linearization.
@@ -1348,12 +1349,13 @@ public:
* - How many optimization steps were actually performed.
*/
template<typename SetType>
std::tuple<std::vector<DepGraphIndex>, bool, uint64_t> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span<const DepGraphIndex> old_linearization = {}) noexcept
std::tuple<std::vector<DepGraphIndex>, bool, uint64_t> Linearize(const DepGraph<SetType>& depgraph, uint64_t max_iterations, uint64_t rng_seed, std::span<const DepGraphIndex> old_linearization = {}, bool is_topological = true) noexcept
{
/** Initialize a spanning forest data structure for this cluster. */
SpanningForestState forest(depgraph, rng_seed);
if (!old_linearization.empty()) {
forest.LoadLinearization(old_linearization);
if (!is_topological) forest.MakeTopological();
} else {
forest.MakeTopological();
}
@@ -1573,36 +1575,14 @@ void PostLinearize(const DepGraph<SetType>& depgraph, std::span<DepGraphIndex> l
}
}
/** Make linearization topological, retaining its ordering where possible. */
/** Make linearization topological, reusing information from the old linearization where possible. */
template<typename SetType>
void FixLinearization(const DepGraph<SetType>& depgraph, std::span<DepGraphIndex> linearization) noexcept
{
// This algorithm can be summarized as moving every element in the linearization backwards
// until it is placed after all its ancestors.
SetType done;
const auto len = linearization.size();
// Iterate over the elements of linearization from back to front (i is distance from back).
for (DepGraphIndex i = 0; i < len; ++i) {
/** The element at that position. */
DepGraphIndex elem = linearization[len - 1 - i];
/** j represents how far from the back of the linearization elem should be placed. */
DepGraphIndex j = i;
// Figure out which elements need to be moved before elem.
SetType place_before = done & depgraph.Ancestors(elem);
// Find which position to place elem in (updating j), continuously moving the elements
// in between forward.
while (place_before.Any()) {
// j cannot be 0 here; if it was, then there was necessarily nothing earlier which
// elem needs to be placed before anymore, and place_before would be empty.
Assume(j > 0);
auto to_swap = linearization[len - 1 - (j - 1)];
place_before.Reset(to_swap);
linearization[len - 1 - (j--)] = to_swap;
}
// Put elem in its final position and mark it as done.
linearization[len - 1 - j] = elem;
done.Set(elem);
}
// TODO: update call sites to use Linearize directly.
auto [new_lin, _opt, _steps] = Linearize(depgraph, /*max_iterations=*/0, /*rng_seed=*/0, linearization, /*is_topological=*/false);
Assume(new_lin.size() == linearization.size());
std::copy(new_lin.begin(), new_lin.end(), linearization.begin());
}
} // namespace cluster_linearize

View File

@@ -68,7 +68,8 @@ void TestOptimalLinearization(const std::vector<uint8_t>& enc, const std::vector
for (int iter = 0; iter < 200; ++iter) {
bool opt;
uint64_t cost{0};
switch (rng.randrange(3)) {
bool is_topological{true};
switch (rng.randrange(4)) {
case 0:
// Use empty input linearization.
lin.clear();
@@ -77,12 +78,17 @@ void TestOptimalLinearization(const std::vector<uint8_t>& enc, const std::vector
// Reuse previous optimal linearization as input.
break;
case 2:
// Construct random input linearization.
// Construct random valid input linearization.
std::shuffle(lin.begin(), lin.end(), rng);
FixLinearization(depgraph, lin);
std::sort(lin.begin(), lin.end(), [&](auto a, auto b) { return depgraph.Ancestors(a).Count() < depgraph.Ancestors(b).Count(); });
break;
case 3:
// Construct random potentially invalid input linearization.
std::shuffle(lin.begin(), lin.end(), rng);
is_topological = false;
break;
}
std::tie(lin, opt, cost) = Linearize(depgraph, 1000000000000, rng.rand64(), lin);
std::tie(lin, opt, cost) = Linearize(depgraph, 1000000000000, rng.rand64(), lin, is_topological);
BOOST_CHECK(opt);
BOOST_CHECK(cost <= MaxOptimalLinearizationIters(depgraph.TxCount()));
SanityCheck(depgraph, lin);

View File

@@ -998,35 +998,40 @@ FUZZ_TARGET(clusterlin_linearize)
DepGraph<TestBitSet> depgraph;
uint64_t rng_seed{0};
uint64_t iter_count{0};
uint8_t make_connected{1};
uint8_t flags{7};
try {
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> make_connected;
reader >> VARINT(iter_count) >> Using<DepGraphFormatter>(depgraph) >> rng_seed >> flags;
} catch (const std::ios_base::failure&) {}
bool make_connected = flags & 1;
// Bits 1 and 2 of flags (mask 6) select one of 4 input modes, from which the 3 booleans below
// are derived:
// - (flags & 6) == 0: do not provide input linearization.
// - (flags & 6) == 2: provide potentially non-topological input.
// - (flags & 6) == 4: provide topological input linearization, but do not claim it is
// topological.
// - (flags & 6) == 6: provide topological input linearization, and claim it is topological.
bool provide_input = flags & 6;
bool provide_topological_input = flags & 4;
bool claim_topological_input = (flags & 6) == 6;
// The most complicated graphs are connected ones (other ones just split up). Optionally force
// the graph to be connected.
if (make_connected) MakeConnected(depgraph);
// Optionally construct an old linearization for it.
std::vector<DepGraphIndex> old_linearization;
{
uint8_t have_old_linearization{0};
try {
reader >> have_old_linearization;
} catch(const std::ios_base::failure&) {}
if (have_old_linearization & 1) {
old_linearization = ReadLinearization(depgraph, reader);
SanityCheck(depgraph, old_linearization);
}
if (provide_input) {
old_linearization = ReadLinearization(depgraph, reader, /*topological=*/provide_topological_input);
if (provide_topological_input) SanityCheck(depgraph, old_linearization);
}
// Invoke Linearize().
iter_count &= 0x7ffff;
auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization);
auto [linearization, optimal, cost] = Linearize(depgraph, iter_count, rng_seed, old_linearization, /*is_topological=*/claim_topological_input);
SanityCheck(depgraph, linearization);
auto chunking = ChunkLinearization(depgraph, linearization);
// Linearization must always be as good as the old one, if provided.
if (!old_linearization.empty()) {
// The resulting linearization must always be at least as good as the old one, if one was
// provided and it is topological (even when it was not claimed to be topological).
if (provide_topological_input) {
auto old_chunking = ChunkLinearization(depgraph, old_linearization);
auto cmp = CompareChunks(chunking, old_chunking);
assert(cmp >= 0);
@@ -1231,12 +1236,11 @@ FUZZ_TARGET(clusterlin_fix_linearization)
// Sanity check it (which includes testing whether it is topological).
SanityCheck(depgraph, linearization_fixed);
// FixLinearization does not modify the topological prefix of linearization.
assert(std::equal(linearization.begin(), linearization.begin() + topo_prefix,
linearization_fixed.begin()));
// This also means that if linearization was entirely topological, FixLinearization cannot have
// modified it. This is implied by the assertion above already, but repeat it explicitly.
// If linearization was entirely topological, FixLinearization cannot worsen it.
if (topo_prefix == linearization.size()) {
assert(linearization == linearization_fixed);
auto chunking = ChunkLinearization(depgraph, linearization);
auto chunking_fixed = ChunkLinearization(depgraph, linearization_fixed);
auto cmp = CompareChunks(chunking_fixed, chunking);
assert(cmp >= 0);
}
}