Merge bitcoin/bitcoin#21726: Improve Indices on pruned nodes via prune blockers

71c3f0356c move-only: Rename index + pruning functional test (Fabian Jahr)
de08932efa test: Update test for indices on pruned nodes (Fabian Jahr)
825d19839b Index: Allow coinstatsindex with pruning enabled (Fabian Jahr)
f08c9fb0c6 Index: Use prune locks for blockfilterindex (Fabian Jahr)
2561823531 blockstorage: Add prune locks to BlockManager (Fabian Jahr)
231fc7b035 refactor: Introduce GetFirstStoredBlock helper function (Fabian Jahr)

Pull request description:

  # Motivation
  The main motivation of this change and only behavior change noticeable by user is to allow running `coinstatsindex` on pruned nodes as has been requested [here for example](https://twitter.com/benthecarman/status/1388170854140452870?s=20).

  # Background
  `coinstatsindex` on pruned nodes can be enabled in a much simpler than it is done here but it comes with downside. The ability to run `blockfilterindex`on pruned nodes was added in #15946 but it also added the `blockfilterindex` as a dependency to `validation` and it introduced two new circular dependencies. Enabling `coinstatsindex` on pruned nodes in a similar way would add it as a dependency as well and introduce another circular dependency.

  Instead, this PR introduces a `m_prune_blockers` map to `BlockManager` as a flexible approach to block pruning. Entities like `blockfilterindex`, for example, can add a key and a height to block pruning over that height. These entities need to update that value to allow more pruning when they are ready.

  # Alternative approach
  Upon completing the first draft of this PR I found #19463 as an alternative that follows the same but follows a very different approach. I am listing the main differences here as I see them:
  - Usage of globals
  - Blocks pruning with a start and a stop height
  - Can persist blockers across restarts
  - Blockers can be set/unset via RPCs

  Personally, I don't think any of these are necessary to be added here but if the general approach or specific features are more appealing to reviewers I am happy to change to a solution based on that PR or port over specific parts of it here.

ACKs for top commit:
  mzumsande:
    Code review ACK 71c3f0356c
  ryanofsky:
    Code review ACK 71c3f0356c. Changes since last review: just tweaking comments and asserts, and rebasing
  w0xlt:
    tACK 71c3f0356c on signet.

Tree-SHA512: de7efda08b44aa31013fbebc47a02cd2de32db170b570f9643e1f013fee0e8e7ca3068952d1acc6e5e74a70910735c5f263437981ad73df841ad945b52d36b71
This commit is contained in:
fanquake
2022-04-26 19:19:56 +01:00
15 changed files with 265 additions and 116 deletions

View File

@ -1,78 +0,0 @@
#!/usr/bin/env python3
# Copyright (c) 2020-2021 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test blockfilterindex in conjunction with prune."""
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
assert_equal,
assert_greater_than,
assert_raises_rpc_error,
)
class FeatureBlockfilterindexPruneTest(BitcoinTestFramework):
def set_test_params(self):
self.num_nodes = 1
self.extra_args = [["-fastprune", "-prune=1", "-blockfilterindex=1"]]
def sync_index(self, height):
expected = {'basic block filter index': {'synced': True, 'best_block_height': height}}
self.wait_until(lambda: self.nodes[0].getindexinfo() == expected)
def run_test(self):
self.log.info("check if we can access a blockfilter when pruning is enabled but no blocks are actually pruned")
self.sync_index(height=200)
assert_greater_than(len(self.nodes[0].getblockfilter(self.nodes[0].getbestblockhash())['filter']), 0)
self.generate(self.nodes[0], 500)
self.sync_index(height=700)
self.log.info("prune some blocks")
pruneheight = self.nodes[0].pruneblockchain(400)
# the prune heights used here and below are magic numbers that are determined by the
# thresholds at which block files wrap, so they depend on disk serialization and default block file size.
assert_equal(pruneheight, 249)
self.log.info("check if we can access the tips blockfilter when we have pruned some blocks")
assert_greater_than(len(self.nodes[0].getblockfilter(self.nodes[0].getbestblockhash())['filter']), 0)
self.log.info("check if we can access the blockfilter of a pruned block")
assert_greater_than(len(self.nodes[0].getblockfilter(self.nodes[0].getblockhash(2))['filter']), 0)
# mine and sync index up to a height that will later be the pruneheight
self.generate(self.nodes[0], 51)
self.sync_index(height=751)
self.log.info("start node without blockfilterindex")
self.restart_node(0, extra_args=["-fastprune", "-prune=1"])
self.log.info("make sure accessing the blockfilters throws an error")
assert_raises_rpc_error(-1, "Index is not enabled for filtertype basic", self.nodes[0].getblockfilter, self.nodes[0].getblockhash(2))
self.generate(self.nodes[0], 749)
self.log.info("prune exactly up to the blockfilterindexes best block while blockfilters are disabled")
pruneheight_2 = self.nodes[0].pruneblockchain(1000)
assert_equal(pruneheight_2, 751)
self.restart_node(0, extra_args=["-fastprune", "-prune=1", "-blockfilterindex=1"])
self.log.info("make sure that we can continue with the partially synced index after having pruned up to the index height")
self.sync_index(height=1500)
self.log.info("prune below the blockfilterindexes best block while blockfilters are disabled")
self.restart_node(0, extra_args=["-fastprune", "-prune=1"])
self.generate(self.nodes[0], 1000)
pruneheight_3 = self.nodes[0].pruneblockchain(2000)
assert_greater_than(pruneheight_3, pruneheight_2)
self.stop_node(0)
self.log.info("make sure we get an init error when starting the node again with block filters")
self.nodes[0].assert_start_raises_init_error(
extra_args=["-fastprune", "-prune=1", "-blockfilterindex=1"],
expected_msg="Error: basic block filter index best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)",
)
self.log.info("make sure the node starts again with the -reindex arg")
self.start_node(0, extra_args=["-fastprune", "-prune=1", "-blockfilterindex", "-reindex"])
if __name__ == '__main__':
FeatureBlockfilterindexPruneTest().main()

View File

@ -0,0 +1,156 @@
#!/usr/bin/env python3
# Copyright (c) 2020-2021 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test indices in conjunction with prune."""
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
assert_equal,
assert_greater_than,
assert_raises_rpc_error,
p2p_port,
)
class FeatureIndexPruneTest(BitcoinTestFramework):
def set_test_params(self):
self.num_nodes = 4
self.extra_args = [
["-fastprune", "-prune=1", "-blockfilterindex=1"],
["-fastprune", "-prune=1", "-coinstatsindex=1"],
["-fastprune", "-prune=1", "-blockfilterindex=1", "-coinstatsindex=1"],
[]
]
def sync_index(self, height):
expected_filter = {
'basic block filter index': {'synced': True, 'best_block_height': height},
}
self.wait_until(lambda: self.nodes[0].getindexinfo() == expected_filter)
expected_stats = {
'coinstatsindex': {'synced': True, 'best_block_height': height}
}
self.wait_until(lambda: self.nodes[1].getindexinfo() == expected_stats)
expected = {**expected_filter, **expected_stats}
self.wait_until(lambda: self.nodes[2].getindexinfo() == expected)
def reconnect_nodes(self):
self.connect_nodes(0,1)
self.connect_nodes(0,2)
self.connect_nodes(0,3)
def mine_batches(self, blocks):
n = blocks // 250
for _ in range(n):
self.generate(self.nodes[0], 250)
self.generate(self.nodes[0], blocks % 250)
self.sync_blocks()
def restart_without_indices(self):
for i in range(3):
self.restart_node(i, extra_args=["-fastprune", "-prune=1"])
self.reconnect_nodes()
def run_test(self):
filter_nodes = [self.nodes[0], self.nodes[2]]
stats_nodes = [self.nodes[1], self.nodes[2]]
self.log.info("check if we can access blockfilters and coinstats when pruning is enabled but no blocks are actually pruned")
self.sync_index(height=200)
tip = self.nodes[0].getbestblockhash()
for node in filter_nodes:
assert_greater_than(len(node.getblockfilter(tip)['filter']), 0)
for node in stats_nodes:
assert(node.gettxoutsetinfo(hash_type="muhash", hash_or_height=tip)['muhash'])
self.mine_batches(500)
self.sync_index(height=700)
self.log.info("prune some blocks")
for node in self.nodes[:2]:
with node.assert_debug_log(['limited pruning to height 689']):
pruneheight_new = node.pruneblockchain(400)
# the prune heights used here and below are magic numbers that are determined by the
# thresholds at which block files wrap, so they depend on disk serialization and default block file size.
assert_equal(pruneheight_new, 249)
self.log.info("check if we can access the tips blockfilter and coinstats when we have pruned some blocks")
tip = self.nodes[0].getbestblockhash()
for node in filter_nodes:
assert_greater_than(len(node.getblockfilter(tip)['filter']), 0)
for node in stats_nodes:
assert(node.gettxoutsetinfo(hash_type="muhash", hash_or_height=tip)['muhash'])
self.log.info("check if we can access the blockfilter and coinstats of a pruned block")
height_hash = self.nodes[0].getblockhash(2)
for node in filter_nodes:
assert_greater_than(len(node.getblockfilter(height_hash)['filter']), 0)
for node in stats_nodes:
assert(node.gettxoutsetinfo(hash_type="muhash", hash_or_height=height_hash)['muhash'])
# mine and sync index up to a height that will later be the pruneheight
self.generate(self.nodes[0], 51)
self.sync_index(height=751)
self.restart_without_indices()
self.log.info("make sure trying to access the indices throws errors")
for node in filter_nodes:
msg = "Index is not enabled for filtertype basic"
assert_raises_rpc_error(-1, msg, node.getblockfilter, height_hash)
for node in stats_nodes:
msg = "Querying specific block heights requires coinstatsindex"
assert_raises_rpc_error(-8, msg, node.gettxoutsetinfo, "muhash", height_hash)
self.mine_batches(749)
self.log.info("prune exactly up to the indices best blocks while the indices are disabled")
for i in range(3):
pruneheight_2 = self.nodes[i].pruneblockchain(1000)
assert_equal(pruneheight_2, 751)
# Restart the nodes again with the indices activated
self.restart_node(i, extra_args=self.extra_args[i])
self.log.info("make sure that we can continue with the partially synced indices after having pruned up to the index height")
self.sync_index(height=1500)
self.log.info("prune further than the indices best blocks while the indices are disabled")
self.restart_without_indices()
self.mine_batches(1000)
for i in range(3):
pruneheight_3 = self.nodes[i].pruneblockchain(2000)
assert_greater_than(pruneheight_3, pruneheight_2)
self.stop_node(i)
self.log.info("make sure we get an init error when starting the nodes again with the indices")
filter_msg = "Error: basic block filter index best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)"
stats_msg = "Error: coinstatsindex best block of the index goes beyond pruned data. Please disable the index or reindex (which will download the whole blockchain again)"
for i, msg in enumerate([filter_msg, stats_msg, filter_msg]):
self.nodes[i].assert_start_raises_init_error(extra_args=self.extra_args[i], expected_msg=msg)
self.log.info("make sure the nodes start again with the indices and an additional -reindex arg")
ip_port = "127.0.0.1:" + str(p2p_port(3))
for i in range(3):
# The nodes need to be reconnected to the non-pruning node upon restart, otherwise they will be stuck
restart_args = self.extra_args[i]+["-reindex", f"-connect={ip_port}"]
self.restart_node(i, extra_args=restart_args)
self.sync_blocks(timeout=300)
for node in self.nodes[:2]:
with node.assert_debug_log(['limited pruning to height 2489']):
pruneheight_new = node.pruneblockchain(2500)
assert_equal(pruneheight_new, 2006)
self.log.info("ensure that prune locks don't prevent indices from failing in a reorg scenario")
with self.nodes[0].assert_debug_log(['basic block filter index prune lock moved back to 2480']):
self.nodes[3].invalidateblock(self.nodes[0].getblockhash(2480))
self.generate(self.nodes[3], 30)
self.sync_blocks()
if __name__ == '__main__':
FeatureIndexPruneTest().main()

View File

@ -137,10 +137,6 @@ class PruneTest(BitcoinTestFramework):
expected_msg='Error: Prune mode is incompatible with -txindex.',
extra_args=['-prune=550', '-txindex'],
)
self.nodes[0].assert_start_raises_init_error(
expected_msg='Error: Prune mode is incompatible with -coinstatsindex.',
extra_args=['-prune=550', '-coinstatsindex'],
)
self.nodes[0].assert_start_raises_init_error(
expected_msg='Error: Prune mode is incompatible with -reindex-chainstate. Use full -reindex instead.',
extra_args=['-prune=550', '-reindex-chainstate'],

View File

@ -82,6 +82,7 @@ EXTENDED_SCRIPTS = [
# Longest test should go first, to favor running tests in parallel
'feature_pruning.py',
'feature_dbcrash.py',
'feature_index_prune.py',
]
BASE_SCRIPTS = [
@ -333,7 +334,6 @@ BASE_SCRIPTS = [
'feature_help.py',
'feature_shutdown.py',
'p2p_ibd_txrelay.py',
'feature_blockfilterindex_prune.py'
# Don't append tests at the end to avoid merge conflicts
# Put them in a random line within the section that fits their approximate run-time
]