mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-02-09 08:43:04 +01:00
Merge bitcoin/bitcoin#33878: refactor, docs: Embedded ASMap [2/3]: Refactor asmap internals and add documentation
4fec726c4d refactor: Simplify Interpret asmap function (Fabian Jahr) 79e97d45c1 doc: Add more extensive docs to asmap implementation (Fabian Jahr) cf4943fdcd refactor: Use span instead of vector for data in util/asmap (Fabian Jahr) 385c34a052 refactor: Unify asmap version calculation and naming (Fabian Jahr) fa41fc6a1a refactor: Operate on bytes instead of bits in Asmap code (Fabian Jahr) Pull request description: This is a second slice carved out of #28792. It contains the following changes that are crucial for the embedding of asmap data which is added in the following PR in the series (probably this will remain in #28792). The changes are: - Modernizes and simplifies the asmap code by operating on `std::byte` instead of bits - Unifies asmap version calculation and naming (previously it was called version and checksum interchangeably) - Operate on a `span` rather than a vector in the asmap internals to prevent holding the asmap data in memory twice - Add more extensive documentation to the asmap implementation - Unify asmap casing in implementation function names The first three commits were already part of #28792, the others are new. The documentation commit came out of feedback gathered at the latest CoreDev. The primary input for the documentation was the documentation that already existed in the Python implementation (`contrib/asmap/asmap.py`) but there are several other comments as well. Please note: I have also asked several LLMs to provide suggestions on how to explain pieces of the implementation and better demonstrate how the parts work together. I have copied bits and pieces that I liked but everything has been edited further by me and obviously all mistakes here are my own. ACKs for top commit: hodlinator: re-ACK 4fec726c4d sipa: ACK 4fec726c4d sedited: Re-ACK 4fec726c4d Tree-SHA512: 950a591c3fcc9ddb28fcfdc3164ad3fbd325fa5004533c4a8b670fbf8b956060a0daeedd1fc2fced1f761ac49cd992b79cabe12ef46bc60b2559a7a613d0e166
This commit is contained in:
@@ -157,7 +157,7 @@ class _Instruction(Enum):
|
||||
JUMP = 1
|
||||
# A match instruction, encoded as [1,1,0] inspects 1 or more of the next unused bits
|
||||
# in the input with its argument. If they all match, execution continues. If they do
|
||||
# not, failure is returned. If a default instruction has been executed before, instead
|
||||
# not, failure (represented by 0) is returned. If a default instruction has been executed before, instead
|
||||
# of failure the default instruction's argument is returned. It is followed by an
|
||||
# integer in match encoding, and a subprogram. That value is at least 2 bits and at
|
||||
# most 9 bits. An n-bit value signifies matching (n-1) bits in the input with the lower
|
||||
|
||||
@@ -156,7 +156,7 @@ void AddrManImpl::Serialize(Stream& s_) const
|
||||
* * for each new bucket:
|
||||
* * number of elements
|
||||
* * for each element: index in the serialized "all new addresses"
|
||||
* * asmap checksum
|
||||
* * asmap version
|
||||
*
|
||||
* 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it
|
||||
* as incompatible. This is necessary because it did not check the version number on
|
||||
@@ -222,9 +222,9 @@ void AddrManImpl::Serialize(Stream& s_) const
|
||||
}
|
||||
}
|
||||
}
|
||||
// Store asmap checksum after bucket entries so that it
|
||||
// Store asmap version after bucket entries so that it
|
||||
// can be ignored by older clients for backward compatibility.
|
||||
s << m_netgroupman.GetAsmapChecksum();
|
||||
s << m_netgroupman.GetAsmapVersion();
|
||||
}
|
||||
|
||||
template <typename Stream>
|
||||
@@ -330,16 +330,16 @@ void AddrManImpl::Unserialize(Stream& s_)
|
||||
}
|
||||
}
|
||||
|
||||
// If the bucket count and asmap checksum haven't changed, then attempt
|
||||
// If the bucket count and asmap version haven't changed, then attempt
|
||||
// to restore the entries to the buckets/positions they were in before
|
||||
// serialization.
|
||||
uint256 supplied_asmap_checksum{m_netgroupman.GetAsmapChecksum()};
|
||||
uint256 serialized_asmap_checksum;
|
||||
uint256 supplied_asmap_version{m_netgroupman.GetAsmapVersion()};
|
||||
uint256 serialized_asmap_version;
|
||||
if (format >= Format::V2_ASMAP) {
|
||||
s >> serialized_asmap_checksum;
|
||||
s >> serialized_asmap_version;
|
||||
}
|
||||
const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT &&
|
||||
serialized_asmap_checksum == supplied_asmap_checksum};
|
||||
serialized_asmap_version == supplied_asmap_version};
|
||||
|
||||
if (!restore_bucketing) {
|
||||
LogDebug(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n");
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
static constexpr size_t NUM_SOURCES = 64;
|
||||
static constexpr size_t NUM_ADDRESSES_PER_SOURCE = 256;
|
||||
|
||||
static NetGroupManager EMPTY_NETGROUPMAN{std::vector<bool>()};
|
||||
static auto EMPTY_NETGROUPMAN{NetGroupManager::NoAsmap()};
|
||||
static constexpr uint32_t ADDRMAN_CONSISTENCY_CHECK_RATIO{0};
|
||||
|
||||
static std::vector<CAddress> g_sources;
|
||||
|
||||
17
src/init.cpp
17
src/init.cpp
@@ -96,6 +96,7 @@
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <condition_variable>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
@@ -1559,9 +1560,9 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
|
||||
ApplyArgsManOptions(args, peerman_opts);
|
||||
|
||||
{
|
||||
|
||||
// Read asmap file if configured
|
||||
std::vector<bool> asmap;
|
||||
// Read asmap file if configured and initialize
|
||||
// Netgroupman with or without it
|
||||
assert(!node.netgroupman);
|
||||
if (args.IsArgSet("-asmap") && !args.IsArgNegated("-asmap")) {
|
||||
fs::path asmap_path = args.GetPathArg("-asmap");
|
||||
if (asmap_path.empty()) {
|
||||
@@ -1575,21 +1576,19 @@ bool AppInitMain(NodeContext& node, interfaces::BlockAndHeaderTipInfo* tip_info)
|
||||
InitError(strprintf(_("Could not find asmap file %s"), fs::quoted(fs::PathToString(asmap_path))));
|
||||
return false;
|
||||
}
|
||||
asmap = DecodeAsmap(asmap_path);
|
||||
std::vector<std::byte> asmap{DecodeAsmap(asmap_path)};
|
||||
if (asmap.size() == 0) {
|
||||
InitError(strprintf(_("Could not parse asmap file %s"), fs::quoted(fs::PathToString(asmap_path))));
|
||||
return false;
|
||||
}
|
||||
const uint256 asmap_version = (HashWriter{} << asmap).GetHash();
|
||||
const uint256 asmap_version = AsmapVersion(asmap);
|
||||
node.netgroupman = std::make_unique<NetGroupManager>(NetGroupManager::WithLoadedAsmap(std::move(asmap)));
|
||||
LogInfo("Using asmap version %s for IP bucketing", asmap_version.ToString());
|
||||
} else {
|
||||
node.netgroupman = std::make_unique<NetGroupManager>(NetGroupManager::NoAsmap());
|
||||
LogInfo("Using /16 prefix for IP bucketing");
|
||||
}
|
||||
|
||||
// Initialize netgroup manager
|
||||
assert(!node.netgroupman);
|
||||
node.netgroupman = std::make_unique<NetGroupManager>(std::move(asmap));
|
||||
|
||||
// Initialize addrman
|
||||
assert(!node.addrman);
|
||||
uiInterface.InitMessage(_("Loading P2P addresses…"));
|
||||
|
||||
@@ -6,13 +6,14 @@
|
||||
|
||||
#include <hash.h>
|
||||
#include <logging.h>
|
||||
#include <uint256.h>
|
||||
#include <util/asmap.h>
|
||||
|
||||
uint256 NetGroupManager::GetAsmapChecksum() const
|
||||
{
|
||||
if (!m_asmap.size()) return {};
|
||||
#include <cstddef>
|
||||
|
||||
return (HashWriter{} << m_asmap).GetHash();
|
||||
uint256 NetGroupManager::GetAsmapVersion() const
|
||||
{
|
||||
return AsmapVersion(m_asmap);
|
||||
}
|
||||
|
||||
std::vector<unsigned char> NetGroupManager::GetGroup(const CNetAddr& address) const
|
||||
@@ -81,33 +82,27 @@ std::vector<unsigned char> NetGroupManager::GetGroup(const CNetAddr& address) co
|
||||
uint32_t NetGroupManager::GetMappedAS(const CNetAddr& address) const
|
||||
{
|
||||
uint32_t net_class = address.GetNetClass();
|
||||
if (m_asmap.size() == 0 || (net_class != NET_IPV4 && net_class != NET_IPV6)) {
|
||||
if (m_asmap.empty() || (net_class != NET_IPV4 && net_class != NET_IPV6)) {
|
||||
return 0; // Indicates not found, safe because AS0 is reserved per RFC7607.
|
||||
}
|
||||
std::vector<bool> ip_bits(128);
|
||||
std::vector<std::byte> ip_bytes(16);
|
||||
if (address.HasLinkedIPv4()) {
|
||||
// For lookup, treat as if it was just an IPv4 address (IPV4_IN_IPV6_PREFIX + IPv4 bits)
|
||||
for (int8_t byte_i = 0; byte_i < 12; ++byte_i) {
|
||||
for (uint8_t bit_i = 0; bit_i < 8; ++bit_i) {
|
||||
ip_bits[byte_i * 8 + bit_i] = (IPV4_IN_IPV6_PREFIX[byte_i] >> (7 - bit_i)) & 1;
|
||||
}
|
||||
}
|
||||
std::copy_n(std::as_bytes(std::span{IPV4_IN_IPV6_PREFIX}).begin(),
|
||||
IPV4_IN_IPV6_PREFIX.size(), ip_bytes.begin());
|
||||
uint32_t ipv4 = address.GetLinkedIPv4();
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
ip_bits[96 + i] = (ipv4 >> (31 - i)) & 1;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
ip_bytes[12 + i] = std::byte((ipv4 >> (24 - i * 8)) & 0xFF);
|
||||
}
|
||||
} else {
|
||||
// Use all 128 bits of the IPv6 address otherwise
|
||||
assert(address.IsIPv6());
|
||||
auto addr_bytes = address.GetAddrBytes();
|
||||
for (int8_t byte_i = 0; byte_i < 16; ++byte_i) {
|
||||
uint8_t cur_byte = addr_bytes[byte_i];
|
||||
for (uint8_t bit_i = 0; bit_i < 8; ++bit_i) {
|
||||
ip_bits[byte_i * 8 + bit_i] = (cur_byte >> (7 - bit_i)) & 1;
|
||||
}
|
||||
}
|
||||
assert(addr_bytes.size() == ip_bytes.size());
|
||||
std::copy_n(std::as_bytes(std::span{addr_bytes}).begin(),
|
||||
addr_bytes.size(), ip_bytes.begin());
|
||||
}
|
||||
uint32_t mapped_as = Interpret(m_asmap, ip_bits);
|
||||
uint32_t mapped_as = Interpret(m_asmap, ip_bytes);
|
||||
return mapped_as;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <netaddress.h>
|
||||
#include <uint256.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
/**
|
||||
@@ -15,12 +16,25 @@
|
||||
*/
|
||||
class NetGroupManager {
|
||||
public:
|
||||
explicit NetGroupManager(std::vector<bool> asmap)
|
||||
: m_asmap{std::move(asmap)}
|
||||
{}
|
||||
NetGroupManager(const NetGroupManager&) = delete;
|
||||
NetGroupManager(NetGroupManager&&) = default;
|
||||
NetGroupManager& operator=(const NetGroupManager&) = delete;
|
||||
NetGroupManager& operator=(NetGroupManager&&) = delete;
|
||||
|
||||
/** Get a checksum identifying the asmap being used. */
|
||||
uint256 GetAsmapChecksum() const;
|
||||
static NetGroupManager WithEmbeddedAsmap(std::span<const std::byte> asmap) {
|
||||
return NetGroupManager(asmap, {});
|
||||
}
|
||||
|
||||
static NetGroupManager WithLoadedAsmap(std::vector<std::byte>&& asmap) {
|
||||
return NetGroupManager(std::span{asmap}, std::move(asmap));
|
||||
}
|
||||
|
||||
static NetGroupManager NoAsmap() {
|
||||
return NetGroupManager({}, {});
|
||||
}
|
||||
|
||||
/** Get the asmap version, a checksum identifying the asmap being used. */
|
||||
uint256 GetAsmapVersion() const;
|
||||
|
||||
/**
|
||||
* Get the canonical identifier of the network group for address.
|
||||
@@ -52,7 +66,10 @@ public:
|
||||
bool UsingASMap() const;
|
||||
|
||||
private:
|
||||
/** Compressed IP->ASN mapping, loaded from a file when a node starts.
|
||||
/** Compressed IP->ASN mapping.
|
||||
*
|
||||
* Data may be loaded from a file when a node starts or embedded in the
|
||||
* binary.
|
||||
*
|
||||
* This mapping is then used for bucketing nodes in Addrman and for
|
||||
* ensuring we connect to a diverse set of peers in Connman. The map is
|
||||
@@ -69,8 +86,19 @@ private:
|
||||
* re-bucketed.
|
||||
*
|
||||
* This is initialized in the constructor, const, and therefore is
|
||||
* thread-safe. */
|
||||
const std::vector<bool> m_asmap;
|
||||
* thread-safe. m_asmap can either point to m_loaded_asmap which holds
|
||||
* data loaded from an external file at runtime or it can point to embedded
|
||||
* asmap data.
|
||||
*/
|
||||
const std::span<const std::byte> m_asmap;
|
||||
std::vector<std::byte> m_loaded_asmap;
|
||||
|
||||
explicit NetGroupManager(std::span<const std::byte> embedded_asmap, std::vector<std::byte>&& loaded_asmap)
|
||||
: m_asmap{embedded_asmap},
|
||||
m_loaded_asmap{std::move(loaded_asmap)}
|
||||
{
|
||||
assert(m_loaded_asmap.empty() || m_asmap.data() == m_loaded_asmap.data());
|
||||
}
|
||||
};
|
||||
|
||||
#endif // BITCOIN_NETGROUP_H
|
||||
|
||||
@@ -25,7 +25,7 @@ using namespace std::literals;
|
||||
using node::NodeContext;
|
||||
using util::ToString;
|
||||
|
||||
static NetGroupManager EMPTY_NETGROUPMAN{std::vector<bool>()};
|
||||
static auto EMPTY_NETGROUPMAN{NetGroupManager::NoAsmap()};
|
||||
static const bool DETERMINISTIC{true};
|
||||
|
||||
static int32_t GetCheckRatio(const NodeContext& node_ctx)
|
||||
@@ -47,20 +47,6 @@ static CService ResolveService(const std::string& ip, uint16_t port = 0)
|
||||
return serv.value_or(CService{});
|
||||
}
|
||||
|
||||
|
||||
static std::vector<bool> FromBytes(std::span<const std::byte> source)
|
||||
{
|
||||
int vector_size(source.size() * 8);
|
||||
std::vector<bool> result(vector_size);
|
||||
for (int byte_i = 0; byte_i < vector_size / 8; ++byte_i) {
|
||||
uint8_t cur_byte{std::to_integer<uint8_t>(source[byte_i])};
|
||||
for (int bit_i = 0; bit_i < 8; ++bit_i) {
|
||||
result[byte_i * 8 + bit_i] = (cur_byte >> bit_i) & 1;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
BOOST_FIXTURE_TEST_SUITE(addrman_tests, BasicTestingSetup)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(addrman_simple)
|
||||
@@ -638,8 +624,7 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket_legacy)
|
||||
// 101.8.0.0/16 AS8
|
||||
BOOST_AUTO_TEST_CASE(caddrinfo_get_tried_bucket)
|
||||
{
|
||||
std::vector<bool> asmap = FromBytes(test::data::asmap);
|
||||
NetGroupManager ngm_asmap{asmap};
|
||||
auto ngm_asmap{NetGroupManager::WithEmbeddedAsmap(test::data::asmap)};
|
||||
|
||||
CAddress addr1 = CAddress(ResolveService("250.1.1.1", 8333), NODE_NONE);
|
||||
CAddress addr2 = CAddress(ResolveService("250.1.1.1", 9999), NODE_NONE);
|
||||
@@ -692,8 +677,7 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_tried_bucket)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket)
|
||||
{
|
||||
std::vector<bool> asmap = FromBytes(test::data::asmap);
|
||||
NetGroupManager ngm_asmap{asmap};
|
||||
auto ngm_asmap{NetGroupManager::WithEmbeddedAsmap(test::data::asmap)};
|
||||
|
||||
CAddress addr1 = CAddress(ResolveService("250.1.2.1", 8333), NODE_NONE);
|
||||
CAddress addr2 = CAddress(ResolveService("250.1.2.1", 9999), NODE_NONE);
|
||||
@@ -770,8 +754,7 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(addrman_serialization)
|
||||
{
|
||||
std::vector<bool> asmap1 = FromBytes(test::data::asmap);
|
||||
NetGroupManager netgroupman{asmap1};
|
||||
auto netgroupman{NetGroupManager::WithEmbeddedAsmap(test::data::asmap)};
|
||||
|
||||
const auto ratio = GetCheckRatio(m_node);
|
||||
auto addrman_asmap1 = std::make_unique<AddrMan>(netgroupman, DETERMINISTIC, ratio);
|
||||
|
||||
@@ -6,28 +6,18 @@
|
||||
#include <netgroup.h>
|
||||
#include <test/fuzz/fuzz.h>
|
||||
#include <util/asmap.h>
|
||||
#include <util/strencodings.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
using namespace util::hex_literals;
|
||||
|
||||
//! asmap code that consumes nothing
|
||||
static const std::vector<bool> IPV6_PREFIX_ASMAP = {};
|
||||
static const std::vector<std::byte> IPV6_PREFIX_ASMAP = {};
|
||||
|
||||
//! asmap code that consumes the 96 prefix bits of ::ffff:0/96 (IPv4-in-IPv6 map)
|
||||
static const std::vector<bool> IPV4_PREFIX_ASMAP = {
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, // Match 0x00
|
||||
true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, // Match 0xFF
|
||||
true, true, false, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true // Match 0xFF
|
||||
};
|
||||
static const auto IPV4_PREFIX_ASMAP = "fb03ec0fb03fc0fe00fb03ec0fb03fc0fe00fb03ec0fb0fffffeff"_hex_v;
|
||||
|
||||
FUZZ_TARGET(asmap)
|
||||
{
|
||||
@@ -37,14 +27,9 @@ FUZZ_TARGET(asmap)
|
||||
bool ipv6 = buffer[0] & 128;
|
||||
const size_t addr_size = ipv6 ? ADDR_IPV6_SIZE : ADDR_IPV4_SIZE;
|
||||
if (buffer.size() < size_t(1 + asmap_size + addr_size)) return;
|
||||
std::vector<bool> asmap = ipv6 ? IPV6_PREFIX_ASMAP : IPV4_PREFIX_ASMAP;
|
||||
asmap.reserve(asmap.size() + 8 * asmap_size);
|
||||
for (int i = 0; i < asmap_size; ++i) {
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
asmap.push_back((buffer[1 + i] >> j) & 1);
|
||||
}
|
||||
}
|
||||
if (!SanityCheckASMap(asmap, 128)) return;
|
||||
std::vector<std::byte> asmap = ipv6 ? IPV6_PREFIX_ASMAP : IPV4_PREFIX_ASMAP;
|
||||
std::ranges::copy(std::as_bytes(buffer.subspan(1, asmap_size)), std::back_inserter(asmap));
|
||||
if (!CheckStandardAsmap(asmap)) return;
|
||||
|
||||
const uint8_t* addr_data = buffer.data() + 1 + asmap_size;
|
||||
CNetAddr net_addr;
|
||||
@@ -57,6 +42,6 @@ FUZZ_TARGET(asmap)
|
||||
memcpy(&ipv4, addr_data, addr_size);
|
||||
net_addr.SetIP(CNetAddr{ipv4});
|
||||
}
|
||||
NetGroupManager netgroupman{asmap};
|
||||
auto netgroupman{NetGroupManager::WithEmbeddedAsmap(asmap)};
|
||||
(void)netgroupman.GetMappedAS(net_addr);
|
||||
}
|
||||
|
||||
@@ -12,6 +12,24 @@
|
||||
|
||||
#include <cassert>
|
||||
|
||||
/**
 * Pack a sequence of bits, supplied one per input element, into bytes.
 *
 * Only the lowest bit of each element of `bits` is used. Bits fill each output
 * byte starting at the least significant position, so the first input bit ends
 * up in bit 0 of the first output byte. If the number of input bits is not a
 * multiple of 8, a final byte is emitted whose unused high bits are zero.
 *
 * @param bits  Input elements, each contributing its lowest bit.
 * @return      The packed bytes; empty if `bits` is empty.
 */
std::vector<std::byte> BitsToBytes(std::span<const uint8_t> bits) noexcept
|
||||
{
|
||||
std::vector<std::byte> ret;
|
||||
// Byte currently being assembled and the number of bits already placed in it.
uint8_t next_byte{0};
|
||||
int next_byte_bits{0};
|
||||
for (uint8_t val : bits) {
|
||||
// Place the low bit of val at the next free position, counting up from bit 0.
next_byte |= (val & 1) << (next_byte_bits++);
|
||||
if (next_byte_bits == 8) {
|
||||
// Byte is full: emit it and start a fresh one.
ret.push_back(std::byte(next_byte));
|
||||
next_byte = 0;
|
||||
next_byte_bits = 0;
|
||||
}
|
||||
}
|
||||
// Emit a trailing partial byte, if any; its remaining high bits stay zero.
if (next_byte_bits) ret.push_back(std::byte(next_byte));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
FUZZ_TARGET(asmap_direct)
|
||||
{
|
||||
// Encoding: [asmap using 1 bit / byte] 0xFF [addr using 1 bit / byte]
|
||||
@@ -28,22 +46,24 @@ FUZZ_TARGET(asmap_direct)
|
||||
}
|
||||
if (!sep_pos_opt) return; // Needs exactly 1 separator
|
||||
const size_t sep_pos{sep_pos_opt.value()};
|
||||
if (buffer.size() - sep_pos - 1 > 128) return; // At most 128 bits in IP address
|
||||
const size_t ip_len{buffer.size() - sep_pos - 1};
|
||||
if (ip_len > 128) return; // At most 128 bits in IP address
|
||||
|
||||
// Checks on asmap
|
||||
std::vector<bool> asmap(buffer.begin(), buffer.begin() + sep_pos);
|
||||
if (SanityCheckASMap(asmap, buffer.size() - 1 - sep_pos)) {
|
||||
auto asmap = BitsToBytes(buffer.first(sep_pos));
|
||||
if (SanityCheckAsmap(asmap, ip_len)) {
|
||||
// Verify that for valid asmaps, no prefix (except up to 7 zero padding bits) is valid.
|
||||
std::vector<bool> asmap_prefix = asmap;
|
||||
while (!asmap_prefix.empty() && asmap_prefix.size() + 7 > asmap.size() && asmap_prefix.back() == false) {
|
||||
asmap_prefix.pop_back();
|
||||
}
|
||||
while (!asmap_prefix.empty()) {
|
||||
asmap_prefix.pop_back();
|
||||
assert(!SanityCheckASMap(asmap_prefix, buffer.size() - 1 - sep_pos));
|
||||
for (size_t prefix_len = sep_pos - 1; prefix_len > 0; --prefix_len) {
|
||||
auto prefix = BitsToBytes(buffer.first(prefix_len));
|
||||
// We have to skip the prefixes of the same length as the original
|
||||
// asmap, since they will contain some zero padding bits in the last
|
||||
// byte.
|
||||
if (prefix.size() == asmap.size()) continue;
|
||||
assert(!SanityCheckAsmap(prefix, ip_len));
|
||||
}
|
||||
|
||||
// No address input should trigger assertions in interpreter
|
||||
std::vector<bool> addr(buffer.begin() + sep_pos + 1, buffer.end());
|
||||
auto addr = BitsToBytes(buffer.subspan(sep_pos + 1));
|
||||
(void)Interpret(asmap, addr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ FUZZ_TARGET(p2p_handshake, .init = ::initialize)
|
||||
chainman.ResetIbd();
|
||||
|
||||
node::Warnings warnings{};
|
||||
NetGroupManager netgroupman{{}};
|
||||
auto netgroupman{NetGroupManager::NoAsmap()};
|
||||
AddrMan addrman{netgroupman, /*deterministic=*/true, /*consistency_check_ratio=*/0};
|
||||
auto peerman = PeerManager::make(connman, addrman,
|
||||
/*banman=*/nullptr, chainman,
|
||||
|
||||
@@ -65,11 +65,6 @@ template<typename B = uint8_t>
|
||||
return ret;
|
||||
}
|
||||
|
||||
[[nodiscard]] inline std::vector<bool> ConsumeRandomLengthBitVector(FuzzedDataProvider& fuzzed_data_provider, const std::optional<size_t>& max_length = std::nullopt) noexcept
|
||||
{
|
||||
return BytesToBits(ConsumeRandomLengthByteVector(fuzzed_data_provider, max_length));
|
||||
}
|
||||
|
||||
[[nodiscard]] inline DataStream ConsumeDataStream(FuzzedDataProvider& fuzzed_data_provider, const std::optional<size_t>& max_length = std::nullopt) noexcept
|
||||
{
|
||||
return DataStream{ConsumeRandomLengthByteVector(fuzzed_data_provider, max_length)};
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <util/sock.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
@@ -234,9 +235,11 @@ public:
|
||||
|
||||
[[nodiscard]] inline NetGroupManager ConsumeNetGroupManager(FuzzedDataProvider& fuzzed_data_provider) noexcept
|
||||
{
|
||||
std::vector<bool> asmap = ConsumeRandomLengthBitVector(fuzzed_data_provider);
|
||||
if (!SanityCheckASMap(asmap, 128)) asmap.clear();
|
||||
return NetGroupManager(asmap);
|
||||
std::vector<std::byte> asmap{ConsumeRandomLengthByteVector<std::byte>(fuzzed_data_provider)};
|
||||
if (!CheckStandardAsmap(asmap)) {
|
||||
return NetGroupManager::NoAsmap();
|
||||
}
|
||||
return NetGroupManager::WithLoadedAsmap(std::move(asmap));
|
||||
}
|
||||
|
||||
inline CSubNet ConsumeSubNet(FuzzedDataProvider& fuzzed_data_provider) noexcept
|
||||
|
||||
@@ -326,7 +326,7 @@ BOOST_AUTO_TEST_CASE(subnet_test)
|
||||
|
||||
BOOST_AUTO_TEST_CASE(netbase_getgroup)
|
||||
{
|
||||
NetGroupManager netgroupman{std::vector<bool>()}; // use /16
|
||||
auto netgroupman{NetGroupManager::NoAsmap()}; // use /16
|
||||
BOOST_CHECK(netgroupman.GetGroup(ResolveIP("127.0.0.1")) == std::vector<unsigned char>({0})); // Local -> !Routable()
|
||||
BOOST_CHECK(netgroupman.GetGroup(ResolveIP("257.0.0.1")) == std::vector<unsigned char>({0})); // !Valid -> !Routable()
|
||||
BOOST_CHECK(netgroupman.GetGroup(ResolveIP("10.0.0.1")) == std::vector<unsigned char>({0})); // RFC1918 -> !Routable()
|
||||
@@ -631,17 +631,8 @@ BOOST_AUTO_TEST_CASE(asmap_test_vectors)
|
||||
"33e53662a7d72a29477b5beb35710591d3e23e5f0379baea62ffdee535bcdf879cbf69b88d7ea37c8015381cf"
|
||||
"63dc33d28f757a4a5e15d6a08"_hex};
|
||||
|
||||
// Convert to std::vector<bool> format that the ASMap interpreter uses.
|
||||
std::vector<bool> asmap_bits;
|
||||
asmap_bits.reserve(ASMAP_DATA.size() * 8);
|
||||
for (auto byte : ASMAP_DATA) {
|
||||
for (int bit = 0; bit < 8; ++bit) {
|
||||
asmap_bits.push_back((std::to_integer<uint8_t>(byte) >> bit) & 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Construct NetGroupManager with this data.
|
||||
NetGroupManager netgroup{std::move(asmap_bits)};
|
||||
auto netgroup{NetGroupManager::WithEmbeddedAsmap(ASMAP_DATA)};
|
||||
BOOST_CHECK(netgroup.UsingASMap());
|
||||
|
||||
// Check some randomly-generated IPv6 addresses in it (biased towards the very beginning and
|
||||
|
||||
@@ -353,7 +353,7 @@ TestingSetup::TestingSetup(
|
||||
|
||||
if (!opts.setup_net) return;
|
||||
|
||||
m_node.netgroupman = std::make_unique<NetGroupManager>(/*asmap=*/std::vector<bool>());
|
||||
m_node.netgroupman = std::make_unique<NetGroupManager>(NetGroupManager::NoAsmap());
|
||||
m_node.addrman = std::make_unique<AddrMan>(*m_node.netgroupman,
|
||||
/*deterministic=*/false,
|
||||
m_node.args->GetIntArg("-checkaddrman", 0));
|
||||
|
||||
@@ -5,43 +5,107 @@
|
||||
#include <util/asmap.h>
|
||||
|
||||
#include <clientversion.h>
|
||||
#include <hash.h>
|
||||
#include <logging.h>
|
||||
#include <serialize.h>
|
||||
#include <streams.h>
|
||||
#include <uint256.h>
|
||||
#include <util/fs.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
/*
|
||||
* ASMap (Autonomous System Map) Implementation
|
||||
*
|
||||
* Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
|
||||
* Uses a binary trie structure encoded as bytecode instructions that are interpreted
|
||||
* at runtime to find the ASN for a given IP address.
|
||||
*
|
||||
* The format of the asmap data is a bit-packed binary format where the entire mapping
|
||||
* is treated as a continuous sequence of bits. Instructions and their arguments are
|
||||
* encoded using variable numbers of bits and concatenated together without regard for
|
||||
* byte boundaries. The bits are stored in bytes using little-endian bit ordering.
|
||||
*
|
||||
* The data structure internally represents the mapping as a binary trie where:
|
||||
* - Unassigned subnets (no ASN mapping present) map to 0
|
||||
* - Subnets mapped entirely to one ASN become leaf nodes
|
||||
* - Subnets whose lower and upper halves have different mappings branch into subtrees
|
||||
*
|
||||
* The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
|
||||
* MATCH, DEFAULT) to efficiently represent the trie.
|
||||
*/
|
||||
|
||||
namespace {
|
||||
|
||||
// Indicates decoding errors or invalid data
|
||||
constexpr uint32_t INVALID = 0xFFFFFFFF;
|
||||
|
||||
uint32_t DecodeBits(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos, uint8_t minval, const std::vector<uint8_t> &bit_sizes)
|
||||
/**
|
||||
* Extract a single bit from byte array using little-endian bit ordering (LSB first).
|
||||
* Used for ASMap data.
|
||||
*/
|
||||
inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
|
||||
{
|
||||
uint32_t val = minval;
|
||||
const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
|
||||
++bitpos;
|
||||
return bit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a single bit from byte array using big-endian bit ordering (MSB first).
|
||||
* Used for IP addresses to match network byte order conventions.
|
||||
*/
|
||||
inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
|
||||
{
|
||||
const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
|
||||
++bitpos;
|
||||
return bit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Variable-length integer decoder using a custom encoding scheme.
|
||||
*
|
||||
* The encoding is easiest to describe using an example. Let's say minval=100 and
|
||||
* bit_sizes=[4,2,2,3]. In that case:
|
||||
* - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)]
|
||||
* - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)]
|
||||
* - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)]
|
||||
* - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)]
|
||||
*
|
||||
* In general, every number is encoded as:
|
||||
* - First, k "1"-bits, where k is the class the number falls in
|
||||
* - Then, a "0"-bit, unless k is the highest class
|
||||
* - Lastly, bit_sizes[k] bits encoding in big endian the position within that class
|
||||
*/
|
||||
uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
|
||||
{
|
||||
uint32_t val = minval; // Start with minimum encodable value
|
||||
bool bit;
|
||||
for (std::vector<uint8_t>::const_iterator bit_sizes_it = bit_sizes.begin();
|
||||
bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
|
||||
if (bit_sizes_it + 1 != bit_sizes.end()) {
|
||||
if (bitpos == endpos) break;
|
||||
bit = *bitpos;
|
||||
bitpos++;
|
||||
for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
|
||||
// Read continuation bit to determine if we're in this class
|
||||
if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class
|
||||
if (bitpos >= data.size() * 8) break;
|
||||
bit = ConsumeBitLE(bitpos, data);
|
||||
} else {
|
||||
bit = 0;
|
||||
bit = 0; // Last class has no continuation bit
|
||||
}
|
||||
if (bit) {
|
||||
val += (1 << *bit_sizes_it);
|
||||
// If the value will not fit in this class, subtract its range from val,
|
||||
// emit a "1" bit and continue with the next class
|
||||
val += (1 << *bit_sizes_it); // Add size of this class
|
||||
} else {
|
||||
// Decode the position within this class in big endian
|
||||
for (int b = 0; b < *bit_sizes_it; b++) {
|
||||
if (bitpos == endpos) return INVALID; // Reached EOF in mantissa
|
||||
bit = *bitpos;
|
||||
bitpos++;
|
||||
val += bit << (*bit_sizes_it - 1 - b);
|
||||
if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
|
||||
bit = ConsumeBitLE(bitpos, data);
|
||||
val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
|
||||
}
|
||||
return val;
|
||||
}
|
||||
@@ -49,142 +113,187 @@ uint32_t DecodeBits(std::vector<bool>::const_iterator& bitpos, const std::vector
|
||||
return INVALID; // Reached EOF in exponent
|
||||
}
|
||||
|
||||
/**
|
||||
* Instruction Set
|
||||
*
|
||||
* The instruction set is designed to efficiently encode a binary trie
|
||||
* that maps IP prefixes to ASNs. Each instruction type serves a specific
|
||||
* role in trie traversal and evaluation.
|
||||
*/
|
||||
enum class Instruction : uint32_t
|
||||
{
|
||||
// A return instruction, encoded as [0], returns a constant ASN.
|
||||
// It is followed by an integer using the ASN encoding.
|
||||
RETURN = 0,
|
||||
// A jump instruction, encoded as [1,0], inspects the next unused bit in the input
|
||||
// and either continues execution (if 0), or skips a specified number of bits (if 1).
|
||||
// It is followed by an integer using jump encoding.
|
||||
JUMP = 1,
|
||||
// A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
|
||||
// in the input. If they all match, execution continues. If not, the default ASN is returned
|
||||
// (or 0 if unset). The match value encodes both the pattern and its length.
|
||||
MATCH = 2,
|
||||
// A default instruction, encoded as [1,1,1], sets the default variable to its argument,
|
||||
// and continues execution. It is followed by an integer in ASN encoding.
|
||||
DEFAULT = 3,
|
||||
};
|
||||
|
||||
const std::vector<uint8_t> TYPE_BIT_SIZES{0, 0, 1};
|
||||
Instruction DecodeType(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
// Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
|
||||
constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
|
||||
Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
|
||||
{
|
||||
return Instruction(DecodeBits(bitpos, endpos, 0, TYPE_BIT_SIZES));
|
||||
return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
|
||||
}
|
||||
|
||||
const std::vector<uint8_t> ASN_BIT_SIZES{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
|
||||
uint32_t DecodeASN(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
// ASN encoding: Can encode ASNs from 1 to ~33.5 million (2^25 - 2^15).
|
||||
// Uses variable-length encoding optimized for real-world ASN distribution.
|
||||
// ASN 0 is reserved and used if there isn't a match.
|
||||
constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
|
||||
uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
|
||||
{
|
||||
return DecodeBits(bitpos, endpos, 1, ASN_BIT_SIZES);
|
||||
return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
|
||||
}
|
||||
|
||||
|
||||
const std::vector<uint8_t> MATCH_BIT_SIZES{1, 2, 3, 4, 5, 6, 7, 8};
|
||||
uint32_t DecodeMatch(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
// MATCH argument: Values in [2, 511]. The highest set bit determines the match length
|
||||
// n ∈ [1,8]; the lower n bits are the pattern to compare.
|
||||
constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
|
||||
uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
|
||||
{
|
||||
return DecodeBits(bitpos, endpos, 2, MATCH_BIT_SIZES);
|
||||
return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
|
||||
}
|
||||
|
||||
|
||||
const std::vector<uint8_t> JUMP_BIT_SIZES{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
|
||||
uint32_t DecodeJump(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
// JUMP offset: Minimum value 17. Variable-length coded and may be large
|
||||
// for skipping big subtrees.
|
||||
constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
|
||||
uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
|
||||
{
|
||||
return DecodeBits(bitpos, endpos, 17, JUMP_BIT_SIZES);
|
||||
return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
|
||||
}
|
||||
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
uint32_t Interpret(const std::vector<bool> &asmap, const std::vector<bool> &ip)
|
||||
/**
|
||||
* Execute the ASMap bytecode to find the ASN for an IP
|
||||
*
|
||||
* This function interprets the asmap bytecode and uses bits from the IP
|
||||
* address to navigate through the encoded trie structure, ultimately
|
||||
* returning an ASN value.
|
||||
*/
|
||||
uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
|
||||
{
|
||||
std::vector<bool>::const_iterator pos = asmap.begin();
|
||||
const std::vector<bool>::const_iterator endpos = asmap.end();
|
||||
uint8_t bits = ip.size();
|
||||
size_t pos{0};
|
||||
const size_t endpos{asmap.size() * 8};
|
||||
uint8_t ip_bit{0};
|
||||
const uint8_t ip_bits_end = ip.size() * 8;
|
||||
uint32_t default_asn = 0;
|
||||
uint32_t jump, match, matchlen;
|
||||
Instruction opcode;
|
||||
while (pos != endpos) {
|
||||
opcode = DecodeType(pos, endpos);
|
||||
while (pos < endpos) {
|
||||
Instruction opcode = DecodeType(pos, asmap);
|
||||
if (opcode == Instruction::RETURN) {
|
||||
default_asn = DecodeASN(pos, endpos);
|
||||
if (default_asn == INVALID) break; // ASN straddles EOF
|
||||
return default_asn;
|
||||
// Found leaf node - return the ASN
|
||||
uint32_t asn = DecodeASN(pos, asmap);
|
||||
if (asn == INVALID) break; // ASN straddles EOF
|
||||
return asn;
|
||||
} else if (opcode == Instruction::JUMP) {
|
||||
jump = DecodeJump(pos, endpos);
|
||||
// Binary branch: if IP bit is 1, jump forward; else continue
|
||||
uint32_t jump = DecodeJump(pos, asmap);
|
||||
if (jump == INVALID) break; // Jump offset straddles EOF
|
||||
if (bits == 0) break; // No input bits left
|
||||
if (int64_t{jump} >= int64_t{endpos - pos}) break; // Jumping past EOF
|
||||
if (ip[ip.size() - bits]) {
|
||||
pos += jump;
|
||||
if (ip_bit == ip_bits_end) break; // No input bits left
|
||||
if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
|
||||
if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian)
|
||||
pos += jump; // Bit = 1: skip to right subtree
|
||||
}
|
||||
bits--;
|
||||
// Bit = 0: fall through to left subtree
|
||||
} else if (opcode == Instruction::MATCH) {
|
||||
match = DecodeMatch(pos, endpos);
|
||||
// Compare multiple IP bits against a pattern
|
||||
// The match value encodes both length and pattern:
|
||||
// - highest set bit position determines length (bit_width - 1)
|
||||
// - lower bits contain the pattern to compare
|
||||
uint32_t match = DecodeMatch(pos, asmap);
|
||||
if (match == INVALID) break; // Match bits straddle EOF
|
||||
matchlen = std::bit_width(match) - 1;
|
||||
if (bits < matchlen) break; // Not enough input bits
|
||||
for (uint32_t bit = 0; bit < matchlen; bit++) {
|
||||
if ((ip[ip.size() - bits]) != ((match >> (matchlen - 1 - bit)) & 1)) {
|
||||
return default_asn;
|
||||
int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits
|
||||
if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
|
||||
for (int bit = 0; bit < matchlen; bit++) {
|
||||
if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
|
||||
return default_asn; // Pattern mismatch - use default
|
||||
}
|
||||
bits--;
|
||||
}
|
||||
// Pattern matched - continue execution
|
||||
} else if (opcode == Instruction::DEFAULT) {
|
||||
default_asn = DecodeASN(pos, endpos);
|
||||
// Update the default ASN for subsequent MATCH failures
|
||||
default_asn = DecodeASN(pos, asmap);
|
||||
if (default_asn == INVALID) break; // ASN straddles EOF
|
||||
} else {
|
||||
break; // Instruction straddles EOF
|
||||
}
|
||||
}
|
||||
assert(false); // Reached EOF without RETURN, or aborted (see any of the breaks above) - should have been caught by SanityCheckASMap below
|
||||
// Reached EOF without RETURN, or aborted (see any of the breaks above)
|
||||
// - should have been caught by SanityCheckAsmap below
|
||||
assert(false);
|
||||
return 0; // 0 is not a valid ASN
|
||||
}
|
||||
|
||||
bool SanityCheckASMap(const std::vector<bool>& asmap, int bits)
|
||||
/**
|
||||
* Validates ASMap structure by simulating all possible execution paths.
|
||||
* Ensures well-formed bytecode, valid jumps, and proper termination.
|
||||
*/
|
||||
bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
|
||||
{
|
||||
const std::vector<bool>::const_iterator begin = asmap.begin(), endpos = asmap.end();
|
||||
std::vector<bool>::const_iterator pos = begin;
|
||||
size_t pos{0};
|
||||
const size_t endpos{asmap.size() * 8};
|
||||
std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
|
||||
jumps.reserve(bits);
|
||||
Instruction prevopcode = Instruction::JUMP;
|
||||
bool had_incomplete_match = false;
|
||||
bool had_incomplete_match = false; // Track <8 bit matches for efficiency check
|
||||
|
||||
while (pos != endpos) {
|
||||
uint32_t offset = pos - begin;
|
||||
if (!jumps.empty() && offset >= jumps.back().first) return false; // There was a jump into the middle of the previous instruction
|
||||
Instruction opcode = DecodeType(pos, endpos);
|
||||
// There was a jump into the middle of the previous instruction
|
||||
if (!jumps.empty() && pos >= jumps.back().first) return false;
|
||||
|
||||
Instruction opcode = DecodeType(pos, asmap);
|
||||
if (opcode == Instruction::RETURN) {
|
||||
if (prevopcode == Instruction::DEFAULT) return false; // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
|
||||
uint32_t asn = DecodeASN(pos, endpos);
|
||||
// There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
|
||||
if (prevopcode == Instruction::DEFAULT) return false;
|
||||
uint32_t asn = DecodeASN(pos, asmap);
|
||||
if (asn == INVALID) return false; // ASN straddles EOF
|
||||
if (jumps.empty()) {
|
||||
// Nothing to execute anymore
|
||||
if (endpos - pos > 7) return false; // Excessive padding
|
||||
while (pos != endpos) {
|
||||
if (*pos) return false; // Nonzero padding bit
|
||||
++pos;
|
||||
if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
|
||||
}
|
||||
return true; // Sanely reached EOF
|
||||
} else {
|
||||
// Continue by pretending we jumped to the next instruction
|
||||
offset = pos - begin;
|
||||
if (offset != jumps.back().first) return false; // Unreachable code
|
||||
if (pos != jumps.back().first) return false; // Unreachable code
|
||||
bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
|
||||
jumps.pop_back();
|
||||
prevopcode = Instruction::JUMP;
|
||||
}
|
||||
} else if (opcode == Instruction::JUMP) {
|
||||
uint32_t jump = DecodeJump(pos, endpos);
|
||||
uint32_t jump = DecodeJump(pos, asmap);
|
||||
if (jump == INVALID) return false; // Jump offset straddles EOF
|
||||
if (int64_t{jump} > int64_t{endpos - pos}) return false; // Jump out of range
|
||||
if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
|
||||
if (bits == 0) return false; // Consuming bits past the end of the input
|
||||
--bits;
|
||||
uint32_t jump_offset = pos - begin + jump;
|
||||
uint32_t jump_offset = pos + jump;
|
||||
if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
|
||||
jumps.emplace_back(jump_offset, bits);
|
||||
jumps.emplace_back(jump_offset, bits); // Queue jump target for validation
|
||||
prevopcode = Instruction::JUMP;
|
||||
} else if (opcode == Instruction::MATCH) {
|
||||
uint32_t match = DecodeMatch(pos, endpos);
|
||||
uint32_t match = DecodeMatch(pos, asmap);
|
||||
if (match == INVALID) return false; // Match bits straddle EOF
|
||||
int matchlen = std::bit_width(match) - 1;
|
||||
if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
|
||||
if (matchlen < 8 && had_incomplete_match) return false; // Within a sequence of matches only at most one should be incomplete
|
||||
// Within a sequence of matches only at most one should be incomplete
|
||||
if (matchlen < 8 && had_incomplete_match) return false;
|
||||
had_incomplete_match = (matchlen < 8);
|
||||
if (bits < matchlen) return false; // Consuming bits past the end of the input
|
||||
bits -= matchlen;
|
||||
prevopcode = Instruction::MATCH;
|
||||
} else if (opcode == Instruction::DEFAULT) {
|
||||
if (prevopcode == Instruction::DEFAULT) return false; // There should not be two successive DEFAULTs (they could be combined into one)
|
||||
uint32_t asn = DecodeASN(pos, endpos);
|
||||
// There should not be two successive DEFAULTs (they could be combined into one)
|
||||
if (prevopcode == Instruction::DEFAULT) return false;
|
||||
uint32_t asn = DecodeASN(pos, asmap);
|
||||
if (asn == INVALID) return false; // ASN straddles EOF
|
||||
prevopcode = Instruction::DEFAULT;
|
||||
} else {
|
||||
@@ -194,27 +303,53 @@ bool SanityCheckASMap(const std::vector<bool>& asmap, int bits)
|
||||
return false; // Reached EOF without RETURN instruction
|
||||
}
|
||||
|
||||
std::vector<bool> DecodeAsmap(fs::path path)
|
||||
/**
|
||||
* Provides a safe interface for validating ASMap data before use.
|
||||
* Returns true if the data is valid for 128-bit inputs.
|
||||
*/
|
||||
bool CheckStandardAsmap(const std::span<const std::byte> data)
|
||||
{
|
||||
if (!SanityCheckAsmap(data, 128)) {
|
||||
LogWarning("Sanity check of asmap data failed\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads an ASMap file from disk and validates it.
|
||||
*/
|
||||
std::vector<std::byte> DecodeAsmap(fs::path path)
|
||||
{
|
||||
std::vector<bool> bits;
|
||||
FILE *filestr = fsbridge::fopen(path, "rb");
|
||||
AutoFile file{filestr};
|
||||
if (file.IsNull()) {
|
||||
LogWarning("Failed to open asmap file from disk");
|
||||
return bits;
|
||||
return {};
|
||||
}
|
||||
int64_t length{file.size()};
|
||||
LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
|
||||
uint8_t cur_byte;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
file >> cur_byte;
|
||||
for (int bit = 0; bit < 8; ++bit) {
|
||||
bits.push_back((cur_byte >> bit) & 1);
|
||||
}
|
||||
}
|
||||
if (!SanityCheckASMap(bits, 128)) {
|
||||
|
||||
// Read entire file into memory
|
||||
std::vector<std::byte> buffer(length);
|
||||
file.read(buffer);
|
||||
|
||||
if (!CheckStandardAsmap(buffer)) {
|
||||
LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
|
||||
return {};
|
||||
}
|
||||
return bits;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes SHA256 hash of ASMap data for versioning and consistency checks.
|
||||
*/
|
||||
uint256 AsmapVersion(const std::span<const std::byte> data)
|
||||
{
|
||||
if (data.empty()) return {};
|
||||
|
||||
HashWriter asmap_hasher;
|
||||
asmap_hasher << data;
|
||||
return asmap_hasher.GetHash();
|
||||
}
|
||||
|
||||
@@ -5,16 +5,23 @@
|
||||
#ifndef BITCOIN_UTIL_ASMAP_H
|
||||
#define BITCOIN_UTIL_ASMAP_H
|
||||
|
||||
#include <uint256.h>
|
||||
#include <util/fs.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
uint32_t Interpret(const std::vector<bool> &asmap, const std::vector<bool> &ip);
|
||||
uint32_t Interpret(std::span<const std::byte> asmap, std::span<const std::byte> ip);
|
||||
|
||||
bool SanityCheckASMap(const std::vector<bool>& asmap, int bits);
|
||||
bool SanityCheckAsmap(std::span<const std::byte> asmap, int bits);
|
||||
/** Check standard asmap data (128 bits for IPv6) */
|
||||
bool CheckStandardAsmap(std::span<const std::byte> data);
|
||||
|
||||
/** Read asmap from provided binary file */
|
||||
std::vector<bool> DecodeAsmap(fs::path path);
|
||||
/** Read and check asmap from provided binary file */
|
||||
std::vector<std::byte> DecodeAsmap(fs::path path);
|
||||
/** Calculate the asmap version, a checksum identifying the asmap being used. */
|
||||
uint256 AsmapVersion(std::span<const std::byte> data);
|
||||
|
||||
#endif // BITCOIN_UTIL_ASMAP_H
|
||||
|
||||
@@ -18,7 +18,7 @@ from test_framework.test_framework import BitcoinTestFramework
|
||||
from test_framework.util import assert_equal
|
||||
|
||||
ASMAP = 'src/test/data/asmap.raw' # path to unit test skeleton asmap
|
||||
VERSION = 'fec61fa21a9f46f3b17bdcd660d7f4cd90b966aad3aec593c99b35f0aca15853'
|
||||
VERSION = 'bafc9da308f45179443bd1d22325400ac9104f741522d003e3fac86700f68895'
|
||||
|
||||
def expected_messages(filename):
|
||||
return [f'Opened asmap file "{filename}" (59 bytes) from disk',
|
||||
|
||||
Reference in New Issue
Block a user