Merge #16702: p2p: supplying and using asmap to improve IP bucketing in addrman

3c1bc40205 Add extra logging of asmap use and bucketing (Gleb Naumenko)
e4658aa8ea Return mapped AS in RPC call getpeerinfo (Gleb Naumenko)
ec45646de9 Integrate ASN bucketing in Addrman and add tests (Gleb Naumenko)
8feb4e4b66  Add asmap utility which queries a mapping (Gleb Naumenko)

Pull request description:

  This PR attempts to solve the problem explained in #16599.
  A particular attack which encouraged us to work on this issue is explained here  [[Erebus Attack against Bitcoin Peer-to-Peer Network](https://erebus-attack.comp.nus.edu.sg/)] (by @muoitranduc)

  Instead of relying on /16 prefix to diversify the connections every node creates, we would instead rely on the (ip -> ASN) mapping, if this mapping is provided.

  A .map file can be created by every user independently based on a router dump, or provided along with the Bitcoin release. Currently we use the python scripts written by @sipa to create a .map file, which is no larger than 2MB (awesome!).

  Here I suggest adding a field to peers.dat which would represent a hash of asmap file used while serializing addrman (or 0 for /16 prefix legacy approach).
  In this case, every time the file is updated (or grouping method changed), all buckets will be re-computed.
  I believe that alternative selective re-bucketing for only updated ranges would require substantial changes.

  TODO:
  - ~~more unit tests~~
  - ~~find a way to test the code without including >1 MB mapping file in the repo.~~
  - find a way to check that mapping file is not corrupted (checksum?)
  - comments and separate tests for asmap.cpp
  - make python code for .map generation public
  - figure out asmap distribution (?)

  ~Interesting corner case: I’m using std::hash to compute a fingerprint of asmap, and std::hash returns size_t. I guess  if a user updates the OS to 64-bit, then the hash of asap will change? Does it even matter?~

ACKs for top commit:
  laanwj:
    re-ACK 3c1bc40205
  jamesob:
    ACK 3c1bc40205 ([`jamesob/ackr/16702.3.naumenkogs.p2p_supplying_and_using`](https://github.com/jamesob/bitcoin/tree/ackr/16702.3.naumenkogs.p2p_supplying_and_using))
  jonatack:
    ACK 3c1bc40205

Tree-SHA512: e2dc6171188d5cdc2ab2c022fa49ed73a14a0acb8ae4c5ffa970172a0365942a249ad3d57e5fb134bc156a3492662c983f74bd21e78d316629dcadf71576800c
This commit is contained in:
Wladimir J. van der Laan
2020-01-29 13:53:46 +01:00
15 changed files with 641 additions and 103 deletions

View File

@@ -5,6 +5,8 @@
#include <test/util/setup_common.h>
#include <string>
#include <boost/test/unit_test.hpp>
#include <util/asmap.h>
#include <test/data/asmap.raw.h>
#include <hash.h>
#include <netbase.h>
@@ -12,13 +14,18 @@
class CAddrManTest : public CAddrMan
{
private:
bool deterministic;
public:
explicit CAddrManTest(bool makeDeterministic = true)
explicit CAddrManTest(bool makeDeterministic = true,
std::vector<bool> asmap = std::vector<bool>())
{
if (makeDeterministic) {
// Set addrman addr placement to be deterministic.
MakeDeterministic();
}
deterministic = makeDeterministic;
m_asmap = asmap;
}
//! Ensure that bucket placement is always the same for testing purposes.
@@ -46,6 +53,21 @@ public:
CAddrMan::Delete(nId);
}
// Used to test deserialization
std::pair<int, int> GetBucketAndEntry(const CAddress& addr)
{
LOCK(cs);
int nId = mapAddr[addr];
for (int bucket = 0; bucket < ADDRMAN_NEW_BUCKET_COUNT; ++bucket) {
for (int entry = 0; entry < ADDRMAN_BUCKET_SIZE; ++entry) {
if (nId == vvNew[bucket][entry]) {
return std::pair<int, int>(bucket, entry);
}
}
}
return std::pair<int, int>(-1, -1);
}
// Simulates connection failure so that we can test eviction of offline nodes
void SimConnFail(CService& addr)
{
@@ -57,6 +79,16 @@ public:
int64_t nLastTry = GetAdjustedTime()-61;
Attempt(addr, count_failure, nLastTry);
}
void Clear()
{
CAddrMan::Clear();
if (deterministic) {
nKey.SetNull();
insecure_rand = FastRandomContext(true);
}
}
};
static CNetAddr ResolveIP(const std::string& ip)
@@ -74,6 +106,18 @@ static CService ResolveService(const std::string& ip, const int port = 0)
}
static std::vector<bool> FromBytes(const unsigned char* source, int vector_size) {
std::vector<bool> result(vector_size);
for (int byte_i = 0; byte_i < vector_size / 8; ++byte_i) {
unsigned char cur_byte = source[byte_i];
for (int bit_i = 0; bit_i < 8; ++bit_i) {
result[byte_i * 8 + bit_i] = (cur_byte >> bit_i) & 1;
}
}
return result;
}
BOOST_FIXTURE_TEST_SUITE(addrman_tests, BasicTestingSetup)
BOOST_AUTO_TEST_CASE(addrman_simple)
@@ -400,6 +444,139 @@ BOOST_AUTO_TEST_CASE(addrman_getaddr)
}
BOOST_AUTO_TEST_CASE(caddrinfo_get_tried_bucket_legacy)
{
CAddrManTest addrman;
CAddress addr1 = CAddress(ResolveService("250.1.1.1", 8333), NODE_NONE);
CAddress addr2 = CAddress(ResolveService("250.1.1.1", 9999), NODE_NONE);
CNetAddr source1 = ResolveIP("250.1.1.1");
CAddrInfo info1 = CAddrInfo(addr1, source1);
uint256 nKey1 = (uint256)(CHashWriter(SER_GETHASH, 0) << 1).GetHash();
uint256 nKey2 = (uint256)(CHashWriter(SER_GETHASH, 0) << 2).GetHash();
std::vector<bool> asmap; // use /16
BOOST_CHECK_EQUAL(info1.GetTriedBucket(nKey1, asmap), 40);
// Test: Make sure key actually randomizes bucket placement. A fail on
// this test could be a security issue.
BOOST_CHECK(info1.GetTriedBucket(nKey1, asmap) != info1.GetTriedBucket(nKey2, asmap));
// Test: Two addresses with same IP but different ports can map to
// different buckets because they have different keys.
CAddrInfo info2 = CAddrInfo(addr2, source1);
BOOST_CHECK(info1.GetKey() != info2.GetKey());
BOOST_CHECK(info1.GetTriedBucket(nKey1, asmap) != info2.GetTriedBucket(nKey1, asmap));
std::set<int> buckets;
for (int i = 0; i < 255; i++) {
CAddrInfo infoi = CAddrInfo(
CAddress(ResolveService("250.1.1." + std::to_string(i)), NODE_NONE),
ResolveIP("250.1.1." + std::to_string(i)));
int bucket = infoi.GetTriedBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the same /16 prefix should
// never get more than 8 buckets with legacy grouping
BOOST_CHECK_EQUAL(buckets.size(), 8U);
buckets.clear();
for (int j = 0; j < 255; j++) {
CAddrInfo infoj = CAddrInfo(
CAddress(ResolveService("250." + std::to_string(j) + ".1.1"), NODE_NONE),
ResolveIP("250." + std::to_string(j) + ".1.1"));
int bucket = infoj.GetTriedBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the different /16 prefix should map to more than
// 8 buckets with legacy grouping
BOOST_CHECK_EQUAL(buckets.size(), 160U);
}
BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket_legacy)
{
CAddrManTest addrman;
CAddress addr1 = CAddress(ResolveService("250.1.2.1", 8333), NODE_NONE);
CAddress addr2 = CAddress(ResolveService("250.1.2.1", 9999), NODE_NONE);
CNetAddr source1 = ResolveIP("250.1.2.1");
CAddrInfo info1 = CAddrInfo(addr1, source1);
uint256 nKey1 = (uint256)(CHashWriter(SER_GETHASH, 0) << 1).GetHash();
uint256 nKey2 = (uint256)(CHashWriter(SER_GETHASH, 0) << 2).GetHash();
std::vector<bool> asmap; // use /16
// Test: Make sure the buckets are what we expect
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1, asmap), 786);
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1, source1, asmap), 786);
// Test: Make sure key actually randomizes bucket placement. A fail on
// this test could be a security issue.
BOOST_CHECK(info1.GetNewBucket(nKey1, asmap) != info1.GetNewBucket(nKey2, asmap));
// Test: Ports should not affect bucket placement in the addr
CAddrInfo info2 = CAddrInfo(addr2, source1);
BOOST_CHECK(info1.GetKey() != info2.GetKey());
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1, asmap), info2.GetNewBucket(nKey1, asmap));
std::set<int> buckets;
for (int i = 0; i < 255; i++) {
CAddrInfo infoi = CAddrInfo(
CAddress(ResolveService("250.1.1." + std::to_string(i)), NODE_NONE),
ResolveIP("250.1.1." + std::to_string(i)));
int bucket = infoi.GetNewBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the same group (\16 prefix for IPv4) should
// always map to the same bucket.
BOOST_CHECK_EQUAL(buckets.size(), 1U);
buckets.clear();
for (int j = 0; j < 4 * 255; j++) {
CAddrInfo infoj = CAddrInfo(CAddress(
ResolveService(
std::to_string(250 + (j / 255)) + "." + std::to_string(j % 256) + ".1.1"), NODE_NONE),
ResolveIP("251.4.1.1"));
int bucket = infoj.GetNewBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the same source groups should map to NO MORE
// than 64 buckets.
BOOST_CHECK(buckets.size() <= 64);
buckets.clear();
for (int p = 0; p < 255; p++) {
CAddrInfo infoj = CAddrInfo(
CAddress(ResolveService("250.1.1.1"), NODE_NONE),
ResolveIP("250." + std::to_string(p) + ".1.1"));
int bucket = infoj.GetNewBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the different source groups should map to MORE
// than 64 buckets.
BOOST_CHECK(buckets.size() > 64);
}
// The following three test cases use asmap.raw
// We use an artificial minimal mock mapping
// 250.0.0.0/8 AS1000
// 101.1.0.0/16 AS1
// 101.2.0.0/16 AS2
// 101.3.0.0/16 AS3
// 101.4.0.0/16 AS4
// 101.5.0.0/16 AS5
// 101.6.0.0/16 AS6
// 101.7.0.0/16 AS7
// 101.8.0.0/16 AS8
BOOST_AUTO_TEST_CASE(caddrinfo_get_tried_bucket)
{
CAddrManTest addrman;
@@ -415,43 +592,44 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_tried_bucket)
uint256 nKey1 = (uint256)(CHashWriter(SER_GETHASH, 0) << 1).GetHash();
uint256 nKey2 = (uint256)(CHashWriter(SER_GETHASH, 0) << 2).GetHash();
std::vector<bool> asmap = FromBytes(asmap_raw, sizeof(asmap_raw) * 8);
BOOST_CHECK_EQUAL(info1.GetTriedBucket(nKey1), 40);
BOOST_CHECK_EQUAL(info1.GetTriedBucket(nKey1, asmap), 236);
// Test: Make sure key actually randomizes bucket placement. A fail on
// this test could be a security issue.
BOOST_CHECK(info1.GetTriedBucket(nKey1) != info1.GetTriedBucket(nKey2));
BOOST_CHECK(info1.GetTriedBucket(nKey1, asmap) != info1.GetTriedBucket(nKey2, asmap));
// Test: Two addresses with same IP but different ports can map to
// different buckets because they have different keys.
CAddrInfo info2 = CAddrInfo(addr2, source1);
BOOST_CHECK(info1.GetKey() != info2.GetKey());
BOOST_CHECK(info1.GetTriedBucket(nKey1) != info2.GetTriedBucket(nKey1));
BOOST_CHECK(info1.GetTriedBucket(nKey1, asmap) != info2.GetTriedBucket(nKey1, asmap));
std::set<int> buckets;
for (int i = 0; i < 255; i++) {
CAddrInfo infoi = CAddrInfo(
CAddress(ResolveService("250.1.1." + std::to_string(i)), NODE_NONE),
ResolveIP("250.1.1." + std::to_string(i)));
int bucket = infoi.GetTriedBucket(nKey1);
for (int j = 0; j < 255; j++) {
CAddrInfo infoj = CAddrInfo(
CAddress(ResolveService("101." + std::to_string(j) + ".1.1"), NODE_NONE),
ResolveIP("101." + std::to_string(j) + ".1.1"));
int bucket = infoj.GetTriedBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the same group (\16 prefix for IPv4) should
// never get more than 8 buckets
BOOST_CHECK_EQUAL(buckets.size(), 8U);
// Test: IP addresses in the different /16 prefix MAY map to more than
// 8 buckets.
BOOST_CHECK(buckets.size() > 8);
buckets.clear();
for (int j = 0; j < 255; j++) {
CAddrInfo infoj = CAddrInfo(
CAddress(ResolveService("250." + std::to_string(j) + ".1.1"), NODE_NONE),
ResolveIP("250." + std::to_string(j) + ".1.1"));
int bucket = infoj.GetTriedBucket(nKey1);
int bucket = infoj.GetTriedBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the different groups should map to more than
// 8 buckets.
BOOST_CHECK_EQUAL(buckets.size(), 160U);
// Test: IP addresses in the different /16 prefix MAY NOT map to more than
// 8 buckets.
BOOST_CHECK(buckets.size() == 8);
}
BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket)
@@ -468,29 +646,31 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket)
uint256 nKey1 = (uint256)(CHashWriter(SER_GETHASH, 0) << 1).GetHash();
uint256 nKey2 = (uint256)(CHashWriter(SER_GETHASH, 0) << 2).GetHash();
std::vector<bool> asmap = FromBytes(asmap_raw, sizeof(asmap_raw) * 8);
// Test: Make sure the buckets are what we expect
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1), 786);
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1, source1), 786);
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1, asmap), 795);
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1, source1, asmap), 795);
// Test: Make sure key actually randomizes bucket placement. A fail on
// this test could be a security issue.
BOOST_CHECK(info1.GetNewBucket(nKey1) != info1.GetNewBucket(nKey2));
BOOST_CHECK(info1.GetNewBucket(nKey1, asmap) != info1.GetNewBucket(nKey2, asmap));
// Test: Ports should not affect bucket placement in the addr
CAddrInfo info2 = CAddrInfo(addr2, source1);
BOOST_CHECK(info1.GetKey() != info2.GetKey());
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1), info2.GetNewBucket(nKey1));
BOOST_CHECK_EQUAL(info1.GetNewBucket(nKey1, asmap), info2.GetNewBucket(nKey1, asmap));
std::set<int> buckets;
for (int i = 0; i < 255; i++) {
CAddrInfo infoi = CAddrInfo(
CAddress(ResolveService("250.1.1." + std::to_string(i)), NODE_NONE),
ResolveIP("250.1.1." + std::to_string(i)));
int bucket = infoi.GetNewBucket(nKey1);
int bucket = infoi.GetNewBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the same group (\16 prefix for IPv4) should
// always map to the same bucket.
// Test: IP addresses in the same /16 prefix
// usually map to the same bucket.
BOOST_CHECK_EQUAL(buckets.size(), 1U);
buckets.clear();
@@ -499,24 +679,103 @@ BOOST_AUTO_TEST_CASE(caddrinfo_get_new_bucket)
ResolveService(
std::to_string(250 + (j / 255)) + "." + std::to_string(j % 256) + ".1.1"), NODE_NONE),
ResolveIP("251.4.1.1"));
int bucket = infoj.GetNewBucket(nKey1);
int bucket = infoj.GetNewBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the same source groups should map to no more
// than 64 buckets.
// Test: IP addresses in the same source /16 prefix should not map to more
// than 64 buckets.
BOOST_CHECK(buckets.size() <= 64);
buckets.clear();
for (int p = 0; p < 255; p++) {
CAddrInfo infoj = CAddrInfo(
CAddress(ResolveService("250.1.1.1"), NODE_NONE),
ResolveIP("250." + std::to_string(p) + ".1.1"));
int bucket = infoj.GetNewBucket(nKey1);
ResolveIP("101." + std::to_string(p) + ".1.1"));
int bucket = infoj.GetNewBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the different source groups should map to more
// than 64 buckets.
BOOST_CHECK(buckets.size() > 64);
// Test: IP addresses in the different source /16 prefixes usually map to MORE
// than 1 bucket.
BOOST_CHECK(buckets.size() > 1);
buckets.clear();
for (int p = 0; p < 255; p++) {
CAddrInfo infoj = CAddrInfo(
CAddress(ResolveService("250.1.1.1"), NODE_NONE),
ResolveIP("250." + std::to_string(p) + ".1.1"));
int bucket = infoj.GetNewBucket(nKey1, asmap);
buckets.insert(bucket);
}
// Test: IP addresses in the different source /16 prefixes sometimes map to NO MORE
// than 1 bucket.
BOOST_CHECK(buckets.size() == 1);
}
BOOST_AUTO_TEST_CASE(addrman_serialization)
{
std::vector<bool> asmap1 = FromBytes(asmap_raw, sizeof(asmap_raw) * 8);
CAddrManTest addrman_asmap1(true, asmap1);
CAddrManTest addrman_asmap1_dup(true, asmap1);
CAddrManTest addrman_noasmap;
CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
CAddress addr = CAddress(ResolveService("250.1.1.1"), NODE_NONE);
CNetAddr default_source;
addrman_asmap1.Add(addr, default_source);
stream << addrman_asmap1;
// serizalizing/deserializing addrman with the same asmap
stream >> addrman_asmap1_dup;
std::pair<int, int> bucketAndEntry_asmap1 = addrman_asmap1.GetBucketAndEntry(addr);
std::pair<int, int> bucketAndEntry_asmap1_dup = addrman_asmap1_dup.GetBucketAndEntry(addr);
BOOST_CHECK(bucketAndEntry_asmap1.second != -1);
BOOST_CHECK(bucketAndEntry_asmap1_dup.second != -1);
BOOST_CHECK(bucketAndEntry_asmap1.first == bucketAndEntry_asmap1_dup.first);
BOOST_CHECK(bucketAndEntry_asmap1.second == bucketAndEntry_asmap1_dup.second);
// deserializing asmaped peers.dat to non-asmaped addrman
stream << addrman_asmap1;
stream >> addrman_noasmap;
std::pair<int, int> bucketAndEntry_noasmap = addrman_noasmap.GetBucketAndEntry(addr);
BOOST_CHECK(bucketAndEntry_noasmap.second != -1);
BOOST_CHECK(bucketAndEntry_asmap1.first != bucketAndEntry_noasmap.first);
BOOST_CHECK(bucketAndEntry_asmap1.second != bucketAndEntry_noasmap.second);
// deserializing non-asmaped peers.dat to asmaped addrman
addrman_asmap1.Clear();
addrman_noasmap.Clear();
addrman_noasmap.Add(addr, default_source);
stream << addrman_noasmap;
stream >> addrman_asmap1;
std::pair<int, int> bucketAndEntry_asmap1_deser = addrman_asmap1.GetBucketAndEntry(addr);
BOOST_CHECK(bucketAndEntry_asmap1_deser.second != -1);
BOOST_CHECK(bucketAndEntry_asmap1_deser.first != bucketAndEntry_noasmap.first);
BOOST_CHECK(bucketAndEntry_asmap1_deser.first == bucketAndEntry_asmap1_dup.first);
BOOST_CHECK(bucketAndEntry_asmap1_deser.second == bucketAndEntry_asmap1_dup.second);
// used to map to different buckets, now maps to the same bucket.
addrman_asmap1.Clear();
addrman_noasmap.Clear();
CAddress addr1 = CAddress(ResolveService("250.1.1.1"), NODE_NONE);
CAddress addr2 = CAddress(ResolveService("250.2.1.1"), NODE_NONE);
addrman_noasmap.Add(addr, default_source);
addrman_noasmap.Add(addr2, default_source);
std::pair<int, int> bucketAndEntry_noasmap_addr1 = addrman_noasmap.GetBucketAndEntry(addr1);
std::pair<int, int> bucketAndEntry_noasmap_addr2 = addrman_noasmap.GetBucketAndEntry(addr2);
BOOST_CHECK(bucketAndEntry_noasmap_addr1.first != bucketAndEntry_noasmap_addr2.first);
BOOST_CHECK(bucketAndEntry_noasmap_addr1.second != bucketAndEntry_noasmap_addr2.second);
stream << addrman_noasmap;
stream >> addrman_asmap1;
std::pair<int, int> bucketAndEntry_asmap1_deser_addr1 = addrman_asmap1.GetBucketAndEntry(addr1);
std::pair<int, int> bucketAndEntry_asmap1_deser_addr2 = addrman_asmap1.GetBucketAndEntry(addr2);
BOOST_CHECK(bucketAndEntry_asmap1_deser_addr1.first == bucketAndEntry_asmap1_deser_addr2.first);
BOOST_CHECK(bucketAndEntry_asmap1_deser_addr1.second != bucketAndEntry_asmap1_deser_addr2.second);
}