From 42ec5585424ceb91bed07826dde15697c020661a Mon Sep 17 00:00:00 2001 From: Gleb Naumenko Date: Tue, 11 Aug 2020 10:42:26 +0300 Subject: [PATCH 1/4] Justify the choice of ADDR cache lifetime --- src/net.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/net.cpp b/src/net.cpp index 883e57bdf00..7841965f601 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -2545,6 +2545,31 @@ std::vector CConnman::GetAddresses(Network requestor_network, size_t m if (m_addr_response_caches.find(requestor_network) == m_addr_response_caches.end() || m_addr_response_caches[requestor_network].m_update_addr_response < current_time) { m_addr_response_caches[requestor_network].m_addrs_response_cache = GetAddresses(max_addresses, max_pct); + + // Choosing a proper cache lifetime is a trade-off between the privacy leak minimization + // and the usefulness of ADDR responses to honest users. + // + // Longer cache lifetime makes it more difficult for an attacker to scrape + // enough AddrMan data to maliciously infer something useful. + // By the time an attacker scraped enough AddrMan records, most of + // the records should be old enough to not leak topology info by + // e.g. analyzing real-time changes in timestamps. + // + // It takes only several hundred requests to scrape everything from an AddrMan containing 100,000 nodes, + // so ~24 hours of cache lifetime indeed makes the data less inferable by the time + // most of it could be scraped (considering that timestamps are updated via + // ADDR self-announcements and when nodes communicate). + // We also should be robust to those attacks which may not require scraping *full* victim's AddrMan + // (because even several timestamps of the same handful of nodes may leak privacy). + // + // On the other hand, longer cache lifetime makes ADDR responses + // outdated and less useful for an honest requestor, e.g. if most nodes + // in the ADDR response are no longer active. + // + // However, the churn in the network is known to be rather low. Since we consider + // nodes to be "terrible" (see IsTerrible()) if the timestamps are older than 30 days, + // max. 24 hours of "penalty" due to cache shouldn't make any meaningful difference + // in terms of the freshness of the response. m_addr_response_caches[requestor_network].m_update_addr_response = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6)); } return m_addr_response_caches[requestor_network].m_addrs_response_cache; From 81b00f87800f40cb14f2131ff27668bd2bb9e551 Mon Sep 17 00:00:00 2001 From: Gleb Naumenko Date: Tue, 11 Aug 2020 12:41:26 +0300 Subject: [PATCH 2/4] Add indexing ADDR cache by local socket addr --- src/net.cpp | 20 ++++++++++++++------ src/net.h | 12 ++++++++---- src/net_processing.cpp | 2 +- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/net.cpp b/src/net.cpp index 7841965f601..e43b4806123 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -94,6 +94,7 @@ const std::string NET_MESSAGE_COMMAND_OTHER = "*other*"; static const uint64_t RANDOMIZER_ID_NETGROUP = 0x6c0edd8036ef4036ULL; // SHA256("netgroup")[0:8] static const uint64_t RANDOMIZER_ID_LOCALHOSTNONCE = 0xd93e69e2bbfa5735ULL; // SHA256("localhostnonce")[0:8] +static const uint64_t RANDOMIZER_ID_ADDRCACHE = 0x1cf2e4ddd306dda9ULL; // SHA256("addrcache")[0:8] // // Global state variables // @@ -2539,12 +2540,19 @@ std::vector CConnman::GetAddresses(size_t max_addresses, size_t max_pc return addresses; } -std::vector CConnman::GetAddresses(Network requestor_network, size_t max_addresses, size_t max_pct) +std::vector CConnman::GetAddresses(CNode& requestor, size_t max_addresses, size_t max_pct) { + SOCKET socket; + WITH_LOCK(requestor.cs_hSocket, socket = requestor.hSocket); + auto local_socket_bytes = GetBindAddress(socket).GetAddrBytes(); + uint64_t cache_id = GetDeterministicRandomizer(RANDOMIZER_ID_ADDRCACHE) + .Write(requestor.addr.GetNetwork()) + .Write(local_socket_bytes.data(), local_socket_bytes.size()) + .Finalize(); const auto current_time = GetTime(); - if (m_addr_response_caches.find(requestor_network) == m_addr_response_caches.end() || - m_addr_response_caches[requestor_network].m_update_addr_response < current_time) { - m_addr_response_caches[requestor_network].m_addrs_response_cache = GetAddresses(max_addresses, max_pct); + if (m_addr_response_caches.find(cache_id) == m_addr_response_caches.end() || + m_addr_response_caches[cache_id].m_update_addr_response < current_time) { + m_addr_response_caches[cache_id].m_addrs_response_cache = GetAddresses(max_addresses, max_pct); // Choosing a proper cache lifetime is a trade-off between the privacy leak minimization // and the usefulness of ADDR responses to honest users. @@ -2570,9 +2578,9 @@ std::vector CConnman::GetAddresses(Network requestor_network, size_t m // nodes to be "terrible" (see IsTerrible()) if the timestamps are older than 30 days, // max. 24 hours of "penalty" due to cache shouldn't make any meaningful difference // in terms of the freshness of the response. - m_addr_response_caches[requestor_network].m_update_addr_response = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6)); + m_addr_response_caches[cache_id].m_update_addr_response = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6)); } - return m_addr_response_caches[requestor_network].m_addrs_response_cache; + return m_addr_response_caches[cache_id].m_addrs_response_cache; } bool CConnman::AddNode(const std::string& strNode) diff --git a/src/net.h b/src/net.h index c72eada3ff4..c9ab579eb24 100644 --- a/src/net.h +++ b/src/net.h @@ -269,7 +269,7 @@ public: * A non-malicious call (from RPC or a peer with addr permission) should * call the function without a parameter to avoid using the cache. */ - std::vector GetAddresses(Network requestor_network, size_t max_addresses, size_t max_pct); + std::vector GetAddresses(CNode& requestor, size_t max_addresses, size_t max_pct); // This allows temporarily exceeding m_max_outbound_full_relay, with the goal of finding // a peer that is better than all our current peers. @@ -447,15 +447,19 @@ private: /** * Addr responses stored in different caches - * per network prevent cross-network node identification. + * per (network, local socket) prevent cross-network node identification. * If a node for example is multi-homed under Tor and IPv6, * a single cache (or no cache at all) would let an attacker * to easily detect that it is the same node by comparing responses. + * Indexing by local socket prevents leakage when a node has multiple + * listening addresses on the same network. + * * The used memory equals to 1000 CAddress records (or around 32 bytes) per * distinct Network (up to 5) we have/had an inbound peer from, - * resulting in at most ~160 KB. + * resulting in at most ~160 KB. Every separate local socket may + * add up to ~160 KB extra. */ - std::map m_addr_response_caches; + std::map m_addr_response_caches; /** * Services this instance offers. diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 60bdfbe9f5d..bf359a0d686 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -3516,7 +3516,7 @@ void PeerLogicValidation::ProcessMessage(CNode& pfrom, const std::string& msg_ty if (pfrom.HasPermission(PF_ADDR)) { vAddr = m_connman.GetAddresses(MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND); } else { - vAddr = m_connman.GetAddresses(pfrom.addr.GetNetwork(), MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND); + vAddr = m_connman.GetAddresses(pfrom, MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND); } FastRandomContext insecure_rand; for (const CAddress &addr : vAddr) { From 83ad65f31b5c9441ae1618614082e584854a14e1 Mon Sep 17 00:00:00 2001 From: Gleb Naumenko Date: Tue, 11 Aug 2020 13:39:56 +0300 Subject: [PATCH 3/4] Address nits in ADDR caching --- src/net.cpp | 12 ++++++------ src/net.h | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/net.cpp b/src/net.cpp index e43b4806123..8ac45dbcb59 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -2550,10 +2550,10 @@ std::vector CConnman::GetAddresses(CNode& requestor, size_t max_addres .Write(local_socket_bytes.data(), local_socket_bytes.size()) .Finalize(); const auto current_time = GetTime(); - if (m_addr_response_caches.find(cache_id) == m_addr_response_caches.end() || - m_addr_response_caches[cache_id].m_update_addr_response < current_time) { - m_addr_response_caches[cache_id].m_addrs_response_cache = GetAddresses(max_addresses, max_pct); - + auto r = m_addr_response_caches.emplace(cache_id, CachedAddrResponse{}); + CachedAddrResponse& cache_entry = r.first->second; + if (cache_entry.m_cache_entry_expiration < current_time) { // If emplace() added new one it has expiration 0. + cache_entry.m_addrs_response_cache = GetAddresses(max_addresses, max_pct); // Choosing a proper cache lifetime is a trade-off between the privacy leak minimization // and the usefulness of ADDR responses to honest users. // @@ -2578,9 +2578,9 @@ std::vector CConnman::GetAddresses(CNode& requestor, size_t max_addres // nodes to be "terrible" (see IsTerrible()) if the timestamps are older than 30 days, // max. 24 hours of "penalty" due to cache shouldn't make any meaningful difference // in terms of the freshness of the response. - m_addr_response_caches[cache_id].m_update_addr_response = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6)); + cache_entry.m_cache_entry_expiration = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6)); } - return m_addr_response_caches[cache_id].m_addrs_response_cache; + return cache_entry.m_addrs_response_cache; } bool CConnman::AddNode(const std::string& strNode) diff --git a/src/net.h b/src/net.h index c9ab579eb24..21faea591ac 100644 --- a/src/net.h +++ b/src/net.h @@ -442,7 +442,7 @@ private: */ struct CachedAddrResponse { std::vector m_addrs_response_cache; - std::chrono::microseconds m_update_addr_response{0}; + std::chrono::microseconds m_cache_entry_expiration{0}; }; /** @@ -454,10 +454,10 @@ private: * Indexing by local socket prevents leakage when a node has multiple * listening addresses on the same network. * - * The used memory equals to 1000 CAddress records (or around 32 bytes) per + * The used memory equals to 1000 CAddress records (or around 40 bytes) per * distinct Network (up to 5) we have/had an inbound peer from, - * resulting in at most ~160 KB. Every separate local socket may - * add up to ~160 KB extra. + * resulting in at most ~196 KB. Every separate local socket may + * add up to ~196 KB extra. */ std::map m_addr_response_caches; From 0d04784af151de249bbbcbad51e6e8ad9af8f5a3 Mon Sep 17 00:00:00 2001 From: Gleb Naumenko Date: Sun, 23 Aug 2020 17:31:45 +0300 Subject: [PATCH 4/4] Refactor the functional test --- test/functional/p2p_getaddr_caching.py | 47 ++++++++------------------ 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/test/functional/p2p_getaddr_caching.py b/test/functional/p2p_getaddr_caching.py index 6622ea9ec20..2b75ad51754 100755 --- a/test/functional/p2p_getaddr_caching.py +++ b/test/functional/p2p_getaddr_caching.py @@ -5,13 +5,8 @@ """Test addr response caching""" import time -from test_framework.messages import ( - CAddress, - NODE_NETWORK, - NODE_WITNESS, - msg_addr, - msg_getaddr, -) + +from test_framework.messages import msg_getaddr from test_framework.p2p import ( P2PInterface, p2p_lock @@ -21,21 +16,9 @@ from test_framework.util import ( assert_equal, ) +# As defined in net_processing. MAX_ADDR_TO_SEND = 1000 - -def gen_addrs(n): - addrs = [] - for i in range(n): - addr = CAddress() - addr.time = int(time.time()) - addr.nServices = NODE_NETWORK | NODE_WITNESS - # Use first octets to occupy different AddrMan buckets - first_octet = i >> 8 - second_octet = i % 256 - addr.ip = "{}.{}.1.1".format(first_octet, second_octet) - addr.port = 8333 - addrs.append(addr) - return addrs +MAX_PCT_ADDR_TO_SEND = 23 class AddrReceiver(P2PInterface): @@ -62,18 +45,16 @@ class AddrTest(BitcoinTestFramework): self.num_nodes = 1 def run_test(self): - self.log.info('Create connection that sends and requests addr messages') - addr_source = self.nodes[0].add_p2p_connection(P2PInterface()) - - msg_send_addrs = msg_addr() self.log.info('Fill peer AddrMan with a lot of records') - # Since these addrs are sent from the same source, not all of them will be stored, - # because we allocate a limited number of AddrMan buckets per addr source. - total_addrs = 10000 - addrs = gen_addrs(total_addrs) - for i in range(int(total_addrs/MAX_ADDR_TO_SEND)): - msg_send_addrs.addrs = addrs[i * MAX_ADDR_TO_SEND:(i + 1) * MAX_ADDR_TO_SEND] - addr_source.send_and_ping(msg_send_addrs) + for i in range(10000): + first_octet = i >> 8 + second_octet = i % 256 + a = "{}.{}.1.1".format(first_octet, second_octet) + self.nodes[0].addpeeraddress(a, 8333) + + # Need to make sure we hit MAX_ADDR_TO_SEND records in the addr response later because + # only a fraction of all known addresses can be cached and returned. + assert(len(self.nodes[0].getnodeaddresses(0)) > int(MAX_ADDR_TO_SEND / (MAX_PCT_ADDR_TO_SEND / 100))) responses = [] self.log.info('Send many addr requests within short time to receive same response') @@ -89,7 +70,7 @@ class AddrTest(BitcoinTestFramework): responses.append(addr_receiver.get_received_addrs()) for response in responses[1:]: assert_equal(response, responses[0]) - assert(len(response) < MAX_ADDR_TO_SEND) + assert(len(response) == MAX_ADDR_TO_SEND) cur_mock_time += 3 * 24 * 60 * 60 self.nodes[0].setmocktime(cur_mock_time)