From b4c5fda417dd9ff8bf9fe24a87d384a649e3730d Mon Sep 17 00:00:00 2001 From: John Newbery Date: Thu, 3 Dec 2020 10:54:57 +0000 Subject: [PATCH 1/7] [addrman] Fix new table bucketing during unserialization An addrman entry can appear in up to 8 new table buckets. We store this entry->bucket indexing during shutdown so that on restart we can restore the entries to their correct buckets. Commit ec45646de9e62b3d42c85716bfeb06d8f2b507dc broke the deserialization code so that each entry could only be put in up to one new bucket. Fix that. --- src/addrman.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/addrman.h b/src/addrman.h index 9ac67b7af6a..bcf36d9b3f8 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -500,7 +500,9 @@ public: nTried -= nLost; // Store positions in the new table buckets to apply later (if possible). - std::map entryToBucket; // Represents which entry belonged to which bucket when serializing + // An entry may appear in up to ADDRMAN_NEW_BUCKETS_PER_ADDRESS buckets, + // so we store all bucket-entry_index pairs to iterate through later. + std::vector> bucket_entries; for (int bucket = 0; bucket < nUBuckets; bucket++) { int nSize = 0; @@ -509,7 +511,7 @@ public: int nIndex = 0; s >> nIndex; if (nIndex >= 0 && nIndex < nNew) { - entryToBucket[nIndex] = bucket; + bucket_entries.emplace_back(bucket, nIndex); } } } @@ -523,9 +525,10 @@ public: s >> serialized_asmap_version; } - for (int n = 0; n < nNew; n++) { - CAddrInfo &info = mapInfo[n]; - int bucket = entryToBucket[n]; + for (auto bucket_entry : bucket_entries) { + int bucket{bucket_entry.first}; + const int n{bucket_entry.second}; + CAddrInfo& info = mapInfo[n]; int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); if (format >= Format::V2_ASMAP && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) { From 009b8e0fdf3bfb11668edacced5d8b70726d5d0e Mon Sep 17 00:00:00 2001 From: John Newbery Date: Thu, 3 Dec 2020 11:31:08 +0000 Subject: [PATCH 2/7] [addrman] Improve variable naming/code style of touched code. --- src/addrman.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/addrman.h b/src/addrman.h index bcf36d9b3f8..4cd4106ff40 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -504,14 +504,14 @@ public: // so we store all bucket-entry_index pairs to iterate through later. std::vector> bucket_entries; - for (int bucket = 0; bucket < nUBuckets; bucket++) { - int nSize = 0; - s >> nSize; - for (int n = 0; n < nSize; n++) { - int nIndex = 0; - s >> nIndex; - if (nIndex >= 0 && nIndex < nNew) { - bucket_entries.emplace_back(bucket, nIndex); + for (int bucket = 0; bucket < nUBuckets; ++bucket) { + int num_entries{0}; + s >> num_entries; + for (int n = 0; n < num_entries; ++n) { + int entry_index{0}; + s >> entry_index; + if (entry_index >= 0 && entry_index < nNew) { + bucket_entries.emplace_back(bucket, entry_index); } } } @@ -527,13 +527,13 @@ public: for (auto bucket_entry : bucket_entries) { int bucket{bucket_entry.first}; - const int n{bucket_entry.second}; - CAddrInfo& info = mapInfo[n]; + const int entry_index{bucket_entry.second}; + CAddrInfo& info = mapInfo[entry_index]; int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); if (format >= Format::V2_ASMAP && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) { // Bucketing has not changed, using existing bucket positions for the new table - vvNew[bucket][nUBucketPos] = n; + vvNew[bucket][nUBucketPos] = entry_index; info.nRefCount++; } else { // In case the new table data cannot be used (format unknown, bucket count wrong or new asmap), @@ -542,7 +542,7 @@ public: bucket = info.GetNewBucket(nKey, m_asmap); nUBucketPos = info.GetBucketPosition(nKey, true, bucket); if (vvNew[bucket][nUBucketPos] == -1) { - vvNew[bucket][nUBucketPos] = n; + vvNew[bucket][nUBucketPos] = entry_index; info.nRefCount++; } } From 8062d928ce5c495c1b6ecd18e4b30c12da822d90 Mon Sep 17 00:00:00 2001 From: John Newbery Date: Thu, 3 Dec 2020 11:00:08 +0000 Subject: [PATCH 3/7] [addrman] Rename asmap version to asmap checksum Version implies that higher numbers take precendence. This is really a checksum, to check whether the provided asmap is the same as the one used when the peers.dat file was serialized. Also update the comments to explain where/why this is used. --- src/addrman.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/addrman.h b/src/addrman.h index 4cd4106ff40..75e9f36b860 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -340,6 +340,7 @@ public: * * for each bucket: * * number of elements * * for each element: index + * * asmap checksum * * 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it * as incompatible. This is necessary because it did not check the version number on @@ -348,8 +349,8 @@ public: * Notice that vvTried, mapAddr and vVector are never encoded explicitly; * they are instead reconstructed from the other information. * - * vvNew is serialized, but only used if ADDRMAN_UNKNOWN_BUCKET_COUNT didn't change, - * otherwise it is reconstructed as well. + * vvNew is serialized, but only used if ADDRMAN_NEW_BUCKET_COUNT and the asmap checksum + * didn't change, otherwise it is reconstructed as well. * * This format is more complex, but significantly smaller (at most 1.5 MiB), and supports * changes to the ADDRMAN_ parameters without breaking the on-disk structure. @@ -413,13 +414,13 @@ public: } } } - // Store asmap version after bucket entries so that it + // Store asmap checksum after bucket entries so that it // can be ignored by older clients for backward compatibility. - uint256 asmap_version; + uint256 asmap_checksum; if (m_asmap.size() != 0) { - asmap_version = SerializeHash(m_asmap); + asmap_checksum = SerializeHash(m_asmap); } - s << asmap_version; + s << asmap_checksum; } template @@ -516,13 +517,13 @@ public: } } - uint256 supplied_asmap_version; + uint256 supplied_asmap_checksum; if (m_asmap.size() != 0) { - supplied_asmap_version = SerializeHash(m_asmap); + supplied_asmap_checksum = SerializeHash(m_asmap); } - uint256 serialized_asmap_version; + uint256 serialized_asmap_checksum; if (format >= Format::V2_ASMAP) { - s >> serialized_asmap_version; + s >> serialized_asmap_checksum; } for (auto bucket_entry : bucket_entries) { @@ -531,7 +532,7 @@ public: CAddrInfo& info = mapInfo[entry_index]; int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); if (format >= Format::V2_ASMAP && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && - info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_version == supplied_asmap_version) { + info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_checksum == supplied_asmap_checksum) { // Bucketing has not changed, using existing bucket positions for the new table vvNew[bucket][nUBucketPos] = entry_index; info.nRefCount++; From a5c9b04959f443372400f9a736c6eaf5502284a1 Mon Sep 17 00:00:00 2001 From: John Newbery Date: Thu, 3 Dec 2020 11:12:10 +0000 Subject: [PATCH 4/7] [addrman] Don't rebucket new table entries unnecessarily Only rebucket if the asmap checksum has changed, not if the file format has changed but no asmap is provided. Also, don't try to add an entry to another bucket if it already appears in ADDRMAN_NEW_BUCKETS_PER_ADDRESS buckets. --- src/addrman.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/addrman.h b/src/addrman.h index 75e9f36b860..693860aad9a 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -517,6 +517,8 @@ public: } } + // Attempt to restore the entry's new buckets if the bucket count and asmap + // checksum haven't changed uint256 supplied_asmap_checksum; if (m_asmap.size() != 0) { supplied_asmap_checksum = SerializeHash(m_asmap); @@ -525,19 +527,26 @@ public: if (format >= Format::V2_ASMAP) { s >> serialized_asmap_checksum; } + const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && + serialized_asmap_checksum == supplied_asmap_checksum}; for (auto bucket_entry : bucket_entries) { int bucket{bucket_entry.first}; const int entry_index{bucket_entry.second}; CAddrInfo& info = mapInfo[entry_index]; + + // The entry shouldn't appear in more than + // ADDRMAN_NEW_BUCKETS_PER_ADDRESS. If it has already, just skip + // this bucket_entry. + if (info.nRefCount >= ADDRMAN_NEW_BUCKETS_PER_ADDRESS) continue; + int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); - if (format >= Format::V2_ASMAP && nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && vvNew[bucket][nUBucketPos] == -1 && - info.nRefCount < ADDRMAN_NEW_BUCKETS_PER_ADDRESS && serialized_asmap_checksum == supplied_asmap_checksum) { + if (restore_bucketing && vvNew[bucket][nUBucketPos] == -1) { // Bucketing has not changed, using existing bucket positions for the new table vvNew[bucket][nUBucketPos] = entry_index; info.nRefCount++; } else { - // In case the new table data cannot be used (format unknown, bucket count wrong or new asmap), + // In case the new table data cannot be used (bucket count wrong or new asmap), // try to give them a reference based on their primary source address. LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n"); bucket = info.GetNewBucket(nKey, m_asmap); From ac3547eddd8a7d67b4103508f30d5d02a9c1f148 Mon Sep 17 00:00:00 2001 From: John Newbery Date: Thu, 3 Dec 2020 11:31:08 +0000 Subject: [PATCH 5/7] [addrman] Improve variable naming/code style of touched code. --- src/addrman.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/addrman.h b/src/addrman.h index 693860aad9a..cde864f2590 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -517,8 +517,9 @@ public: } } - // Attempt to restore the entry's new buckets if the bucket count and asmap - // checksum haven't changed + // If the bucket count and asmap checksum haven't changed, then attempt + // to restore the entries to the buckets/positions they were in before + // serialization. uint256 supplied_asmap_checksum; if (m_asmap.size() != 0) { supplied_asmap_checksum = SerializeHash(m_asmap); @@ -540,20 +541,20 @@ public: // this bucket_entry. if (info.nRefCount >= ADDRMAN_NEW_BUCKETS_PER_ADDRESS) continue; - int nUBucketPos = info.GetBucketPosition(nKey, true, bucket); - if (restore_bucketing && vvNew[bucket][nUBucketPos] == -1) { + int bucket_position = info.GetBucketPosition(nKey, true, bucket); + if (restore_bucketing && vvNew[bucket][bucket_position] == -1) { // Bucketing has not changed, using existing bucket positions for the new table - vvNew[bucket][nUBucketPos] = entry_index; - info.nRefCount++; + vvNew[bucket][bucket_position] = entry_index; + ++info.nRefCount; } else { // In case the new table data cannot be used (bucket count wrong or new asmap), // try to give them a reference based on their primary source address. LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n"); bucket = info.GetNewBucket(nKey, m_asmap); - nUBucketPos = info.GetBucketPosition(nKey, true, bucket); - if (vvNew[bucket][nUBucketPos] == -1) { - vvNew[bucket][nUBucketPos] = entry_index; - info.nRefCount++; + bucket_position = info.GetBucketPosition(nKey, true, bucket); + if (vvNew[bucket][bucket_position] == -1) { + vvNew[bucket][bucket_position] = entry_index; + ++info.nRefCount; } } } From 436292367c1d737cf73bd985293539500d1206f5 Mon Sep 17 00:00:00 2001 From: John Newbery Date: Fri, 29 Jan 2021 10:18:31 +0000 Subject: [PATCH 6/7] [addrman] Improve serialization comments Thanks to Vasil Dimov for these suggestions --- src/addrman.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/addrman.h b/src/addrman.h index cde864f2590..9ca0a989c0a 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -335,23 +335,20 @@ public: * * nNew * * nTried * * number of "new" buckets XOR 2**30 - * * all nNew addrinfos in vvNew - * * all nTried addrinfos in vvTried - * * for each bucket: + * * all new addresses (total count: nNew) + * * all tried addresses (total count: nTried) + * * for each new bucket: * * number of elements - * * for each element: index + * * for each element: index in the serialized "all new addresses" * * asmap checksum * * 2**30 is xorred with the number of buckets to make addrman deserializer v0 detect it * as incompatible. This is necessary because it did not check the version number on * deserialization. * - * Notice that vvTried, mapAddr and vVector are never encoded explicitly; + * vvNew, vvTried, mapInfo, mapAddr and vRandom are never encoded explicitly; * they are instead reconstructed from the other information. * - * vvNew is serialized, but only used if ADDRMAN_NEW_BUCKET_COUNT and the asmap checksum - * didn't change, otherwise it is reconstructed as well. - * * This format is more complex, but significantly smaller (at most 1.5 MiB), and supports * changes to the ADDRMAN_ parameters without breaking the on-disk structure. * From 4676a4fb5be0f6ef0b3f71c1f4361c20f7cb0e0b Mon Sep 17 00:00:00 2001 From: John Newbery Date: Fri, 29 Jan 2021 10:29:44 +0000 Subject: [PATCH 7/7] [addrman] Don't repeat "Bucketing method was updated" log multiple times Thanks to Vasil Dimov for these suggestions --- src/addrman.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/addrman.h b/src/addrman.h index 9ca0a989c0a..92a55709530 100644 --- a/src/addrman.h +++ b/src/addrman.h @@ -528,6 +528,10 @@ public: const bool restore_bucketing{nUBuckets == ADDRMAN_NEW_BUCKET_COUNT && serialized_asmap_checksum == supplied_asmap_checksum}; + if (!restore_bucketing) { + LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n"); + } + for (auto bucket_entry : bucket_entries) { int bucket{bucket_entry.first}; const int entry_index{bucket_entry.second}; @@ -546,7 +550,6 @@ public: } else { // In case the new table data cannot be used (bucket count wrong or new asmap), // try to give them a reference based on their primary source address. - LogPrint(BCLog::ADDRMAN, "Bucketing method was updated, re-bucketing addrman entries from disk\n"); bucket = info.GetNewBucket(nKey, m_asmap); bucket_position = info.GetBucketPosition(nKey, true, bucket); if (vvNew[bucket][bucket_position] == -1) {