Merge bitcoin/bitcoin#20516: Well-defined CAddress disk serialization, and addrv2 anchors.dat

f8866e8c32 Add roundtrip fuzz tests for CAddress serialization (Pieter Wuille)
e2f0548b52 Use addrv2 serialization in anchors.dat (Pieter Wuille)
8cd8f37dfe Introduce well-defined CAddress disk serialization (Pieter Wuille)

Pull request description:

  Alternative to #20509.

  This makes the `CAddress` disk serialization format well defined, and uses it to enable addrv2 support in anchors.dat (in a way that's compatible with older software). The new format is:
  - The first 4 bytes store a format version number. Its low 19 bits are ignored (as those historically stored the `CLIENT_VERSION`), but its high 13 bits specify the actual serialization:
    - 0x00000000: LE64 encoding for `nServices`, V1 encoding for `CService` (like pre-BIP155 network serialization).
    - 0x20000000: CompactSize encoding for `nServices`, V2 encoding for `CService` (like BIP155 network serialization).
    - Any other value triggers an unsupported format error on deserialization, and can be used for future format changes.
  - The `ADDRV2_FORMAT` flag in the stream's version does not determine the actual serialization format; it only sets whether or not V2 encoding is permitted.

ACKs for top commit:
  achow101:
    ACK f8866e8c32
  laanwj:
    Code review ACK f8866e8c32
  vasild:
    ACK f8866e8c32
  jonatack:
    ACK f8866e8c32 tested rebased to master and built/run/restarted with DEBUG_ADDRMAN, peers.dat and anchors ser/deser seems fine
  hebasto:
    ACK f8866e8c32, tested on Linux Mint 20.1 (x86_64).

Tree-SHA512: 3898f8a8c51783a46dd0aae03fa10060521f5dd6e79315fe95ba807689e78f202388ffa28c40bf156c6f7b1fc2ce806b155dcbe56027df73d039a55331723796
This commit is contained in:
W. J. van der Laan
2021-06-17 17:37:44 +02:00
3 changed files with 119 additions and 30 deletions

View File

@@ -23,7 +23,7 @@ bool SerializeDB(Stream& stream, const Data& data)
{
// Write and commit header, data
try {
CHashWriter hasher(SER_DISK, CLIENT_VERSION);
CHashWriter hasher(stream.GetType(), stream.GetVersion());
stream << Params().MessageStart() << data;
hasher << Params().MessageStart() << data;
stream << hasher.GetHash();
@@ -35,7 +35,7 @@ bool SerializeDB(Stream& stream, const Data& data)
}
template <typename Data>
bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data)
bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data, int version)
{
// Generate random temporary filename
uint16_t randv = 0;
@@ -45,7 +45,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
// open temp output file, and associate with CAutoFile
fs::path pathTmp = gArgs.GetDataDirNet() / tmpfn;
FILE *file = fsbridge::fopen(pathTmp, "wb");
CAutoFile fileout(file, SER_DISK, CLIENT_VERSION);
CAutoFile fileout(file, SER_DISK, version);
if (fileout.IsNull()) {
fileout.fclose();
remove(pathTmp);
@@ -106,11 +106,11 @@ bool DeserializeDB(Stream& stream, Data& data, bool fCheckSum = true)
}
template <typename Data>
bool DeserializeFileDB(const fs::path& path, Data& data)
bool DeserializeFileDB(const fs::path& path, Data& data, int version)
{
// open input file, and associate with CAutoFile
FILE* file = fsbridge::fopen(path, "rb");
CAutoFile filein(file, SER_DISK, CLIENT_VERSION);
CAutoFile filein(file, SER_DISK, version);
if (filein.IsNull()) {
LogPrintf("Missing or invalid file %s\n", path.string());
return false;
@@ -125,12 +125,12 @@ CBanDB::CBanDB(fs::path ban_list_path) : m_ban_list_path(std::move(ban_list_path
bool CBanDB::Write(const banmap_t& banSet)
{
return SerializeFileDB("banlist", m_ban_list_path, banSet);
return SerializeFileDB("banlist", m_ban_list_path, banSet, CLIENT_VERSION);
}
bool CBanDB::Read(banmap_t& banSet)
{
return DeserializeFileDB(m_ban_list_path, banSet);
return DeserializeFileDB(m_ban_list_path, banSet, CLIENT_VERSION);
}
CAddrDB::CAddrDB()
@@ -140,12 +140,12 @@ CAddrDB::CAddrDB()
bool CAddrDB::Write(const CAddrMan& addr)
{
return SerializeFileDB("peers", pathAddr, addr);
return SerializeFileDB("peers", pathAddr, addr, CLIENT_VERSION);
}
bool CAddrDB::Read(CAddrMan& addr)
{
return DeserializeFileDB(pathAddr, addr);
return DeserializeFileDB(pathAddr, addr, CLIENT_VERSION);
}
bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
@@ -161,13 +161,13 @@ bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
void DumpAnchors(const fs::path& anchors_db_path, const std::vector<CAddress>& anchors)
{
LOG_TIME_SECONDS(strprintf("Flush %d outbound block-relay-only peer addresses to anchors.dat", anchors.size()));
SerializeFileDB("anchors", anchors_db_path, anchors);
SerializeFileDB("anchors", anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT);
}
std::vector<CAddress> ReadAnchors(const fs::path& anchors_db_path)
{
std::vector<CAddress> anchors;
if (DeserializeFileDB(anchors_db_path, anchors)) {
if (DeserializeFileDB(anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT)) {
LogPrintf("Loaded %i addresses from %s\n", anchors.size(), anchors_db_path.filename());
} else {
anchors.clear();

View File

@@ -13,6 +13,7 @@
#include <netaddress.h>
#include <primitives/transaction.h>
#include <serialize.h>
#include <streams.h>
#include <uint256.h>
#include <version.h>
@@ -358,6 +359,31 @@ class CAddress : public CService
{
static constexpr uint32_t TIME_INIT{100000000};
/** Historically, CAddress disk serialization stored the CLIENT_VERSION, optionally OR'ed with
* the ADDRV2_FORMAT flag to indicate V2 serialization. The first field has since been
* disentangled from client versioning, and now instead:
* - The low bits (masked by DISK_VERSION_IGNORE_MASK) store the fixed value DISK_VERSION_INIT,
* (in case any code exists that treats it as a client version) but are ignored on
* deserialization.
* - The high bits (masked by ~DISK_VERSION_IGNORE_MASK) store actual serialization information.
* Only 0 or DISK_VERSION_ADDRV2 (equal to the historical ADDRV2_FORMAT) are valid now, and
* any other value triggers a deserialization failure. Other values can be added later if
* needed.
*
* For disk deserialization, ADDRV2_FORMAT in the stream version signals that ADDRV2
* deserialization is permitted, but the actual format is determined by the high bits in the
* stored version field. For network serialization, the stream version having ADDRV2_FORMAT or
* not determines the actual format used (as it has no embedded version number).
*/
static constexpr uint32_t DISK_VERSION_INIT{220000};
static constexpr uint32_t DISK_VERSION_IGNORE_MASK{0b00000000'00000111'11111111'11111111};
/** The version number written in disk serialized addresses to indicate V2 serializations.
* It must be exactly 1<<29, as that is the value that historical versions used for this
* (they used their internal ADDRV2_FORMAT flag here). */
static constexpr uint32_t DISK_VERSION_ADDRV2{1 << 29};
static_assert((DISK_VERSION_INIT & ~DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_INIT must be covered by DISK_VERSION_IGNORE_MASK");
static_assert((DISK_VERSION_ADDRV2 & DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_ADDRV2 must not be covered by DISK_VERSION_IGNORE_MASK");
public:
CAddress() : CService{} {};
CAddress(CService ipIn, ServiceFlags nServicesIn) : CService{ipIn}, nServices{nServicesIn} {};
@@ -365,22 +391,48 @@ public:
SERIALIZE_METHODS(CAddress, obj)
{
SER_READ(obj, obj.nTime = TIME_INIT);
int nVersion = s.GetVersion();
// CAddress has a distinct network serialization and a disk serialization, but it should never
// be hashed (except through CHashWriter in addrdb.cpp, which sets SER_DISK), and it's
// ambiguous what that would mean. Make sure no code relying on that is introduced:
assert(!(s.GetType() & SER_GETHASH));
bool use_v2;
bool store_time;
if (s.GetType() & SER_DISK) {
READWRITE(nVersion);
}
if ((s.GetType() & SER_DISK) ||
(nVersion != INIT_PROTO_VERSION && !(s.GetType() & SER_GETHASH))) {
// In the disk serialization format, the encoding (v1 or v2) is determined by a flag version
// that's part of the serialization itself. ADDRV2_FORMAT in the stream version only determines
// whether V2 is chosen/permitted at all.
uint32_t stored_format_version = DISK_VERSION_INIT;
if (s.GetVersion() & ADDRV2_FORMAT) stored_format_version |= DISK_VERSION_ADDRV2;
READWRITE(stored_format_version);
stored_format_version &= ~DISK_VERSION_IGNORE_MASK; // ignore low bits
if (stored_format_version == 0) {
use_v2 = false;
} else if (stored_format_version == DISK_VERSION_ADDRV2 && (s.GetVersion() & ADDRV2_FORMAT)) {
// Only support v2 deserialization if ADDRV2_FORMAT is set.
use_v2 = true;
} else {
throw std::ios_base::failure("Unsupported CAddress disk format version");
}
store_time = true;
} else {
// In the network serialization format, the encoding (v1 or v2) is determined directly by
// the value of ADDRV2_FORMAT in the stream version, as no explicitly encoded version
// exists in the stream.
assert(s.GetType() & SER_NETWORK);
use_v2 = s.GetVersion() & ADDRV2_FORMAT;
// The only time we serialize a CAddress object without nTime is in
// the initial VERSION messages which contain two CAddress records.
// At that point, the serialization version is INIT_PROTO_VERSION.
// After the version handshake, serialization version is >=
// MIN_PEER_PROTO_VERSION and all ADDR messages are serialized with
// nTime.
READWRITE(obj.nTime);
store_time = s.GetVersion() != INIT_PROTO_VERSION;
}
if (nVersion & ADDRV2_FORMAT) {
SER_READ(obj, obj.nTime = TIME_INIT);
if (store_time) READWRITE(obj.nTime);
// nServices is serialized as CompactSize in V2; as uint64_t in V1.
if (use_v2) {
uint64_t services_tmp;
SER_WRITE(obj, services_tmp = obj.nServices);
READWRITE(Using<CompactSizeFormatter<false>>(services_tmp));
@@ -388,13 +440,22 @@ public:
} else {
READWRITE(Using<CustomUintFormatter<8>>(obj.nServices));
}
READWRITEAS(CService, obj);
// Invoke V1/V2 serializer for CService parent object.
OverrideStream<Stream> os(&s, s.GetType(), use_v2 ? ADDRV2_FORMAT : 0);
SerReadWriteMany(os, ser_action, ReadWriteAsHelper<CService>(obj));
}
// disk and network only
//! Always included in serialization, except in the network format on INIT_PROTO_VERSION.
uint32_t nTime{TIME_INIT};
//! Serialized as uint64_t in V1, and as CompactSize in V2.
ServiceFlags nServices{NODE_NONE};
friend bool operator==(const CAddress& a, const CAddress& b)
{
return a.nTime == b.nTime &&
a.nServices == b.nServices &&
static_cast<const CService&>(a) == static_cast<const CService&>(b);
}
};
/** getdata message type flags */

View File

@@ -53,9 +53,9 @@ struct invalid_fuzzing_input_exception : public std::exception {
};
template <typename T>
CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION)
CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
{
CDataStream ds(SER_NETWORK, version);
CDataStream ds(ser_type, version);
ds << obj;
return ds;
}
@@ -69,9 +69,9 @@ T Deserialize(CDataStream ds)
}
template <typename T>
void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt)
void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt, const int ser_type = SER_NETWORK)
{
CDataStream ds(buffer, SER_NETWORK, INIT_PROTO_VERSION);
CDataStream ds(buffer, ser_type, INIT_PROTO_VERSION);
if (protocol_version) {
ds.SetVersion(*protocol_version);
} else {
@@ -92,9 +92,9 @@ void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optio
}
template <typename T>
void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION)
void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
{
assert(Deserialize<T>(Serialize(obj, version)) == obj);
assert(Deserialize<T>(Serialize(obj, version, ser_type)) == obj);
}
} // namespace
@@ -251,9 +251,37 @@ FUZZ_TARGET_DESERIALIZE(messageheader_deserialize, {
DeserializeFromFuzzingInput(buffer, mh);
(void)mh.IsCommandValid();
})
FUZZ_TARGET_DESERIALIZE(address_deserialize, {
FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_notime, {
CAddress a;
DeserializeFromFuzzingInput(buffer, a);
DeserializeFromFuzzingInput(buffer, a, INIT_PROTO_VERSION);
// A CAddress without nTime (as is expected under INIT_PROTO_VERSION) will roundtrip
// in all 5 formats (with/without nTime, v1/v2, network/disk)
AssertEqualAfterSerializeDeserialize(a, INIT_PROTO_VERSION);
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
})
FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_withtime, {
CAddress a;
DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION);
// A CAddress in V1 mode will roundtrip in all 4 formats that have nTime.
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
})
FUZZ_TARGET_DESERIALIZE(address_deserialize_v2, {
CAddress a;
DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION | ADDRV2_FORMAT);
// A CAddress in V2 mode will roundtrip in both V2 formats, and also in the V1 formats
// with time if it's V1 compatible.
if (a.IsAddrV1Compatible()) {
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
}
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
})
FUZZ_TARGET_DESERIALIZE(inv_deserialize, {
CInv i;