From 8cd8f37dfe3ffb73a09f3ad773603d9d89452245 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 26 Nov 2020 16:46:10 -0800 Subject: [PATCH 1/3] Introduce well-defined CAddress disk serialization Before this commit, CAddress disk serialization was messy. It stored CLIENT_VERSION in the first 4 bytes, optionally OR'ed with ADDRV2_FORMAT. - All bits except ADDRV2_FORMAT were ignored, making it hard to use for actual future format changes. - ADDRV2_FORMAT determines whether or not nServices is serialized in LE64 format or in CompactSize format. - Whether or not the embedded CService is serialized in V1 or V2 format is determined by the stream's version having ADDRV2_FORMAT (as opposed to the nServices encoding, which is determined by the disk version). To improve the situation, this commit introduces the following disk serialization format, compatible with earlier versions, but better defined for future changes: - The first 4 bytes store a format version number. Its low 19 bits are ignored (as it historically stored the CLIENT_VERSION), but its high 13 bits specify the serialization exactly: - 0x00000000: LE64 encoding for nServices, V1 encoding for CService - 0x20000000: CompactSize encoding for nServices, V2 encoding for CService - Any other value triggers an unsupported format error on deserialization, and can be used for future format changes. - The ADDRV2_FORMAT flag in the stream's version does not impact the actual serialization format; it only determines whether V2 encoding is permitted; whether it's actually enabled depends solely on the disk version number. Operationally the changes to the deserializer are: - Failure when the stored format version number is unexpected. - The embedded CService's format is determined by the stored format version number rather than the stream's version number. These do no introduce incompatibilities, as no code versions exist that write any value other than 0 or 0x20000000 in the top 13 bits, and no code paths where the stream's version differs from the stored version. --- src/protocol.h | 76 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/src/protocol.h b/src/protocol.h index aaa9f1df400..251b8892cf3 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -358,6 +359,31 @@ class CAddress : public CService { static constexpr uint32_t TIME_INIT{100000000}; + /** Historically, CAddress disk serialization stored the CLIENT_VERSION, optionally OR'ed with + * the ADDRV2_FORMAT flag to indicate V2 serialization. The first field has since been + * disentangled from client versioning, and now instead: + * - The low bits (masked by DISK_VERSION_IGNORE_MASK) store the fixed value DISK_VERSION_INIT, + * (in case any code exists that treats it as a client version) but are ignored on + * deserialization. + * - The high bits (masked by ~DISK_VERSION_IGNORE_MASK) store actual serialization information. + * Only 0 or DISK_VERSION_ADDRV2 (equal to the historical ADDRV2_FORMAT) are valid now, and + * any other value triggers a deserialization failure. Other values can be added later if + * needed. + * + * For disk deserialization, ADDRV2_FORMAT in the stream version signals that ADDRV2 + * deserialization is permitted, but the actual format is determined by the high bits in the + * stored version field. For network serialization, the stream version having ADDRV2_FORMAT or + * not determines the actual format used (as it has no embedded version number). + */ + static constexpr uint32_t DISK_VERSION_INIT{220000}; + static constexpr uint32_t DISK_VERSION_IGNORE_MASK{0b00000000'00000111'11111111'11111111}; + /** The version number written in disk serialized addresses to indicate V2 serializations. + * It must be exactly 1<<29, as that is the value that historical versions used for this + * (they used their internal ADDRV2_FORMAT flag here). */ + static constexpr uint32_t DISK_VERSION_ADDRV2{1 << 29}; + static_assert((DISK_VERSION_INIT & ~DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_INIT must be covered by DISK_VERSION_IGNORE_MASK"); + static_assert((DISK_VERSION_ADDRV2 & DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_ADDRV2 must not be covered by DISK_VERSION_IGNORE_MASK"); + public: CAddress() : CService{} {}; CAddress(CService ipIn, ServiceFlags nServicesIn) : CService{ipIn}, nServices{nServicesIn} {}; @@ -365,22 +391,48 @@ public: SERIALIZE_METHODS(CAddress, obj) { - SER_READ(obj, obj.nTime = TIME_INIT); - int nVersion = s.GetVersion(); + // CAddress has a distinct network serialization and a disk serialization, but it should never + // be hashed (except through CHashWriter in addrdb.cpp, which sets SER_DISK), and it's + // ambiguous what that would mean. Make sure no code relying on that is introduced: + assert(!(s.GetType() & SER_GETHASH)); + bool use_v2; + bool store_time; if (s.GetType() & SER_DISK) { - READWRITE(nVersion); - } - if ((s.GetType() & SER_DISK) || - (nVersion != INIT_PROTO_VERSION && !(s.GetType() & SER_GETHASH))) { + // In the disk serialization format, the encoding (v1 or v2) is determined by a flag version + // that's part of the serialization itself. ADDRV2_FORMAT in the stream version only determines + // whether V2 is chosen/permitted at all. + uint32_t stored_format_version = DISK_VERSION_INIT; + if (s.GetVersion() & ADDRV2_FORMAT) stored_format_version |= DISK_VERSION_ADDRV2; + READWRITE(stored_format_version); + stored_format_version &= ~DISK_VERSION_IGNORE_MASK; // ignore low bits + if (stored_format_version == 0) { + use_v2 = false; + } else if (stored_format_version == DISK_VERSION_ADDRV2 && (s.GetVersion() & ADDRV2_FORMAT)) { + // Only support v2 deserialization if ADDRV2_FORMAT is set. + use_v2 = true; + } else { + throw std::ios_base::failure("Unsupported CAddress disk format version"); + } + store_time = true; + } else { + // In the network serialization format, the encoding (v1 or v2) is determined directly by + // the value of ADDRV2_FORMAT in the stream version, as no explicitly encoded version + // exists in the stream. + assert(s.GetType() & SER_NETWORK); + use_v2 = s.GetVersion() & ADDRV2_FORMAT; // The only time we serialize a CAddress object without nTime is in // the initial VERSION messages which contain two CAddress records. // At that point, the serialization version is INIT_PROTO_VERSION. // After the version handshake, serialization version is >= // MIN_PEER_PROTO_VERSION and all ADDR messages are serialized with // nTime. - READWRITE(obj.nTime); + store_time = s.GetVersion() != INIT_PROTO_VERSION; } - if (nVersion & ADDRV2_FORMAT) { + + SER_READ(obj, obj.nTime = TIME_INIT); + if (store_time) READWRITE(obj.nTime); + // nServices is serialized as CompactSize in V2; as uint64_t in V1. + if (use_v2) { uint64_t services_tmp; SER_WRITE(obj, services_tmp = obj.nServices); READWRITE(Using>(services_tmp)); @@ -388,12 +440,14 @@ public: } else { READWRITE(Using>(obj.nServices)); } - READWRITEAS(CService, obj); + // Invoke V1/V2 serializer for CService parent object. + OverrideStream os(&s, s.GetType(), use_v2 ? ADDRV2_FORMAT : 0); + SerReadWriteMany(os, ser_action, ReadWriteAsHelper(obj)); } - // disk and network only + //! Always included in serialization, except in the network format on INIT_PROTO_VERSION. uint32_t nTime{TIME_INIT}; - + //! Serialized as uint64_t in V1, and as CompactSize in V2. ServiceFlags nServices{NODE_NONE}; }; From e2f0548b52a4b2ba3edf77e3f21365f1e8f270a4 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 26 Nov 2020 13:59:44 -0800 Subject: [PATCH 2/3] Use addrv2 serialization in anchors.dat --- src/addrdb.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/addrdb.cpp b/src/addrdb.cpp index c376aced109..bf2f6c7614e 100644 --- a/src/addrdb.cpp +++ b/src/addrdb.cpp @@ -23,7 +23,7 @@ bool SerializeDB(Stream& stream, const Data& data) { // Write and commit header, data try { - CHashWriter hasher(SER_DISK, CLIENT_VERSION); + CHashWriter hasher(stream.GetType(), stream.GetVersion()); stream << Params().MessageStart() << data; hasher << Params().MessageStart() << data; stream << hasher.GetHash(); @@ -35,7 +35,7 @@ bool SerializeDB(Stream& stream, const Data& data) } template -bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data) +bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data, int version) { // Generate random temporary filename uint16_t randv = 0; @@ -45,7 +45,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data // open temp output file, and associate with CAutoFile fs::path pathTmp = gArgs.GetDataDirNet() / tmpfn; FILE *file = fsbridge::fopen(pathTmp, "wb"); - CAutoFile fileout(file, SER_DISK, CLIENT_VERSION); + CAutoFile fileout(file, SER_DISK, version); if (fileout.IsNull()) { fileout.fclose(); remove(pathTmp); @@ -106,11 +106,11 @@ bool DeserializeDB(Stream& stream, Data& data, bool fCheckSum = true) } template -bool DeserializeFileDB(const fs::path& path, Data& data) +bool DeserializeFileDB(const fs::path& path, Data& data, int version) { // open input file, and associate with CAutoFile FILE* file = fsbridge::fopen(path, "rb"); - CAutoFile filein(file, SER_DISK, CLIENT_VERSION); + CAutoFile filein(file, SER_DISK, version); if (filein.IsNull()) { LogPrintf("Missing or invalid file %s\n", path.string()); return false; @@ -125,12 +125,12 @@ CBanDB::CBanDB(fs::path ban_list_path) : m_ban_list_path(std::move(ban_list_path bool CBanDB::Write(const banmap_t& banSet) { - return SerializeFileDB("banlist", m_ban_list_path, banSet); + return SerializeFileDB("banlist", m_ban_list_path, banSet, CLIENT_VERSION); } bool CBanDB::Read(banmap_t& banSet) { - return DeserializeFileDB(m_ban_list_path, banSet); + return DeserializeFileDB(m_ban_list_path, banSet, CLIENT_VERSION); } CAddrDB::CAddrDB() @@ -140,12 +140,12 @@ CAddrDB::CAddrDB() bool CAddrDB::Write(const CAddrMan& addr) { - return SerializeFileDB("peers", pathAddr, addr); + return SerializeFileDB("peers", pathAddr, addr, CLIENT_VERSION); } bool CAddrDB::Read(CAddrMan& addr) { - return DeserializeFileDB(pathAddr, addr); + return DeserializeFileDB(pathAddr, addr, CLIENT_VERSION); } bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers) @@ -161,13 +161,13 @@ bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers) void DumpAnchors(const fs::path& anchors_db_path, const std::vector& anchors) { LOG_TIME_SECONDS(strprintf("Flush %d outbound block-relay-only peer addresses to anchors.dat", anchors.size())); - SerializeFileDB("anchors", anchors_db_path, anchors); + SerializeFileDB("anchors", anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT); } std::vector ReadAnchors(const fs::path& anchors_db_path) { std::vector anchors; - if (DeserializeFileDB(anchors_db_path, anchors)) { + if (DeserializeFileDB(anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT)) { LogPrintf("Loaded %i addresses from %s\n", anchors.size(), anchors_db_path.filename()); } else { anchors.clear(); From f8866e8c324be3322fa507c2ceb1de35d148d0f1 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Thu, 26 Nov 2020 18:13:42 -0800 Subject: [PATCH 3/3] Add roundtrip fuzz tests for CAddress serialization --- src/protocol.h | 7 ++++++ src/test/fuzz/deserialize.cpp | 44 ++++++++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/protocol.h b/src/protocol.h index 251b8892cf3..f9248899dc5 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -449,6 +449,13 @@ public: uint32_t nTime{TIME_INIT}; //! Serialized as uint64_t in V1, and as CompactSize in V2. ServiceFlags nServices{NODE_NONE}; + + friend bool operator==(const CAddress& a, const CAddress& b) + { + return a.nTime == b.nTime && + a.nServices == b.nServices && + static_cast(a) == static_cast(b); + } }; /** getdata message type flags */ diff --git a/src/test/fuzz/deserialize.cpp b/src/test/fuzz/deserialize.cpp index 1290c78712b..decfc2610cb 100644 --- a/src/test/fuzz/deserialize.cpp +++ b/src/test/fuzz/deserialize.cpp @@ -53,9 +53,9 @@ struct invalid_fuzzing_input_exception : public std::exception { }; template -CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION) +CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK) { - CDataStream ds(SER_NETWORK, version); + CDataStream ds(ser_type, version); ds << obj; return ds; } @@ -69,9 +69,9 @@ T Deserialize(CDataStream ds) } template -void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional protocol_version = std::nullopt) +void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional protocol_version = std::nullopt, const int ser_type = SER_NETWORK) { - CDataStream ds(buffer, SER_NETWORK, INIT_PROTO_VERSION); + CDataStream ds(buffer, ser_type, INIT_PROTO_VERSION); if (protocol_version) { ds.SetVersion(*protocol_version); } else { @@ -92,9 +92,9 @@ void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optio } template -void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION) +void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK) { - assert(Deserialize(Serialize(obj, version)) == obj); + assert(Deserialize(Serialize(obj, version, ser_type)) == obj); } } // namespace @@ -251,9 +251,37 @@ FUZZ_TARGET_DESERIALIZE(messageheader_deserialize, { DeserializeFromFuzzingInput(buffer, mh); (void)mh.IsCommandValid(); }) -FUZZ_TARGET_DESERIALIZE(address_deserialize, { +FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_notime, { CAddress a; - DeserializeFromFuzzingInput(buffer, a); + DeserializeFromFuzzingInput(buffer, a, INIT_PROTO_VERSION); + // A CAddress without nTime (as is expected under INIT_PROTO_VERSION) will roundtrip + // in all 5 formats (with/without nTime, v1/v2, network/disk) + AssertEqualAfterSerializeDeserialize(a, INIT_PROTO_VERSION); + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION); + AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK); + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT); + AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK); +}) +FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_withtime, { + CAddress a; + DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION); + // A CAddress in V1 mode will roundtrip in all 4 formats that have nTime. + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION); + AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK); + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT); + AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK); +}) +FUZZ_TARGET_DESERIALIZE(address_deserialize_v2, { + CAddress a; + DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION | ADDRV2_FORMAT); + // A CAddress in V2 mode will roundtrip in both V2 formats, and also in the V1 formats + // with time if it's V1 compatible. + if (a.IsAddrV1Compatible()) { + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION); + AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK); + } + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT); + AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK); }) FUZZ_TARGET_DESERIALIZE(inv_deserialize, { CInv i;