mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-03-26 15:36:19 +01:00
Merge bitcoin/bitcoin#20516: Well-defined CAddress disk serialization, and addrv2 anchors.dat
f8866e8c32Add roundtrip fuzz tests for CAddress serialization (Pieter Wuille)e2f0548b52Use addrv2 serialization in anchors.dat (Pieter Wuille)8cd8f37dfeIntroduce well-defined CAddress disk serialization (Pieter Wuille) Pull request description: Alternative to #20509. This makes the `CAddress` disk serialization format well defined, and uses it to enable addrv2 support in anchors.dat (in a way that's compatible with older software). The new format is: - The first 4 bytes store a format version number. Its low 19 bits are ignored (as those historically stored the `CLIENT_VERSION`), but its high 13 bits specify the actual serialization: - 0x00000000: LE64 encoding for `nServices`, V1 encoding for `CService` (like pre-BIP155 network serialization). - 0x20000000: CompactSize encoding for `nServices`, V2 encoding for `CService` (like BIP155 network serialization). - Any other value triggers an unsupported format error on deserialization, and can be used for future format changes. - The `ADDRV2_FORMAT` flag in the stream's version does not determine the actual serialization format; it only sets whether or not V2 encoding is permitted. ACKs for top commit: achow101: ACKf8866e8c32laanwj: Code review ACKf8866e8c32vasild: ACKf8866e8c32jonatack: ACKf8866e8c32tested rebased to master and built/run/restarted with DEBUG_ADDRMAN, peers.dat and anchors ser/deser seems fine hebasto: ACKf8866e8c32, tested on Linux Mint 20.1 (x86_64). Tree-SHA512: 3898f8a8c51783a46dd0aae03fa10060521f5dd6e79315fe95ba807689e78f202388ffa28c40bf156c6f7b1fc2ce806b155dcbe56027df73d039a55331723796
This commit is contained in:
@@ -23,7 +23,7 @@ bool SerializeDB(Stream& stream, const Data& data)
|
||||
{
|
||||
// Write and commit header, data
|
||||
try {
|
||||
CHashWriter hasher(SER_DISK, CLIENT_VERSION);
|
||||
CHashWriter hasher(stream.GetType(), stream.GetVersion());
|
||||
stream << Params().MessageStart() << data;
|
||||
hasher << Params().MessageStart() << data;
|
||||
stream << hasher.GetHash();
|
||||
@@ -35,7 +35,7 @@ bool SerializeDB(Stream& stream, const Data& data)
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data)
|
||||
bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data, int version)
|
||||
{
|
||||
// Generate random temporary filename
|
||||
uint16_t randv = 0;
|
||||
@@ -45,7 +45,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
|
||||
// open temp output file, and associate with CAutoFile
|
||||
fs::path pathTmp = gArgs.GetDataDirNet() / tmpfn;
|
||||
FILE *file = fsbridge::fopen(pathTmp, "wb");
|
||||
CAutoFile fileout(file, SER_DISK, CLIENT_VERSION);
|
||||
CAutoFile fileout(file, SER_DISK, version);
|
||||
if (fileout.IsNull()) {
|
||||
fileout.fclose();
|
||||
remove(pathTmp);
|
||||
@@ -106,11 +106,11 @@ bool DeserializeDB(Stream& stream, Data& data, bool fCheckSum = true)
|
||||
}
|
||||
|
||||
template <typename Data>
|
||||
bool DeserializeFileDB(const fs::path& path, Data& data)
|
||||
bool DeserializeFileDB(const fs::path& path, Data& data, int version)
|
||||
{
|
||||
// open input file, and associate with CAutoFile
|
||||
FILE* file = fsbridge::fopen(path, "rb");
|
||||
CAutoFile filein(file, SER_DISK, CLIENT_VERSION);
|
||||
CAutoFile filein(file, SER_DISK, version);
|
||||
if (filein.IsNull()) {
|
||||
LogPrintf("Missing or invalid file %s\n", path.string());
|
||||
return false;
|
||||
@@ -125,12 +125,12 @@ CBanDB::CBanDB(fs::path ban_list_path) : m_ban_list_path(std::move(ban_list_path
|
||||
|
||||
bool CBanDB::Write(const banmap_t& banSet)
|
||||
{
|
||||
return SerializeFileDB("banlist", m_ban_list_path, banSet);
|
||||
return SerializeFileDB("banlist", m_ban_list_path, banSet, CLIENT_VERSION);
|
||||
}
|
||||
|
||||
bool CBanDB::Read(banmap_t& banSet)
|
||||
{
|
||||
return DeserializeFileDB(m_ban_list_path, banSet);
|
||||
return DeserializeFileDB(m_ban_list_path, banSet, CLIENT_VERSION);
|
||||
}
|
||||
|
||||
CAddrDB::CAddrDB()
|
||||
@@ -140,12 +140,12 @@ CAddrDB::CAddrDB()
|
||||
|
||||
bool CAddrDB::Write(const CAddrMan& addr)
|
||||
{
|
||||
return SerializeFileDB("peers", pathAddr, addr);
|
||||
return SerializeFileDB("peers", pathAddr, addr, CLIENT_VERSION);
|
||||
}
|
||||
|
||||
bool CAddrDB::Read(CAddrMan& addr)
|
||||
{
|
||||
return DeserializeFileDB(pathAddr, addr);
|
||||
return DeserializeFileDB(pathAddr, addr, CLIENT_VERSION);
|
||||
}
|
||||
|
||||
bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
|
||||
@@ -161,13 +161,13 @@ bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
|
||||
void DumpAnchors(const fs::path& anchors_db_path, const std::vector<CAddress>& anchors)
|
||||
{
|
||||
LOG_TIME_SECONDS(strprintf("Flush %d outbound block-relay-only peer addresses to anchors.dat", anchors.size()));
|
||||
SerializeFileDB("anchors", anchors_db_path, anchors);
|
||||
SerializeFileDB("anchors", anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT);
|
||||
}
|
||||
|
||||
std::vector<CAddress> ReadAnchors(const fs::path& anchors_db_path)
|
||||
{
|
||||
std::vector<CAddress> anchors;
|
||||
if (DeserializeFileDB(anchors_db_path, anchors)) {
|
||||
if (DeserializeFileDB(anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT)) {
|
||||
LogPrintf("Loaded %i addresses from %s\n", anchors.size(), anchors_db_path.filename());
|
||||
} else {
|
||||
anchors.clear();
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <netaddress.h>
|
||||
#include <primitives/transaction.h>
|
||||
#include <serialize.h>
|
||||
#include <streams.h>
|
||||
#include <uint256.h>
|
||||
#include <version.h>
|
||||
|
||||
@@ -358,6 +359,31 @@ class CAddress : public CService
|
||||
{
|
||||
static constexpr uint32_t TIME_INIT{100000000};
|
||||
|
||||
/** Historically, CAddress disk serialization stored the CLIENT_VERSION, optionally OR'ed with
|
||||
* the ADDRV2_FORMAT flag to indicate V2 serialization. The first field has since been
|
||||
* disentangled from client versioning, and now instead:
|
||||
* - The low bits (masked by DISK_VERSION_IGNORE_MASK) store the fixed value DISK_VERSION_INIT,
|
||||
* (in case any code exists that treats it as a client version) but are ignored on
|
||||
* deserialization.
|
||||
* - The high bits (masked by ~DISK_VERSION_IGNORE_MASK) store actual serialization information.
|
||||
* Only 0 or DISK_VERSION_ADDRV2 (equal to the historical ADDRV2_FORMAT) are valid now, and
|
||||
* any other value triggers a deserialization failure. Other values can be added later if
|
||||
* needed.
|
||||
*
|
||||
* For disk deserialization, ADDRV2_FORMAT in the stream version signals that ADDRV2
|
||||
* deserialization is permitted, but the actual format is determined by the high bits in the
|
||||
* stored version field. For network serialization, the stream version having ADDRV2_FORMAT or
|
||||
* not determines the actual format used (as it has no embedded version number).
|
||||
*/
|
||||
static constexpr uint32_t DISK_VERSION_INIT{220000};
|
||||
static constexpr uint32_t DISK_VERSION_IGNORE_MASK{0b00000000'00000111'11111111'11111111};
|
||||
/** The version number written in disk serialized addresses to indicate V2 serializations.
|
||||
* It must be exactly 1<<29, as that is the value that historical versions used for this
|
||||
* (they used their internal ADDRV2_FORMAT flag here). */
|
||||
static constexpr uint32_t DISK_VERSION_ADDRV2{1 << 29};
|
||||
static_assert((DISK_VERSION_INIT & ~DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_INIT must be covered by DISK_VERSION_IGNORE_MASK");
|
||||
static_assert((DISK_VERSION_ADDRV2 & DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_ADDRV2 must not be covered by DISK_VERSION_IGNORE_MASK");
|
||||
|
||||
public:
|
||||
CAddress() : CService{} {};
|
||||
CAddress(CService ipIn, ServiceFlags nServicesIn) : CService{ipIn}, nServices{nServicesIn} {};
|
||||
@@ -365,22 +391,48 @@ public:
|
||||
|
||||
SERIALIZE_METHODS(CAddress, obj)
|
||||
{
|
||||
SER_READ(obj, obj.nTime = TIME_INIT);
|
||||
int nVersion = s.GetVersion();
|
||||
// CAddress has a distinct network serialization and a disk serialization, but it should never
|
||||
// be hashed (except through CHashWriter in addrdb.cpp, which sets SER_DISK), and it's
|
||||
// ambiguous what that would mean. Make sure no code relying on that is introduced:
|
||||
assert(!(s.GetType() & SER_GETHASH));
|
||||
bool use_v2;
|
||||
bool store_time;
|
||||
if (s.GetType() & SER_DISK) {
|
||||
READWRITE(nVersion);
|
||||
}
|
||||
if ((s.GetType() & SER_DISK) ||
|
||||
(nVersion != INIT_PROTO_VERSION && !(s.GetType() & SER_GETHASH))) {
|
||||
// In the disk serialization format, the encoding (v1 or v2) is determined by a flag version
|
||||
// that's part of the serialization itself. ADDRV2_FORMAT in the stream version only determines
|
||||
// whether V2 is chosen/permitted at all.
|
||||
uint32_t stored_format_version = DISK_VERSION_INIT;
|
||||
if (s.GetVersion() & ADDRV2_FORMAT) stored_format_version |= DISK_VERSION_ADDRV2;
|
||||
READWRITE(stored_format_version);
|
||||
stored_format_version &= ~DISK_VERSION_IGNORE_MASK; // ignore low bits
|
||||
if (stored_format_version == 0) {
|
||||
use_v2 = false;
|
||||
} else if (stored_format_version == DISK_VERSION_ADDRV2 && (s.GetVersion() & ADDRV2_FORMAT)) {
|
||||
// Only support v2 deserialization if ADDRV2_FORMAT is set.
|
||||
use_v2 = true;
|
||||
} else {
|
||||
throw std::ios_base::failure("Unsupported CAddress disk format version");
|
||||
}
|
||||
store_time = true;
|
||||
} else {
|
||||
// In the network serialization format, the encoding (v1 or v2) is determined directly by
|
||||
// the value of ADDRV2_FORMAT in the stream version, as no explicitly encoded version
|
||||
// exists in the stream.
|
||||
assert(s.GetType() & SER_NETWORK);
|
||||
use_v2 = s.GetVersion() & ADDRV2_FORMAT;
|
||||
// The only time we serialize a CAddress object without nTime is in
|
||||
// the initial VERSION messages which contain two CAddress records.
|
||||
// At that point, the serialization version is INIT_PROTO_VERSION.
|
||||
// After the version handshake, serialization version is >=
|
||||
// MIN_PEER_PROTO_VERSION and all ADDR messages are serialized with
|
||||
// nTime.
|
||||
READWRITE(obj.nTime);
|
||||
store_time = s.GetVersion() != INIT_PROTO_VERSION;
|
||||
}
|
||||
if (nVersion & ADDRV2_FORMAT) {
|
||||
|
||||
SER_READ(obj, obj.nTime = TIME_INIT);
|
||||
if (store_time) READWRITE(obj.nTime);
|
||||
// nServices is serialized as CompactSize in V2; as uint64_t in V1.
|
||||
if (use_v2) {
|
||||
uint64_t services_tmp;
|
||||
SER_WRITE(obj, services_tmp = obj.nServices);
|
||||
READWRITE(Using<CompactSizeFormatter<false>>(services_tmp));
|
||||
@@ -388,13 +440,22 @@ public:
|
||||
} else {
|
||||
READWRITE(Using<CustomUintFormatter<8>>(obj.nServices));
|
||||
}
|
||||
READWRITEAS(CService, obj);
|
||||
// Invoke V1/V2 serializer for CService parent object.
|
||||
OverrideStream<Stream> os(&s, s.GetType(), use_v2 ? ADDRV2_FORMAT : 0);
|
||||
SerReadWriteMany(os, ser_action, ReadWriteAsHelper<CService>(obj));
|
||||
}
|
||||
|
||||
// disk and network only
|
||||
//! Always included in serialization, except in the network format on INIT_PROTO_VERSION.
|
||||
uint32_t nTime{TIME_INIT};
|
||||
|
||||
//! Serialized as uint64_t in V1, and as CompactSize in V2.
|
||||
ServiceFlags nServices{NODE_NONE};
|
||||
|
||||
friend bool operator==(const CAddress& a, const CAddress& b)
|
||||
{
|
||||
return a.nTime == b.nTime &&
|
||||
a.nServices == b.nServices &&
|
||||
static_cast<const CService&>(a) == static_cast<const CService&>(b);
|
||||
}
|
||||
};
|
||||
|
||||
/** getdata message type flags */
|
||||
|
||||
@@ -53,9 +53,9 @@ struct invalid_fuzzing_input_exception : public std::exception {
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION)
|
||||
CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
|
||||
{
|
||||
CDataStream ds(SER_NETWORK, version);
|
||||
CDataStream ds(ser_type, version);
|
||||
ds << obj;
|
||||
return ds;
|
||||
}
|
||||
@@ -69,9 +69,9 @@ T Deserialize(CDataStream ds)
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt)
|
||||
void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt, const int ser_type = SER_NETWORK)
|
||||
{
|
||||
CDataStream ds(buffer, SER_NETWORK, INIT_PROTO_VERSION);
|
||||
CDataStream ds(buffer, ser_type, INIT_PROTO_VERSION);
|
||||
if (protocol_version) {
|
||||
ds.SetVersion(*protocol_version);
|
||||
} else {
|
||||
@@ -92,9 +92,9 @@ void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optio
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION)
|
||||
void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
|
||||
{
|
||||
assert(Deserialize<T>(Serialize(obj, version)) == obj);
|
||||
assert(Deserialize<T>(Serialize(obj, version, ser_type)) == obj);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -251,9 +251,37 @@ FUZZ_TARGET_DESERIALIZE(messageheader_deserialize, {
|
||||
DeserializeFromFuzzingInput(buffer, mh);
|
||||
(void)mh.IsCommandValid();
|
||||
})
|
||||
FUZZ_TARGET_DESERIALIZE(address_deserialize, {
|
||||
FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_notime, {
|
||||
CAddress a;
|
||||
DeserializeFromFuzzingInput(buffer, a);
|
||||
DeserializeFromFuzzingInput(buffer, a, INIT_PROTO_VERSION);
|
||||
// A CAddress without nTime (as is expected under INIT_PROTO_VERSION) will roundtrip
|
||||
// in all 5 formats (with/without nTime, v1/v2, network/disk)
|
||||
AssertEqualAfterSerializeDeserialize(a, INIT_PROTO_VERSION);
|
||||
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
|
||||
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
|
||||
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
|
||||
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
|
||||
})
|
||||
FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_withtime, {
|
||||
CAddress a;
|
||||
DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION);
|
||||
// A CAddress in V1 mode will roundtrip in all 4 formats that have nTime.
|
||||
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
|
||||
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
|
||||
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
|
||||
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
|
||||
})
|
||||
FUZZ_TARGET_DESERIALIZE(address_deserialize_v2, {
|
||||
CAddress a;
|
||||
DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION | ADDRV2_FORMAT);
|
||||
// A CAddress in V2 mode will roundtrip in both V2 formats, and also in the V1 formats
|
||||
// with time if it's V1 compatible.
|
||||
if (a.IsAddrV1Compatible()) {
|
||||
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
|
||||
AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
|
||||
}
|
||||
AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
|
||||
AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
|
||||
})
|
||||
FUZZ_TARGET_DESERIALIZE(inv_deserialize, {
|
||||
CInv i;
|
||||
|
||||
Reference in New Issue
Block a user