Merge bitcoin/bitcoin#29114: util: Faster std::byte (pre)vector (un)serialize

fab41697a5448ef2861f65795bd63a4ccdda6a40 Allow int8_t optimized vector serialization (MarcoFalke)
facaa14785e006c1af5a8b17b10e2722af8d054e Faster std::byte (pre)vector (un)serialize (MarcoFalke)

Pull request description:

  Currently, large vectors of `std::byte` are (un)serialized byte-by-byte, which is slow. Fix this, by enabling the already existing optimization for them.

  On my system this gives a 10x speedup for `./src/bench/bench_bitcoin --filter=PrevectorDeserializeTrivial`, when `std::byte` are used:

  ```diff
  diff --git a/src/bench/prevector.cpp b/src/bench/prevector.cpp
  index 2524e215e4..76b16bc34e 100644
  --- a/src/bench/prevector.cpp
  +++ b/src/bench/prevector.cpp
  @@ -17,7 +17,7 @@ struct nontrivial_t {
   static_assert(!std::is_trivially_default_constructible<nontrivial_t>::value,
                 "expected nontrivial_t to not be trivially constructible");

  -typedef unsigned char trivial_t;
  +typedef std::byte trivial_t;
   static_assert(std::is_trivially_default_constructible<trivial_t>::value,
                 "expected trivial_t to be trivially constructible");

  ```

  However, the optimization does not cover `signed char`. Fix that as well.

ACKs for top commit:
  sipa:
    utACK fab41697a5448ef2861f65795bd63a4ccdda6a40
  achow101:
    ACK fab41697a5448ef2861f65795bd63a4ccdda6a40
  TheCharlatan:
    ACK fab41697a5448ef2861f65795bd63a4ccdda6a40

Tree-SHA512: a3e20f375fd1d0e0dedb827a8ce528de1173ea69660c8c891ad1343a86b422072f6505096fca0d3f8af4442fbe1378a02e32d5974919d4e88ff06934d0258cba
This commit is contained in:
Ava Chow 2024-02-08 13:08:16 -05:00
commit ecbf4bae9c
No known key found for this signature in database
GPG Key ID: 17565732E08E5E41
2 changed files with 8 additions and 10 deletions

View File

@ -710,14 +710,12 @@ template<typename Stream, typename C> void Unserialize(Stream& is, std::basic_st
/**
* prevector
* prevectors of unsigned char are a special case and are intended to be serialized as a single opaque blob.
*/
template<typename Stream, unsigned int N, typename T> inline void Serialize(Stream& os, const prevector<N, T>& v);
template<typename Stream, unsigned int N, typename T> inline void Unserialize(Stream& is, prevector<N, T>& v);
/**
* vector
* vectors of unsigned char are a special case and are intended to be serialized as a single opaque blob.
*/
template<typename Stream, typename T, typename A> inline void Serialize(Stream& os, const std::vector<T, A>& v);
template<typename Stream, typename T, typename A> inline void Unserialize(Stream& is, std::vector<T, A>& v);
@ -820,10 +818,9 @@ void Unserialize(Stream& is, std::basic_string<C>& str)
template <typename Stream, unsigned int N, typename T>
void Serialize(Stream& os, const prevector<N, T>& v)
{
if constexpr (std::is_same_v<T, unsigned char>) {
if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes
WriteCompactSize(os, v.size());
if (!v.empty())
os.write(MakeByteSpan(v));
if (!v.empty()) os.write(MakeByteSpan(v));
} else {
Serialize(os, Using<VectorFormatter<DefaultFormatter>>(v));
}
@ -833,7 +830,7 @@ void Serialize(Stream& os, const prevector<N, T>& v)
template <typename Stream, unsigned int N, typename T>
void Unserialize(Stream& is, prevector<N, T>& v)
{
if constexpr (std::is_same_v<T, unsigned char>) {
if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes
// Limit size per read so bogus size value won't cause out of memory
v.clear();
unsigned int nSize = ReadCompactSize(is);
@ -856,10 +853,9 @@ void Unserialize(Stream& is, prevector<N, T>& v)
template <typename Stream, typename T, typename A>
void Serialize(Stream& os, const std::vector<T, A>& v)
{
if constexpr (std::is_same_v<T, unsigned char>) {
if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes
WriteCompactSize(os, v.size());
if (!v.empty())
os.write(MakeByteSpan(v));
if (!v.empty()) os.write(MakeByteSpan(v));
} else if constexpr (std::is_same_v<T, bool>) {
// A special case for std::vector<bool>, as dereferencing
// std::vector<bool>::const_iterator does not result in a const bool&
@ -877,7 +873,7 @@ void Serialize(Stream& os, const std::vector<T, A>& v)
template <typename Stream, typename T, typename A>
void Unserialize(Stream& is, std::vector<T, A>& v)
{
if constexpr (std::is_same_v<T, unsigned char>) {
if constexpr (BasicByte<T>) { // Use optimized version for unformatted basic bytes
// Limit size per read so bogus size value won't cause out of memory
v.clear();
unsigned int nSize = ReadCompactSize(is);

View File

@ -287,9 +287,11 @@ Span<std::byte> MakeWritableByteSpan(V&& v) noexcept
// Helper functions to safely cast basic byte pointers to unsigned char pointers.
inline unsigned char* UCharCast(char* c) { return reinterpret_cast<unsigned char*>(c); }
inline unsigned char* UCharCast(unsigned char* c) { return c; }
inline unsigned char* UCharCast(signed char* c) { return reinterpret_cast<unsigned char*>(c); }
inline unsigned char* UCharCast(std::byte* c) { return reinterpret_cast<unsigned char*>(c); }
inline const unsigned char* UCharCast(const char* c) { return reinterpret_cast<const unsigned char*>(c); }
inline const unsigned char* UCharCast(const unsigned char* c) { return c; }
inline const unsigned char* UCharCast(const signed char* c) { return reinterpret_cast<const unsigned char*>(c); }
inline const unsigned char* UCharCast(const std::byte* c) { return reinterpret_cast<const unsigned char*>(c); }
// Helper concept for the basic byte types.
template <typename B>