Compact serialization for variable-length integers

Variable-length integers: bytes are a MSB base-128 encoding of the number.
The high bit in each byte signifies whether another digit follows. To make
the encoding is one-to-one, one is subtracted from all but the last digit.
Thus, the byte sequence a[] with length len, where all but the last byte
has bit 128 set, encodes the number:

  (a[len-1] & 0x7F) + sum(i=1..len-1, 128^i*((a[len-i-1] & 0x7F)+1))

Properties:
* Very small (0-127: 1 byte, 128-16511: 2 bytes, 16512-2113663: 3 bytes)
* Every integer has exactly one encoding
* Encoding does not depend on size of original integer type
This commit is contained in:
Pieter Wuille
2012-06-15 14:19:11 +02:00
parent 43b7905e98
commit 4d6144f97f
2 changed files with 138 additions and 1 deletions

View File

@@ -0,0 +1,45 @@
#include <boost/test/unit_test.hpp>
#include <string>
#include <vector>
#include "serialize.h"
using namespace std;
BOOST_AUTO_TEST_SUITE(serialize_tests)
BOOST_AUTO_TEST_CASE(varints)
{
// encode
CDataStream ss(SER_DISK, 0);
CDataStream::size_type size = 0;
for (int i = 0; i < 100000; i++) {
ss << VARINT(i);
size += ::GetSerializeSize(VARINT(i), 0, 0);
BOOST_CHECK(size == ss.size());
}
for (uint64 i = 0; i < 100000000000ULL; i += 999999937) {
ss << VARINT(i);
size += ::GetSerializeSize(VARINT(i), 0, 0);
BOOST_CHECK(size == ss.size());
}
// decode
for (int i = 0; i < 100000; i++) {
int j;
ss >> VARINT(j);
BOOST_CHECK_MESSAGE(i == j, "decoded:" << j << " expected:" << i);
}
for (uint64 i = 0; i < 100000000000ULL; i += 999999937) {
uint64 j;
ss >> VARINT(j);
BOOST_CHECK_MESSAGE(i == j, "decoded:" << j << " expected:" << i);
}
}
BOOST_AUTO_TEST_SUITE_END()