mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-10-11 03:53:22 +02:00
Rolling bloom filter class
For when you need to keep track of the last N items you've seen, and can tolerate some false-positives. Rebased-by: Pieter Wuille <pieter.wuille@gmail.com>
This commit is contained in:
committed by
Pieter Wuille
parent
8a10000222
commit
69a5f8be0a
@@ -8,6 +8,7 @@
|
||||
#include "clientversion.h"
|
||||
#include "key.h"
|
||||
#include "merkleblock.h"
|
||||
#include "random.h"
|
||||
#include "serialize.h"
|
||||
#include "streams.h"
|
||||
#include "uint256.h"
|
||||
@@ -459,4 +460,81 @@ BOOST_AUTO_TEST_CASE(merkle_block_4_test_update_none)
|
||||
BOOST_CHECK(!filter.contains(COutPoint(uint256S("0x02981fa052f0481dbc5868f4fc2166035a10f27a03cfd2de67326471df5bc041"), 0)));
|
||||
}
|
||||
|
||||
static std::vector<unsigned char> RandomData()
|
||||
{
|
||||
uint256 r = GetRandHash();
|
||||
return std::vector<unsigned char>(r.begin(), r.end());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(rolling_bloom)
|
||||
{
|
||||
// last-100-entry, 1% false positive:
|
||||
CRollingBloomFilter rb1(100, 0.01, 0);
|
||||
|
||||
// Overfill:
|
||||
static const int DATASIZE=399;
|
||||
std::vector<unsigned char> data[DATASIZE];
|
||||
for (int i = 0; i < DATASIZE; i++) {
|
||||
data[i] = RandomData();
|
||||
rb1.insert(data[i]);
|
||||
}
|
||||
// Last 100 guaranteed to be remembered:
|
||||
for (int i = 299; i < DATASIZE; i++) {
|
||||
BOOST_CHECK(rb1.contains(data[i]));
|
||||
}
|
||||
|
||||
// false positive rate is 1%, so we should get about 100 hits if
|
||||
// testing 10,000 random keys. We get worst-case false positive
|
||||
// behavior when the filter is as full as possible, which is
|
||||
// when we've inserted one minus an integer multiple of nElement*2.
|
||||
unsigned int nHits = 0;
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
if (rb1.contains(RandomData()))
|
||||
++nHits;
|
||||
}
|
||||
// Run test_bitcoin with --log_level=message to see BOOST_TEST_MESSAGEs:
|
||||
BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~100 expected)");
|
||||
|
||||
// Insanely unlikely to get a fp count outside this range:
|
||||
BOOST_CHECK(nHits > 25);
|
||||
BOOST_CHECK(nHits < 175);
|
||||
|
||||
BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
|
||||
rb1.clear();
|
||||
BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));
|
||||
|
||||
// Now roll through data, make sure last 100 entries
|
||||
// are always remembered:
|
||||
for (int i = 0; i < DATASIZE; i++) {
|
||||
if (i >= 100)
|
||||
BOOST_CHECK(rb1.contains(data[i-100]));
|
||||
rb1.insert(data[i]);
|
||||
}
|
||||
|
||||
// Insert 999 more random entries:
|
||||
for (int i = 0; i < 999; i++) {
|
||||
rb1.insert(RandomData());
|
||||
}
|
||||
// Sanity check to make sure the filter isn't just filling up:
|
||||
nHits = 0;
|
||||
for (int i = 0; i < DATASIZE; i++) {
|
||||
if (rb1.contains(data[i]))
|
||||
++nHits;
|
||||
}
|
||||
// Expect about 5 false positives, more than 100 means
|
||||
// something is definitely broken.
|
||||
BOOST_TEST_MESSAGE("RollingBloomFilter got " << nHits << " false positives (~5 expected)");
|
||||
BOOST_CHECK(nHits < 100);
|
||||
|
||||
// last-1000-entry, 0.01% false positive:
|
||||
CRollingBloomFilter rb2(1000, 0.001, 0);
|
||||
for (int i = 0; i < DATASIZE; i++) {
|
||||
rb2.insert(data[i]);
|
||||
}
|
||||
// ... room for all of them:
|
||||
for (int i = 0; i < DATASIZE; i++) {
|
||||
BOOST_CHECK(rb2.contains(data[i]));
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END()
|
||||
|
Reference in New Issue
Block a user