Rolling bloom filter class

For when you need to keep track of the last N items
you've seen, and can tolerate some false-positives.

Rebased-by: Pieter Wuille <pieter.wuille@gmail.com>
This commit is contained in:
Gavin Andresen
2015-04-24 13:14:45 -04:00
committed by Pieter Wuille
parent 8a10000222
commit 69a5f8be0a
3 changed files with 173 additions and 16 deletions

View File

@@ -21,22 +21,33 @@
using namespace std;
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn, unsigned char nFlagsIn) :
/**
* The ideal size for a bloom filter with a given number of elements and false positive rate is:
* - nElements * log(fp rate) / ln(2)^2
* We ignore filter parameters which will create a bloom filter larger than the protocol limits
*/
vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
/**
* The ideal number of hash functions is filter size * ln(2) / number of elements
* Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
* See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
*/
isFull(false),
isEmpty(false),
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
nTweak(nTweakIn),
nFlags(nFlagsIn)
/**
* The ideal size for a bloom filter with a given number of elements and false positive rate is:
* - nElements * log(fp rate) / ln(2)^2
* We ignore filter parameters which will create a bloom filter larger than the protocol limits
*/
vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8),
/**
* The ideal number of hash functions is filter size * ln(2) / number of elements
* Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
* See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
*/
isFull(false),
isEmpty(false),
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
nTweak(nTweakIn),
nFlags(nFlagsIn)
{
}
// Private constructor used by CRollingBloomFilter
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
vData((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)) / 8),
isFull(false),
isEmpty(true),
nHashFuncs((unsigned int)(vData.size() * 8 / nElements * LN2)),
nTweak(nTweakIn),
nFlags(BLOOM_UPDATE_NONE)
{
}
@@ -197,3 +208,43 @@ void CBloomFilter::UpdateEmptyFull()
isFull = full;
isEmpty = empty;
}
CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate, unsigned int nTweak) :
b1(nElements * 2, fpRate, nTweak), b2(nElements * 2, fpRate, nTweak)
{
// Implemented using two bloom filters of 2 * nElements each.
// We fill them up, and clear them, staggered, every nElements
// inserted, so at least one always contains the last nElements
// inserted.
nBloomSize = nElements * 2;
nInsertions = 0;
}
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
{
if (nInsertions == 0) {
b1.clear();
} else if (nInsertions == nBloomSize / 2) {
b2.clear();
}
b1.insert(vKey);
b2.insert(vKey);
if (++nInsertions == nBloomSize) {
nInsertions = 0;
}
}
bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
{
if (nInsertions < nBloomSize / 2) {
return b2.contains(vKey);
}
return b1.contains(vKey);
}
void CRollingBloomFilter::clear()
{
b1.clear();
b2.clear();
nInsertions = 0;
}