mirror of
https://github.com/nbd-wtf/go-nostr.git
synced 2025-09-17 19:13:56 +02:00
pool: support CountMany() using hyperloglog.
This commit is contained in:
30
nip45/hyperloglog/helpers.go
Normal file
30
nip45/hyperloglog/helpers.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// two32 is 2^32 (1 << 32). No code visible in this file references it.
const two32 = 1 << 32
|
||||
|
||||
// linearCounting returns the linear-counting estimate m * ln(m/v), where m is
// the number of registers and v is how many of them are still zero.
//
// v must be non-zero; with v == 0 (and m > 0) the division yields +Inf.
func linearCounting(m uint32, v uint32) float64 {
	registers, empty := float64(m), float64(v)
	ratio := registers / empty
	return registers * math.Log(ratio)
}
|
||||
|
||||
// clz56 counts the leading zero bits of the low 56 bits of x, scanning from
// bit 55 downwards. The top byte of x is ignored. The result ranges from 0
// (bit 55 set) to 56 (all of the low 56 bits are zero).
func clz56(x uint64) uint8 {
	var zeros uint8
	mask := uint64(1) << 55
	for mask != 0 {
		if x&mask != 0 {
			break
		}
		zeros++
		mask >>= 1
	}
	return zeros
}
|
||||
|
||||
// countZeros reports how many elements of s are equal to zero.
func countZeros(s []uint8) uint32 {
	zeros := uint32(0)
	for i := 0; i < len(s); i++ {
		if s[i] != 0 {
			continue
		}
		zeros++
	}
	return zeros
}
|
85
nip45/hyperloglog/hll.go
Normal file
85
nip45/hyperloglog/hll.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
)
|
||||
|
||||
// HyperLogLog is a cardinality sketch backed by a slice of one-byte
// registers. The package is hardcoded to precision 8, i.e. 256 (1<<8)
// registers.
type HyperLogLog struct {
	registers []byte
}
|
||||
|
||||
func New() *HyperLogLog {
|
||||
// precision is always 8
|
||||
// the number of registers is always 256 (1<<8)
|
||||
hll := &HyperLogLog{}
|
||||
hll.registers = make([]uint8, 256)
|
||||
return hll
|
||||
}
|
||||
|
||||
// GetRegisters returns the sketch's register slice itself, not a copy, so
// writes through the returned slice modify the sketch.
func (hll *HyperLogLog) GetRegisters() []byte {
	return hll.registers
}
|
||||
// SetRegisters replaces the sketch's registers with enc. The slice is stored
// as-is: it is neither copied nor length-checked, so the sketch shares
// storage with the caller's slice afterwards.
func (hll *HyperLogLog) SetRegisters(enc []byte) {
	hll.registers = enc
}
|
||||
func (hll *HyperLogLog) MergeRegisters(other []byte) {
|
||||
for i, v := range other {
|
||||
if v > hll.registers[i] {
|
||||
hll.registers[i] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (hll *HyperLogLog) Clear() {
|
||||
for i := range hll.registers {
|
||||
hll.registers[i] = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (hll *HyperLogLog) Add(id string) {
|
||||
x, _ := hex.DecodeString(id[32 : 32+8*2])
|
||||
j := x[0] // register address (first 8 bits, i.e. first byte)
|
||||
|
||||
w := binary.BigEndian.Uint64(x) // number that we will use
|
||||
zeroBits := clz56(w) + 1 // count zeroes (skip the first byte, so only use 56 bits)
|
||||
|
||||
if zeroBits > hll.registers[j] {
|
||||
hll.registers[j] = zeroBits
|
||||
}
|
||||
}
|
||||
|
||||
func (hll *HyperLogLog) Merge(other *HyperLogLog) {
|
||||
for i, v := range other.registers {
|
||||
if v > hll.registers[i] {
|
||||
hll.registers[i] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (hll *HyperLogLog) Count() uint64 {
|
||||
v := countZeros(hll.registers)
|
||||
|
||||
if v != 0 {
|
||||
lc := linearCounting(256 /* nregisters */, v)
|
||||
|
||||
if lc <= 220 /* threshold */ {
|
||||
return uint64(lc)
|
||||
}
|
||||
}
|
||||
|
||||
est := hll.calculateEstimate()
|
||||
if est <= 256 /* nregisters */ *3 {
|
||||
if v != 0 {
|
||||
return uint64(linearCounting(256 /* nregisters */, v))
|
||||
}
|
||||
}
|
||||
|
||||
return uint64(est)
|
||||
}
|
||||
|
||||
func (hll HyperLogLog) calculateEstimate() float64 {
|
||||
sum := 0.0
|
||||
for _, val := range hll.registers {
|
||||
sum += 1.0 / float64(uint64(1)<<val) // this is the same as 2^(-val)
|
||||
}
|
||||
|
||||
return 0.7182725932495458 /* alpha for 256 registers */ * 256 /* nregisters */ * 256 /* nregisters */ / sum
|
||||
}
|
130
nip45/hyperloglog/hll_test.go
Normal file
130
nip45/hyperloglog/hll_test.go
Normal file
@@ -0,0 +1,130 @@
|
||||
package hyperloglog
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"math/rand/v2"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestHyperLogLogBasic(t *testing.T) {
|
||||
rand := rand.New(rand.NewPCG(1, 0))
|
||||
|
||||
for _, count := range []int{
|
||||
2, 4, 6, 7, 12, 15, 22, 36, 44, 47,
|
||||
64, 77, 89, 95, 104, 116, 122, 144,
|
||||
150, 199, 300, 350, 400, 500, 600,
|
||||
777, 922, 1000, 1500, 2222, 9999,
|
||||
13600, 80000, 133333, 200000,
|
||||
} {
|
||||
hll := New()
|
||||
|
||||
for range count {
|
||||
b := make([]byte, 32)
|
||||
for i := range b {
|
||||
b[i] = uint8(rand.UintN(256))
|
||||
}
|
||||
id := hex.EncodeToString(b)
|
||||
hll.Add(id)
|
||||
}
|
||||
|
||||
c := hll.Count()
|
||||
res100 := int(c * 100)
|
||||
require.Greater(t, res100, count*85, "result too low (actual %d < %d)", c, count)
|
||||
require.Less(t, res100, count*115, "result too high (actual %d > %d)", c, count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHyperLogLogMerge(t *testing.T) {
|
||||
rand := rand.New(rand.NewPCG(2, 0))
|
||||
|
||||
for _, count := range []int{
|
||||
2, 4, 6, 7, 12, 15, 22, 36, 44, 47,
|
||||
64, 77, 89, 95, 104, 116, 122, 144,
|
||||
150, 199, 300, 350, 400, 500, 600,
|
||||
777, 922, 1000, 1500, 2222, 9999,
|
||||
13600, 80000, 133333, 200000,
|
||||
} {
|
||||
hllA := New()
|
||||
hllB := New()
|
||||
|
||||
for range count / 2 {
|
||||
b := make([]byte, 32)
|
||||
for i := range b {
|
||||
b[i] = uint8(rand.UintN(256))
|
||||
}
|
||||
id := hex.EncodeToString(b)
|
||||
hllA.Add(id)
|
||||
}
|
||||
for range count / 2 {
|
||||
b := make([]byte, 32)
|
||||
for i := range b {
|
||||
b[i] = uint8(rand.UintN(256))
|
||||
}
|
||||
id := hex.EncodeToString(b)
|
||||
hllB.Add(id)
|
||||
}
|
||||
|
||||
hll := New()
|
||||
hll.Merge(hllA)
|
||||
hll.Merge(hllB)
|
||||
|
||||
res100 := int(hll.Count() * 100)
|
||||
require.Greater(t, res100, count*85, "result too low (actual %d < %d)", hll.Count(), count)
|
||||
require.Less(t, res100, count*115, "result too high (actual %d > %d)", hll.Count(), count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHyperLogLogMergeComplex(t *testing.T) {
|
||||
rand := rand.New(rand.NewPCG(4, 0))
|
||||
|
||||
for _, count := range []int{
|
||||
3, 6, 9, 12, 15, 22, 36, 46, 57,
|
||||
64, 77, 89, 95, 104, 116, 122, 144,
|
||||
150, 199, 300, 350, 400, 500, 600,
|
||||
777, 922, 1000, 1500, 2222, 9999,
|
||||
13600, 80000, 133333, 200000,
|
||||
} {
|
||||
hllA := New()
|
||||
hllB := New()
|
||||
hllC := New()
|
||||
|
||||
for range count / 3 {
|
||||
b := make([]byte, 32)
|
||||
for i := range b {
|
||||
b[i] = uint8(rand.UintN(256))
|
||||
}
|
||||
id := hex.EncodeToString(b)
|
||||
hllA.Add(id)
|
||||
hllC.Add(id)
|
||||
}
|
||||
for range count / 3 {
|
||||
b := make([]byte, 32)
|
||||
for i := range b {
|
||||
b[i] = uint8(rand.UintN(256))
|
||||
}
|
||||
id := hex.EncodeToString(b)
|
||||
hllB.Add(id)
|
||||
hllC.Add(id)
|
||||
}
|
||||
for range count / 3 {
|
||||
b := make([]byte, 32)
|
||||
for i := range b {
|
||||
b[i] = uint8(rand.UintN(256))
|
||||
}
|
||||
id := hex.EncodeToString(b)
|
||||
hllC.Add(id)
|
||||
hllA.Add(id)
|
||||
}
|
||||
|
||||
hll := New()
|
||||
hll.Merge(hllA)
|
||||
hll.Merge(hllB)
|
||||
hll.Merge(hllC)
|
||||
|
||||
res100 := int(hll.Count() * 100)
|
||||
require.Greater(t, res100, count*85, "result too low (actual %d < %d)", hll.Count(), count)
|
||||
require.Less(t, res100, count*115, "result too high (actual %d > %d)", hll.Count(), count)
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user