nip45: hyperlolog small fixes and improvements. support deterministic offset for hardcoded set of queries.

This commit is contained in:
fiatjaf 2024-12-07 00:12:19 -03:00
parent 0d40b40c9c
commit 529ade9376
5 changed files with 127 additions and 6 deletions

View File

@ -195,7 +195,7 @@ func (v CountEnvelope) MarshalJSON() ([]byte, error) {
w.RawString(strconv.FormatInt(*v.Count, 10))
if v.HyperLogLog != nil {
w.RawString(`,"hll":"`)
hllHex := make([]byte, 0, 512)
hllHex := make([]byte, 512)
hex.Encode(hllHex, v.HyperLogLog)
w.Buffer.AppendBytes(hllHex)
w.RawString(`"`)

42
nip45/hll_event.go Normal file
View File

@ -0,0 +1,42 @@
package nip45
import (
"iter"
"strconv"
"github.com/nbd-wtf/go-nostr"
)
func HyperLogLogEventPubkeyOffsetsAndReferencesForEvent(evt *nostr.Event) iter.Seq2[string, int] {
return func(yield func(string, int) bool) {
switch evt.Kind {
case 3:
//
// follower counts
for _, tag := range evt.Tags {
if len(tag) >= 2 && tag[0] == "p" && nostr.IsValid32ByteHex(tag[1]) {
// 32th nibble of each "p" tag
p, _ := strconv.ParseInt(tag[1][32:33], 16, 64)
if !yield(tag[1], int(p+8)) {
return
}
}
}
case 7:
//
// reaction counts:
// (only the last "e" tag counts)
lastE := evt.Tags.GetLast([]string{"e", ""})
if lastE != nil {
v := (*lastE)[1]
if nostr.IsValid32ByteHex(v) {
// 32th nibble of "e" tag
p, _ := strconv.ParseInt(v[32:33], 16, 64)
if !yield(v, int(p+8)) {
return
}
}
}
}
}
}

49
nip45/hll_filter.go Normal file
View File

@ -0,0 +1,49 @@
package nip45
import (
"strconv"
"github.com/nbd-wtf/go-nostr"
)
// HyperLogLogEventPubkeyOffsetForFilter returns the deterministic pubkey offset that will be used
// when computing hyperloglogs in the context of a specific filter.
//
// It returns -1 when the filter is not eligible for hyperloglog calculation.
func HyperLogLogEventPubkeyOffsetForFilter(filter nostr.Filter) int {
if filter.IDs != nil || filter.Since != nil || filter.Until != nil || filter.Authors != nil ||
len(filter.Kinds) != 1 || filter.Search != "" || len(filter.Tags) != 1 {
// obvious cases in which we won't bother to do hyperloglog stuff
return -1
}
// only serve the cases explicitly defined by the NIP:
if pTags, ok := filter.Tags["p"]; ok {
//
// follower counts:
if filter.Kinds[0] == 3 && len(pTags) == 1 {
// 32th nibble of "p" tag
p, err := strconv.ParseInt(pTags[0][32:33], 16, 64)
if err != nil {
return -1
}
return int(p + 8)
}
} else if eTags, ok := filter.Tags["e"]; ok {
if len(eTags) == 1 {
//
// reaction counts:
if filter.Kinds[0] == 7 {
// 32th nibble of "e" tag
p, err := strconv.ParseInt(eTags[0][32:33], 16, 64)
if err != nil {
return -1
}
return int(p + 8)
}
}
}
// everything else is false at least for now
return -1
}

View File

@ -3,21 +3,37 @@ package hyperloglog
import (
"encoding/binary"
"encoding/hex"
"fmt"
)
// Everything is hardcoded to use precision 8, i.e. 256 registers.
type HyperLogLog struct {
offset int
registers []uint8
}
func New() *HyperLogLog {
func New(offset int) *HyperLogLog {
if offset < 0 || offset > 32-8 {
panic(fmt.Errorf("invalid offset %d", offset))
}
// precision is always 8
// the number of registers is always 256 (1<<8)
hll := &HyperLogLog{}
hll := &HyperLogLog{offset: offset}
hll.registers = make([]uint8, 256)
return hll
}
func NewWithRegisters(registers []byte, offset int) *HyperLogLog {
if offset < 0 || offset > 32-8 {
panic(fmt.Errorf("invalid offset %d", offset))
}
if len(registers) != 256 {
panic(fmt.Errorf("invalid number of registers %d", len(registers)))
}
return &HyperLogLog{registers: registers, offset: offset}
}
func (hll *HyperLogLog) GetRegisters() []byte { return hll.registers }
func (hll *HyperLogLog) SetRegisters(enc []byte) { hll.registers = enc }
func (hll *HyperLogLog) MergeRegisters(other []byte) {
@ -34,8 +50,22 @@ func (hll *HyperLogLog) Clear() {
}
}
func (hll *HyperLogLog) Add(id string) {
x, _ := hex.DecodeString(id[32 : 32+8*2])
// Add takes a Nostr event pubkey which will be used as the item "key" (that combined with the offset)
func (hll *HyperLogLog) Add(pubkey string) {
x, _ := hex.DecodeString(pubkey[hll.offset*2 : hll.offset*2+8*2])
j := x[0] // register address (first 8 bits, i.e. first byte)
w := binary.BigEndian.Uint64(x) // number that we will use
zeroBits := clz56(w) + 1 // count zeroes (skip the first byte, so only use 56 bits)
if zeroBits > hll.registers[j] {
hll.registers[j] = zeroBits
}
}
// AddBytes is like Add, but takes pubkey as bytes instead of as string
func (hll *HyperLogLog) AddBytes(pubkey []byte) {
x := pubkey[hll.offset : hll.offset+8]
j := x[0] // register address (first 8 bits, i.e. first byte)
w := binary.BigEndian.Uint64(x) // number that we will use

View File

@ -476,7 +476,7 @@ func (pool *SimplePool) CountMany(
filter Filter,
opts []SubscriptionOption,
) int {
hll := hyperloglog.New()
hll := hyperloglog.New(0) // offset is irrelevant here, so we just pass 0
wg := sync.WaitGroup{}
wg.Add(len(urls))