diff --git a/envelopes.go b/envelopes.go index 8f401a4..5e6bc75 100644 --- a/envelopes.go +++ b/envelopes.go @@ -195,7 +195,7 @@ func (v CountEnvelope) MarshalJSON() ([]byte, error) { w.RawString(strconv.FormatInt(*v.Count, 10)) if v.HyperLogLog != nil { w.RawString(`,"hll":"`) - hllHex := make([]byte, 0, 512) + hllHex := make([]byte, 512) hex.Encode(hllHex, v.HyperLogLog) w.Buffer.AppendBytes(hllHex) w.RawString(`"`) diff --git a/nip45/hll_event.go b/nip45/hll_event.go new file mode 100644 index 0000000..dc4d01f --- /dev/null +++ b/nip45/hll_event.go @@ -0,0 +1,42 @@ +package nip45 + +import ( + "iter" + "strconv" + + "github.com/nbd-wtf/go-nostr" +) + +func HyperLogLogEventPubkeyOffsetsAndReferencesForEvent(evt *nostr.Event) iter.Seq2[string, int] { + return func(yield func(string, int) bool) { + switch evt.Kind { + case 3: + // + // follower counts + for _, tag := range evt.Tags { + if len(tag) >= 2 && tag[0] == "p" && nostr.IsValid32ByteHex(tag[1]) { + // 32th nibble of each "p" tag + p, _ := strconv.ParseInt(tag[1][32:33], 16, 64) + if !yield(tag[1], int(p+8)) { + return + } + } + } + case 7: + // + // reaction counts: + // (only the last "e" tag counts) + lastE := evt.Tags.GetLast([]string{"e", ""}) + if lastE != nil { + v := (*lastE)[1] + if nostr.IsValid32ByteHex(v) { + // 32th nibble of "e" tag + p, _ := strconv.ParseInt(v[32:33], 16, 64) + if !yield(v, int(p+8)) { + return + } + } + } + } + } +} diff --git a/nip45/hll_filter.go b/nip45/hll_filter.go new file mode 100644 index 0000000..c075b38 --- /dev/null +++ b/nip45/hll_filter.go @@ -0,0 +1,49 @@ +package nip45 + +import ( + "strconv" + + "github.com/nbd-wtf/go-nostr" +) + +// HyperLogLogEventPubkeyOffsetForFilter returns the deterministic pubkey offset that will be used +// when computing hyperloglogs in the context of a specific filter. +// +// It returns -1 when the filter is not eligible for hyperloglog calculation. +func HyperLogLogEventPubkeyOffsetForFilter(filter nostr.Filter) int { + if filter.IDs != nil || filter.Since != nil || filter.Until != nil || filter.Authors != nil || + len(filter.Kinds) != 1 || filter.Search != "" || len(filter.Tags) != 1 { + // obvious cases in which we won't bother to do hyperloglog stuff + return -1 + } + + // only serve the cases explicitly defined by the NIP: + if pTags, ok := filter.Tags["p"]; ok { + // + // follower counts: + if filter.Kinds[0] == 3 && len(pTags) == 1 { + // 32th nibble of "p" tag + p, err := strconv.ParseInt(pTags[0][32:33], 16, 64) + if err != nil { + return -1 + } + return int(p + 8) + } + } else if eTags, ok := filter.Tags["e"]; ok { + if len(eTags) == 1 { + // + // reaction counts: + if filter.Kinds[0] == 7 { + // 32th nibble of "e" tag + p, err := strconv.ParseInt(eTags[0][32:33], 16, 64) + if err != nil { + return -1 + } + return int(p + 8) + } + } + } + + // everything else is false at least for now + return -1 +} diff --git a/nip45/hyperloglog/hll.go b/nip45/hyperloglog/hll.go index 16f98ed..e546b67 100644 --- a/nip45/hyperloglog/hll.go +++ b/nip45/hyperloglog/hll.go @@ -3,21 +3,37 @@ package hyperloglog import ( "encoding/binary" "encoding/hex" + "fmt" ) // Everything is hardcoded to use precision 8, i.e. 256 registers. type HyperLogLog struct { + offset int registers []uint8 } -func New() *HyperLogLog { +func New(offset int) *HyperLogLog { + if offset < 0 || offset > 32-8 { + panic(fmt.Errorf("invalid offset %d", offset)) + } + // precision is always 8 // the number of registers is always 256 (1<<8) - hll := &HyperLogLog{} + hll := &HyperLogLog{offset: offset} hll.registers = make([]uint8, 256) return hll } +func NewWithRegisters(registers []byte, offset int) *HyperLogLog { + if offset < 0 || offset > 32-8 { + panic(fmt.Errorf("invalid offset %d", offset)) + } + if len(registers) != 256 { + panic(fmt.Errorf("invalid number of registers %d", len(registers))) + } + return &HyperLogLog{registers: registers, offset: offset} +} + func (hll *HyperLogLog) GetRegisters() []byte { return hll.registers } func (hll *HyperLogLog) SetRegisters(enc []byte) { hll.registers = enc } func (hll *HyperLogLog) MergeRegisters(other []byte) { @@ -34,8 +50,22 @@ func (hll *HyperLogLog) Clear() { } } -func (hll *HyperLogLog) Add(id string) { - x, _ := hex.DecodeString(id[32 : 32+8*2]) +// Add takes a Nostr event pubkey which will be used as the item "key" (that combined with the offset) +func (hll *HyperLogLog) Add(pubkey string) { + x, _ := hex.DecodeString(pubkey[hll.offset*2 : hll.offset*2+8*2]) + j := x[0] // register address (first 8 bits, i.e. first byte) + + w := binary.BigEndian.Uint64(x) // number that we will use + zeroBits := clz56(w) + 1 // count zeroes (skip the first byte, so only use 56 bits) + + if zeroBits > hll.registers[j] { + hll.registers[j] = zeroBits + } +} + +// AddBytes is like Add, but takes pubkey as bytes instead of as string +func (hll *HyperLogLog) AddBytes(pubkey []byte) { + x := pubkey[hll.offset : hll.offset+8] j := x[0] // register address (first 8 bits, i.e. first byte) w := binary.BigEndian.Uint64(x) // number that we will use diff --git a/pool.go b/pool.go index 6222a16..65e70c8 100644 --- a/pool.go +++ b/pool.go @@ -476,7 +476,7 @@ func (pool *SimplePool) CountMany( filter Filter, opts []SubscriptionOption, ) int { - hll := hyperloglog.New() + hll := hyperloglog.New(0) // offset is irrelevant here, so we just pass 0 wg := sync.WaitGroup{} wg.Add(len(urls))