follow list fetching test and related changes and fixes.

- make BatchedSubManyEose() use a single duplicate-id index, and use it for the replaceable loaders (a brief sketch of the idea follows below);
- fix parsing of follow entries from kind:3 events (and others);
- add a "cause" to most cancelation errors in relay/pool;
- remove the built-in cache from dataloader (we have our own, hopefully);
- increase the max frame size we can read from any websocket to 2**18 (262k), enough for lists with over 2000 items.
fiatjaf
2025-01-17 13:44:50 -03:00
parent adb97d46a7
commit 06a15fdaab
9 changed files with 240 additions and 178 deletions
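
Not the actual BatchedSubManyEose() implementation, just a minimal sketch of the "single duplicate-id index" idea: several per-relay, EOSE-bounded subscriptions are fanned into one channel, and one shared seen-id set drops events already delivered by another relay. The event type and mergeDeduped helper are hypothetical stand-ins.

package main

import (
	"fmt"
	"sync"
)

// event stands in for *nostr.Event with only the fields the sketch needs.
type event struct {
	ID     string
	PubKey string
}

// mergeDeduped fans several per-relay channels into one, forwarding each
// event id at most once: a single duplicate-id index shared by the whole
// batch instead of one per relay subscription.
func mergeDeduped(sources []<-chan event) <-chan event {
	out := make(chan event)
	var seen sync.Map // event id -> struct{}
	var wg sync.WaitGroup
	wg.Add(len(sources))
	for _, src := range sources {
		go func(src <-chan event) {
			defer wg.Done()
			for evt := range src {
				if _, dup := seen.LoadOrStore(evt.ID, struct{}{}); dup {
					continue // already delivered from another relay
				}
				out <- evt
			}
		}(src)
	}
	go func() {
		wg.Wait()
		close(out)
	}()
	return out
}

func main() {
	a := make(chan event, 2)
	b := make(chan event, 1)
	a <- event{ID: "1", PubKey: "alice"}
	a <- event{ID: "2", PubKey: "bob"}
	b <- event{ID: "1", PubKey: "alice"} // same event seen on a second relay
	close(a)
	close(b)
	for evt := range mergeDeduped([]<-chan event{a, b}) {
		fmt.Println(evt.ID, evt.PubKey) // prints "1 alice" and "2 bob", each once
	}
}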


@@ -2,6 +2,7 @@ package sdk
import (
"context"
"errors"
"fmt"
"strconv"
"sync"
@@ -37,6 +38,7 @@ func (sys *System) createAddressableDataloader(kind int) *dataloader.Loader[stri
},
dataloader.WithBatchCapacity[string, []*nostr.Event](60),
dataloader.WithClearCacheOnBatch[string, []*nostr.Event](),
dataloader.WithCache(&dataloader.NoCache[string, []*nostr.Event]{}),
dataloader.WithWait[string, []*nostr.Event](time.Millisecond*350),
)
}
@@ -45,13 +47,16 @@ func (sys *System) batchLoadAddressableEvents(
kind int,
pubkeys []string,
) []*dataloader.Result[[]*nostr.Event] {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*4)
ctx, cancel := context.WithTimeoutCause(context.Background(), time.Second*6,
errors.New("batch addressable load took too long"),
)
defer cancel()
batchSize := len(pubkeys)
results := make([]*dataloader.Result[[]*nostr.Event], batchSize)
keyPositions := make(map[string]int) // { [pubkey]: slice_index }
relayFilters := make(map[string]nostr.Filter) // { [relayUrl]: filter }
keyPositions := make(map[string]int) // { [pubkey]: slice_index }
relayFilter := make([]nostr.DirectedFilter, 0, max(3, batchSize*2))
relayFilterIndex := make(map[string]int, max(3, batchSize*2))
wg := sync.WaitGroup{}
wg.Add(len(pubkeys))
@@ -91,49 +96,60 @@ func (sys *System) batchLoadAddressableEvents(
cm.Lock()
for _, relay := range relays {
// each relay will have a custom filter
filter, ok := relayFilters[relay]
if !ok {
filter = nostr.Filter{
Kinds: []int{kind},
Authors: make([]string, 0, batchSize-i /* this and all pubkeys after this can be added */),
idx, ok := relayFilterIndex[relay]
var dfilter nostr.DirectedFilter
if ok {
dfilter = relayFilter[idx]
} else {
dfilter = nostr.DirectedFilter{
Relay: relay,
Filter: nostr.Filter{
Kinds: []int{kind},
Authors: make([]string, 0, batchSize-i /* this and all pubkeys after this can be added */),
},
}
idx = len(relayFilter)
relayFilterIndex[relay] = idx
relayFilter = append(relayFilter, dfilter)
}
filter.Authors = append(filter.Authors, pubkey)
relayFilters[relay] = filter
dfilter.Authors = append(dfilter.Authors, pubkey)
relayFilter[idx] = dfilter
}
cm.Unlock()
}(i, pubkey)
}
// query all relays with the prepared filters
// wait for relay batches to be prepared
wg.Wait()
multiSubs := sys.batchAddressableRelayQueries(ctx, relayFilters)
// query all relays with the prepared filters
multiSubs := sys.Pool.BatchedSubManyEose(ctx, relayFilter)
nextEvent:
for {
select {
case evt, more := <-multiSubs:
case ie, more := <-multiSubs:
if !more {
return results
}
// insert this event at the desired position
pos := keyPositions[evt.PubKey] // @unchecked: it must succeed because it must be a key we passed
pos := keyPositions[ie.PubKey] // @unchecked: it must succeed because it must be a key we passed
events := results[pos].Data
if events == nil {
// no events found, so just add this and end
results[pos] = &dataloader.Result[[]*nostr.Event]{Data: []*nostr.Event{evt}}
results[pos] = &dataloader.Result[[]*nostr.Event]{Data: []*nostr.Event{ie.Event}}
continue nextEvent
}
// there are events, so look for a match
d := evt.Tags.GetD()
d := ie.Tags.GetD()
for i, event := range events {
if event.Tags.GetD() == d {
// there is a match
if event.CreatedAt < evt.CreatedAt {
if event.CreatedAt < ie.CreatedAt {
// ...and this one is newer, so replace
events[i] = evt
events[i] = ie.Event
} else {
// ... but this one is older, so ignore
}
@@ -143,42 +159,10 @@ nextEvent:
}
// there is no match, so add to the end
events = append(events, evt)
events = append(events, ie.Event)
results[pos].Data = events
case <-ctx.Done():
return results
}
}
}
// batchAddressableRelayQueries is like batchReplaceableRelayQueries, except it doesn't count results to
// try to exit early.
func (sys *System) batchAddressableRelayQueries(
ctx context.Context,
relayFilters map[string]nostr.Filter,
) <-chan *nostr.Event {
all := make(chan *nostr.Event)
wg := sync.WaitGroup{}
wg.Add(len(relayFilters))
for url, filter := range relayFilters {
go func(url string, filter nostr.Filter) {
defer wg.Done()
n := len(filter.Authors)
ctx, cancel := context.WithTimeout(ctx, time.Millisecond*450+time.Millisecond*50*time.Duration(n))
defer cancel()
for ie := range sys.Pool.SubManyEose(ctx, []string{url}, nostr.Filters{filter}, nostr.WithLabel("addr")) {
all <- ie.Event
}
}(url, filter)
}
go func() {
wg.Wait()
close(all)
}()
return all
}


@@ -48,11 +48,11 @@ func parseProfileRef(tag nostr.Tag) (fw ProfileRef, ok bool) {
if _, err := url.Parse(tag[2]); err == nil {
fw.Relay = nostr.NormalizeURL(tag[2])
}
if len(tag) > 3 {
fw.Petname = strings.TrimSpace(tag[3])
}
return fw, true
}
return fw, false
return fw, true
}
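
For context, a kind:3 follow list carries "p" tags of the form ["p", pubkey, optional relay hint, optional petname] (NIP-02). A hypothetical illustration of the shapes that should all parse into a valid ProfileRef after this change, including the bare two-element form that, reading the diff, previously ended in the final "return fw, false" branch (values are illustrative):

follows := []nostr.Tag{
	{"p", "3bf0c63fcb93463407af97a5e5ee64fa883d107ef9e558472c4eb9aaaefa459d", "wss://relay.damus.io", "fiatjaf"}, // pubkey + relay hint + petname
	{"p", "3bf0c63fcb93463407af97a5e5ee64fa883d107ef9e558472c4eb9aaaefa459d", "wss://relay.damus.io"},            // pubkey + relay hint
	{"p", "3bf0c63fcb93463407af97a5e5ee64fa883d107ef9e558472c4eb9aaaefa459d"},                                    // bare pubkey, now accepted
}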


@@ -2,6 +2,7 @@ package sdk
import (
"context"
"errors"
"fmt"
"slices"
"strconv"
@@ -12,6 +13,10 @@ import (
"github.com/nbd-wtf/go-nostr"
)
// this is used as a hack to signal that these replaceable loader queries shouldn't use the full
// context timespan when they're being made from inside determineRelaysToQuery
var contextForSub10002Query = context.WithValue(context.Background(), "", "")
type replaceableIndex int
const (
@@ -49,26 +54,39 @@ func (sys *System) initializeReplaceableDataloaders() {
func (sys *System) createReplaceableDataloader(kind int) *dataloader.Loader[string, *nostr.Event] {
return dataloader.NewBatchedLoader(
func(_ context.Context, pubkeys []string) []*dataloader.Result[*nostr.Event] {
return sys.batchLoadReplaceableEvents(kind, pubkeys)
func(ctx context.Context, pubkeys []string) []*dataloader.Result[*nostr.Event] {
var cancel context.CancelFunc
if ctx == contextForSub10002Query {
ctx, cancel = context.WithTimeoutCause(context.Background(), time.Millisecond*2300,
errors.New("fetching relays in subloader took too long"),
)
} else {
ctx, cancel = context.WithTimeoutCause(context.Background(), time.Second*6,
errors.New("batch replaceable load took too long"),
)
defer cancel()
}
return sys.batchLoadReplaceableEvents(ctx, kind, pubkeys)
},
dataloader.WithBatchCapacity[string, *nostr.Event](60),
dataloader.WithClearCacheOnBatch[string, *nostr.Event](),
dataloader.WithCache(&dataloader.NoCache[string, *nostr.Event]{}),
dataloader.WithWait[string, *nostr.Event](time.Millisecond*350),
)
}
func (sys *System) batchLoadReplaceableEvents(
ctx context.Context,
kind int,
pubkeys []string,
) []*dataloader.Result[*nostr.Event] {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*4)
defer cancel()
batchSize := len(pubkeys)
results := make([]*dataloader.Result[*nostr.Event], batchSize)
keyPositions := make(map[string]int) // { [pubkey]: slice_index }
relayFilters := make(map[string]nostr.Filter) // { [relayUrl]: filter }
keyPositions := make(map[string]int) // { [pubkey]: slice_index }
relayFilter := make([]nostr.DirectedFilter, 0, max(3, batchSize*2))
relayFilterIndex := make(map[string]int, max(3, batchSize*2))
wg := sync.WaitGroup{}
wg.Add(len(pubkeys))
@@ -108,15 +126,24 @@ func (sys *System) batchLoadReplaceableEvents(
cm.Lock()
for _, relay := range relays {
// each relay will have a custom filter
filter, ok := relayFilters[relay]
if !ok {
filter = nostr.Filter{
Kinds: []int{kind},
Authors: make([]string, 0, batchSize-i /* this and all pubkeys after this can be added */),
idx, ok := relayFilterIndex[relay]
var dfilter nostr.DirectedFilter
if ok {
dfilter = relayFilter[idx]
} else {
dfilter = nostr.DirectedFilter{
Relay: relay,
Filter: nostr.Filter{
Kinds: []int{kind},
Authors: make([]string, 0, batchSize-i /* this and all pubkeys after this can be added */),
},
}
idx = len(relayFilter)
relayFilterIndex[relay] = idx
relayFilter = append(relayFilter, dfilter)
}
filter.Authors = append(filter.Authors, pubkey)
relayFilters[relay] = filter
dfilter.Authors = append(dfilter.Authors, pubkey)
relayFilter[idx] = dfilter
}
cm.Unlock()
}(i, pubkey)
@@ -124,18 +151,18 @@ func (sys *System) batchLoadReplaceableEvents(
// query all relays with the prepared filters
wg.Wait()
multiSubs := sys.batchReplaceableRelayQueries(ctx, relayFilters)
multiSubs := sys.Pool.BatchedSubManyEose(ctx, relayFilter, nostr.WithLabel("repl~"+strconv.Itoa(kind)))
for {
select {
case evt, more := <-multiSubs:
case ie, more := <-multiSubs:
if !more {
return results
}
// insert this event at the desired position
pos := keyPositions[evt.PubKey] // @unchecked: it must succeed because it must be a key we passed
if results[pos].Data == nil || results[pos].Data.CreatedAt < evt.CreatedAt {
results[pos] = &dataloader.Result[*nostr.Event]{Data: evt}
pos := keyPositions[ie.PubKey] // @unchecked: it must succeed because it must be a key we passed
if results[pos].Data == nil || results[pos].Data.CreatedAt < ie.CreatedAt {
results[pos] = &dataloader.Result[*nostr.Event]{Data: ie.Event}
}
case <-ctx.Done():
return results
@@ -153,11 +180,13 @@ func (sys *System) determineRelaysToQuery(ctx context.Context, pubkey string, ki
if len(relays) == 0 {
relays = []string{"wss://relay.damus.io", "wss://nos.lol"}
}
} else if kind == 0 || kind == 3 {
// leave room for two hardcoded relays because people are stupid
relays = sys.FetchOutboxRelays(ctx, pubkey, 1)
} else {
relays = sys.FetchOutboxRelays(ctx, pubkey, 3)
if kind == 0 || kind == 3 {
// leave room for two hardcoded relays because people are stupid
relays = sys.FetchOutboxRelays(contextForSub10002Query, pubkey, 1)
} else {
relays = sys.FetchOutboxRelays(contextForSub10002Query, pubkey, 3)
}
}
// use a different set of extra relays depending on the kind
@@ -182,45 +211,3 @@ func (sys *System) determineRelaysToQuery(ctx context.Context, pubkey string, ki
return relays
}
// batchReplaceableRelayQueries subscribes to multiple relays using a different filter for each and returns
// a single channel with all results. it closes on EOSE or when all the expected events were returned.
//
// the number of expected events is given by the number of pubkeys in the .Authors filter field.
// because of that, batchReplaceableRelayQueries is only suitable for querying replaceable events -- and
// care must be taken to not include the same pubkey more than once in the filter .Authors array.
func (sys *System) batchReplaceableRelayQueries(
ctx context.Context,
relayFilters map[string]nostr.Filter,
) <-chan *nostr.Event {
all := make(chan *nostr.Event)
wg := sync.WaitGroup{}
wg.Add(len(relayFilters))
for url, filter := range relayFilters {
go func(url string, filter nostr.Filter) {
defer wg.Done()
n := len(filter.Authors)
ctx, cancel := context.WithTimeout(ctx, time.Millisecond*950+time.Millisecond*50*time.Duration(n))
defer cancel()
received := 0
for ie := range sys.Pool.SubManyEose(ctx, []string{url}, nostr.Filters{filter}, nostr.WithLabel("repl")) {
all <- ie.Event
received++
if received >= n {
// we got all events we asked for, unless the relay is shitty and sent us two from the same author
return
}
}
}(url, filter)
}
go func() {
wg.Wait()
close(all)
}()
return all
}
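
The "cause" mentioned in the commit message uses Go's context cause plumbing (context.WithTimeoutCause / context.Cause, Go 1.21+). A minimal, self-contained sketch of how a caller can surface that cause after a timeout, unrelated to any go-nostr types:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

func main() {
	ctx, cancel := context.WithTimeoutCause(context.Background(), 10*time.Millisecond,
		errors.New("batch replaceable load took too long"))
	defer cancel()

	<-ctx.Done()
	fmt.Println(ctx.Err())          // "context deadline exceeded"
	fmt.Println(context.Cause(ctx)) // "batch replaceable load took too long"
}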


@@ -2,13 +2,15 @@ package sdk
import (
"context"
"fmt"
"strings"
"testing"
"github.com/nbd-wtf/go-nostr"
"github.com/stretchr/testify/require"
)
func TestSystemFiatjaf(t *testing.T) {
func TestMetadataAndEvents(t *testing.T) {
sys := NewSystem()
ctx := context.Background()
@@ -33,3 +35,53 @@ func TestSystemFiatjaf(t *testing.T) {
require.NotEmpty(t, events[meta.PubKey])
require.GreaterOrEqual(t, len(events[meta.PubKey]), 5)
}
func TestFollowListRecursion(t *testing.T) {
sys := NewSystem()
ctx := context.Background()
// fetch initial follow list
followList := sys.FetchFollowList(ctx, "3bf0c63fcb93463407af97a5e5ee64fa883d107ef9e558472c4eb9aaaefa459d")
fmt.Println("~", len(followList.Items))
require.Greater(t, len(followList.Items), 400, "should follow more than 400 accounts")
// fetch metadata and follow lists for each followed account concurrently
type result struct {
pubkey string
followList GenericList[ProfileRef]
metadata ProfileMetadata
}
results := make(chan result)
go func() {
for _, item := range followList.Items {
go func() {
fl := sys.FetchFollowList(ctx, item.Pubkey)
meta := sys.FetchProfileMetadata(ctx, item.Pubkey)
fmt.Println(" ~", item.Pubkey, meta.Name, len(fl.Items))
results <- result{item.Pubkey, fl, meta}
}()
}
}()
// collect results
var validAccounts int
var accountsWithManyFollows int
for i := 0; i < len(followList.Items); i++ {
r := <-results
// skip if metadata has "bot" in name
if strings.Contains(strings.ToLower(r.metadata.Name), "bot") {
continue
}
validAccounts++
if len(r.followList.Items) > 20 {
accountsWithManyFollows++
}
}
// check if at least 90% of non-bot accounts follow more than 20 accounts
ratio := float64(accountsWithManyFollows) / float64(validAccounts)
require.Greater(t, ratio, 0.9, "at least 90%% of accounts should follow more than 20 others (actual: %.2f%%)", ratio*100)
}
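
Both tests hit live public relays. Assuming the package sits under ./sdk (as the package sdk declaration suggests), they can be run selectively with:

go test -v -run 'TestMetadataAndEvents|TestFollowListRecursion' ./sdk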