diff --git a/go.mod b/go.mod index afc7fd4..2917985 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,6 @@ go 1.18 require ( github.com/PuerkitoBio/goquery v1.8.0 github.com/cockroachdb/pebble v0.0.0-20220723153705-3fc374e4dc66 - github.com/elastic/go-elasticsearch v0.0.0 github.com/elastic/go-elasticsearch/v8 v8.6.0 github.com/gorilla/mux v1.8.0 github.com/gorilla/websocket v1.4.2 @@ -83,3 +82,5 @@ require ( golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 // indirect golang.org/x/text v0.3.7 // indirect ) + +replace github.com/nbd-wtf/go-nostr => /Users/steve/opc/go-nostr diff --git a/go.sum b/go.sum index 262025e..e3f3e48 100644 --- a/go.sum +++ b/go.sum @@ -125,8 +125,6 @@ github.com/dvyukov/go-fuzz v0.0.0-20210602112143-b1f3d6f4ef4e h1:qTP1telKJHlToHl github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= github.com/elastic/elastic-transport-go/v8 v8.0.0-20211216131617-bbee439d559c h1:onA2RpIyeCPvYAj1LFYiiMTrSpqVINWMfYFRS7lofJs= github.com/elastic/elastic-transport-go/v8 v8.0.0-20211216131617-bbee439d559c/go.mod h1:87Tcz8IVNe6rVSLdBux1o/PEItLtyabHU3naC7IoqKI= -github.com/elastic/go-elasticsearch v0.0.0 h1:Pd5fqOuBxKxv83b0+xOAJDAkziWYwFinWnBO0y+TZaA= -github.com/elastic/go-elasticsearch v0.0.0/go.mod h1:TkBSJBuTyFdBnrNqoPc54FN0vKf5c04IdM4zuStJ7xg= github.com/elastic/go-elasticsearch/v8 v8.6.0 h1:xMaSe8jIh7NHzmNo9YBkewmaD2Pr+tX+zLkXxhieny4= github.com/elastic/go-elasticsearch/v8 v8.6.0/go.mod h1:Usvydt+x0dv9a1TzEUaovqbJor8rmOHy5dSmPeMAE2k= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -326,8 +324,6 @@ github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOA github.com/nats-io/nats.go v1.8.1/go.mod h1:BrFz9vVn0fU3AcH9Vn4Kd7W0NpJ651tD5omQ3M8LwxM= github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= -github.com/nbd-wtf/go-nostr v0.12.0 h1:6uo6D6jhcNzrzm6Fi8nA3jfZQqoXbeTWi9dIX5MsgZc= -github.com/nbd-wtf/go-nostr v0.12.0/go.mod h1:qFFTIxh15H5GGN0WsBI/P73DteqsevnhSEW/yk8nEf4= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nwaples/rardecode v1.1.2 h1:Cj0yZY6T1Zx1R7AhTbyGSALm44/Mmq+BAPc4B/p/d3M= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= diff --git a/search/README.md b/search/README.md index f3ed003..e42ffb6 100644 --- a/search/README.md +++ b/search/README.md @@ -5,9 +5,16 @@ bzip2 -cd nostr-wellorder-early-1m-v1.jsonl.bz2 | \ websocat -n -B 200000 ws://127.0.0.1:7447 ``` +``` +echo '["REQ", "asdf", {"search": "steve", "kinds": [0]}]' | websocat -n ws://127.0.0.1:7447 +``` + + todo: * index `content_search` field * support search queries * some kind of ranking signal (based on pubkey) * better config for ES: adjust bulk indexer settings, use custom mapping? + +* ES DSL string builder might not escape json strings correctly... diff --git a/search/main.go b/search/main.go index 16658ca..1688e3e 100644 --- a/search/main.go +++ b/search/main.go @@ -1,7 +1,6 @@ package main import ( - "encoding/json" "fmt" "log" @@ -36,10 +35,10 @@ func (r *Relay) Init() error { func (r *Relay) AcceptEvent(evt *nostr.Event) bool { // block events that are too large - jsonb, _ := json.Marshal(evt) - if len(jsonb) > 100000 { - return false - } + // jsonb, _ := json.Marshal(evt) + // if len(jsonb) > 100000 { + // return false + // } return true } diff --git a/storage/elasticsearch/elasticsearch.go b/storage/elasticsearch/elasticsearch.go index b443335..9953603 100644 --- a/storage/elasticsearch/elasticsearch.go +++ b/storage/elasticsearch/elasticsearch.go @@ -15,6 +15,11 @@ import ( "github.com/nbd-wtf/go-nostr" ) +type IndexedEvent struct { + Event nostr.Event `json:"event"` + ContentSearch string `json:"content_search"` +} + var indexMapping = ` { "settings": { @@ -24,11 +29,17 @@ var indexMapping = ` "mappings": { "dynamic": false, "properties": { - "id": {"type": "keyword"}, - "pubkey": {"type": "keyword"}, - "kind": {"type": "integer"}, - "tags": {"type": "keyword"}, - "created_at": {"type": "date"} + "event": { + "dynamic": false, + "properties": { + "id": {"type": "keyword"}, + "pubkey": {"type": "keyword"}, + "kind": {"type": "integer"}, + "tags": {"type": "keyword"}, + "created_at": {"type": "date"} + } + }, + "content_search": {"type": "text"} } } } @@ -49,7 +60,7 @@ func (ess *ElasticsearchStorage) Init() error { // log.Println(es.Info()) // todo: config - ess.indexName = "test" + ess.indexName = "test3" // todo: don't delete index every time // es.Indices.Delete([]string{ess.indexName}) @@ -123,7 +134,17 @@ func (ess *ElasticsearchStorage) DeleteEvent(id string, pubkey string) error { } func (ess *ElasticsearchStorage) SaveEvent(event *nostr.Event) error { - data, err := json.Marshal(event) + ie := &IndexedEvent{ + Event: *event, + } + + // post processing: index for FTS + // this could also possibly do custom indexing for kind=0. + if event.Kind != 4 { + ie.ContentSearch = event.Content + } + + data, err := json.Marshal(ie) if err != nil { return err } diff --git a/storage/elasticsearch/query.go b/storage/elasticsearch/query.go index 6b3bd59..03f99db 100644 --- a/storage/elasticsearch/query.go +++ b/storage/elasticsearch/query.go @@ -23,7 +23,7 @@ type EsSearchResult struct { Relation string } Hits []struct { - Source nostr.Event `json:"_source"` + Source IndexedEvent `json:"_source"` } } } @@ -42,7 +42,7 @@ func buildDsl(filter *nostr.Filter) string { if len(val) < 64 { op = "prefix" } - b.WriteString(fmt.Sprintf(`{"%s": {"%s": %q}}`, op, fieldName, val)) + b.WriteString(fmt.Sprintf(`{"%s": {"event.%s": %q}}`, op, fieldName, val)) } b.WriteString(`]}},`) } @@ -56,7 +56,7 @@ func buildDsl(filter *nostr.Filter) string { // kinds if len(filter.Kinds) > 0 { k, _ := json.Marshal(filter.Kinds) - b.WriteString(fmt.Sprintf(`{"terms": {"kind": %s}},`, k)) + b.WriteString(fmt.Sprintf(`{"terms": {"event.kind": %s}},`, k)) } // tags @@ -73,10 +73,10 @@ func buildDsl(filter *nostr.Filter) string { commaIdx++ b.WriteString(`{"bool": {"must": [`) for _, t := range terms { - b.WriteString(fmt.Sprintf(`{"term": {"tags": %q}},`, t)) + b.WriteString(fmt.Sprintf(`{"term": {"event.tags": %q}},`, t)) } // add the tag type at the end - b.WriteString(fmt.Sprintf(`{"term": {"tags": %q}}`, char)) + b.WriteString(fmt.Sprintf(`{"term": {"event.tags": %q}}`, char)) b.WriteString(`]}}`) } b.WriteString(`]}},`) @@ -84,12 +84,17 @@ func buildDsl(filter *nostr.Filter) string { // since if filter.Since != nil { - b.WriteString(fmt.Sprintf(`{"range": {"created_at": {"gt": %d}}},`, filter.Since.Unix())) + b.WriteString(fmt.Sprintf(`{"range": {"event.created_at": {"gt": %d}}},`, filter.Since.Unix())) } // until if filter.Until != nil { - b.WriteString(fmt.Sprintf(`{"range": {"created_at": {"lt": %d}}},`, filter.Until.Unix())) + b.WriteString(fmt.Sprintf(`{"range": {"event.created_at": {"lt": %d}}},`, filter.Until.Unix())) + } + + // search + if filter.Search != "" { + b.WriteString(fmt.Sprintf(`{"match": {"content_search": {"query": %s}}},`, filter.Search)) } // all blocks have a trailing comma... @@ -111,7 +116,7 @@ func (ess *ElasticsearchStorage) getByID(filter *nostr.Filter) ([]nostr.Event, e var mgetResponse struct { Docs []struct { Found bool - Source nostr.Event `json:"_source"` + Source IndexedEvent `json:"_source"` } } if err := json.NewDecoder(got.Body).Decode(&mgetResponse); err != nil { @@ -121,7 +126,7 @@ func (ess *ElasticsearchStorage) getByID(filter *nostr.Filter) ([]nostr.Event, e events := make([]nostr.Event, 0, len(mgetResponse.Docs)) for _, e := range mgetResponse.Docs { if e.Found { - events = append(events, e.Source) + events = append(events, e.Source.Event) } } @@ -155,7 +160,7 @@ func (ess *ElasticsearchStorage) QueryEvents(filter *nostr.Filter) ([]nostr.Even es.Search.WithBody(strings.NewReader(dsl)), es.Search.WithSize(limit), - es.Search.WithSort("created_at:desc"), + es.Search.WithSort("event.created_at:desc"), // es.Search.WithTrackTotalHits(true), // es.Search.WithPretty(), @@ -178,7 +183,7 @@ func (ess *ElasticsearchStorage) QueryEvents(filter *nostr.Filter) ([]nostr.Even events := make([]nostr.Event, len(r.Hits.Hits)) for i, e := range r.Hits.Hits { - events[i] = e.Source + events[i] = e.Source.Event } return events, nil