From 55bfb146285a2ac501da08897650fbb39ca42a82 Mon Sep 17 00:00:00 2001 From: mr0x50 <24775431+mroxso@users.noreply.github.com> Date: Sun, 9 Feb 2025 22:09:41 +0100 Subject: [PATCH] initial code commit --- .gitignore | 1 + compose.yaml | 39 +++++++++ nips/01.md | 177 +++++++++++++++++++++++++++++++++++++++ relay/.dockerignore | 32 +++++++ relay/Dockerfile | 78 +++++++++++++++++ relay/go.mod | 36 ++++++++ relay/go.sum | 72 ++++++++++++++++ relay/main.go | 61 ++++++++++++++ scraper/.dockerignore | 34 ++++++++ scraper/Dockerfile | 48 +++++++++++ scraper/main.py | 72 ++++++++++++++++ scraper/requirements.txt | 1 + 12 files changed, 651 insertions(+) create mode 100644 .gitignore create mode 100644 compose.yaml create mode 100644 nips/01.md create mode 100644 relay/.dockerignore create mode 100644 relay/Dockerfile create mode 100644 relay/go.mod create mode 100644 relay/go.sum create mode 100644 relay/main.go create mode 100644 scraper/.dockerignore create mode 100644 scraper/Dockerfile create mode 100644 scraper/main.py create mode 100644 scraper/requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7de31d7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +postgres/ diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..489d157 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,39 @@ +services: + relay: + depends_on: + postgres: + condition: service_healthy + build: + context: ./relay + target: final + ports: + - 3334:3334 + environment: + RELAY_NAME: "Scrapestr Relay" + RELAY_DESCRIPTION: "A scrapestr Nostr relay" + POSTGRES_URL: "postgres://postgres:postgres@postgres/postgres?sslmode=disable" + scraper: + depends_on: + - relay + build: + context: ./scraper + environment: + INPUT_RELAY: "wss://relay.nostr.band" + OUTPUT_RELAY: "ws://relay:3334" + KINDS: "[0,1,2,3,20]" + postgres: + image: postgres:17 + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + volumes: + - 
./postgres:/var/lib/postgresql/data + ports: + - 5432:5432 + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 5s + retries: 5 + start_period: 10s \ No newline at end of file diff --git a/nips/01.md b/nips/01.md new file mode 100644 index 0000000..e2b4ad4 --- /dev/null +++ b/nips/01.md @@ -0,0 +1,177 @@ +NIP-01 +====== + +Basic protocol flow description +------------------------------- + +`draft` `mandatory` + +This NIP defines the basic protocol that should be implemented by everybody. New NIPs may add new optional (or mandatory) fields and messages and features to the structures and flows described here. + +## Events and signatures + +Each user has a keypair. Signatures, public key, and encodings are done according to the [Schnorr signatures standard for the curve `secp256k1`](https://bips.xyz/340). + +The only object type that exists is the `event`, which has the following format on the wire: + +```jsonc +{ + "id": <32-bytes lowercase hex-encoded sha256 of the serialized event data>, + "pubkey": <32-bytes lowercase hex-encoded public key of the event creator>, + "created_at": <unix timestamp in seconds>, + "kind": <integer between 0 and 65535>, + "tags": [ + [<arbitrary string>...], + // ... + ], + "content": <arbitrary string>, + "sig": <64-bytes lowercase hex of the signature of the sha256 hash of the serialized event data, which is the same as the "id" field> +} +``` + +To obtain the `event.id`, we `sha256` the serialized event. The serialization is done over the UTF-8 JSON-serialized string (which is described below) of the following structure: + +``` +[ + 0, + <pubkey, as a lowercase hex string>, + <created_at, as a number>, + <kind, as a number>, + <tags, as an array of arrays of non-null strings>, + <content, as a string> +] +``` + +To prevent implementation differences from creating a different event ID for the same event, the following rules MUST be followed while serializing: +- UTF-8 should be used for encoding. +- Whitespace, line breaks or other unnecessary formatting should not be included in the output JSON.
+- The following characters in the content field must be escaped as shown, and all other characters must be included verbatim: + - A line break (`0x0A`), use `\n` + - A double quote (`0x22`), use `\"` + - A backslash (`0x5C`), use `\\` + - A carriage return (`0x0D`), use `\r` + - A tab character (`0x09`), use `\t` + - A backspace, (`0x08`), use `\b` + - A form feed, (`0x0C`), use `\f` + +### Tags + +Each tag is an array of one or more strings, with some conventions around them. Take a look at the example below: + +```jsonc +{ + "tags": [ + ["e", "5c83da77af1dec6d7289834998ad7aafbd9e2191396d75ec3cc27f5a77226f36", "wss://nostr.example.com"], + ["p", "f7234bd4c1394dda46d09f35bd384dd30cc552ad5541990f98844fb06676e9ca"], + ["a", "30023:f7234bd4c1394dda46d09f35bd384dd30cc552ad5541990f98844fb06676e9ca:abcd", "wss://nostr.example.com"], + ["alt", "reply"], + // ... + ], + // ... +} +``` + +The first element of the tag array is referred to as the tag _name_ or _key_ and the second as the tag _value_. So we can safely say that the event above has an `e` tag set to `"5c83da77af1dec6d7289834998ad7aafbd9e2191396d75ec3cc27f5a77226f36"`, an `alt` tag set to `"reply"` and so on. All elements after the second do not have a conventional name. + +This NIP defines 3 standard tags that can be used across all event kinds with the same meaning. 
They are as follows: + +- The `e` tag, used to refer to an event: `["e", <32-bytes lowercase hex of the id of another event>, <recommended relay URL, optional>, <32-bytes lowercase hex of the author's pubkey, optional>]` +- The `p` tag, used to refer to another user: `["p", <32-bytes lowercase hex of a pubkey>, <recommended relay URL, optional>]` +- The `a` tag, used to refer to an addressable or replaceable event + - for an addressable event: `["a", <kind integer>:<32-bytes lowercase hex of a pubkey>:<d tag value>, <recommended relay URL, optional>]` + - for a normal replaceable event: `["a", <kind integer>:<32-bytes lowercase hex of a pubkey>:, <recommended relay URL, optional>]` + +As a convention, all single-letter (only english alphabet letters: a-z, A-Z) key tags are expected to be indexed by relays, such that it is possible, for example, to query or subscribe to events that reference the event `"5c83da77af1dec6d7289834998ad7aafbd9e2191396d75ec3cc27f5a77226f36"` by using the `{"#e": ["5c83da77af1dec6d7289834998ad7aafbd9e2191396d75ec3cc27f5a77226f36"]}` filter. Only the first value in any given tag is indexed. + +### Kinds + +Kinds specify how clients should interpret the meaning of each event and the other fields of each event (e.g. an `"r"` tag may have a meaning in an event of kind 1 and an entirely different meaning in an event of kind 10002). Each NIP may define the meaning of a set of kinds that weren't defined elsewhere. [NIP-10](10.md), for instance, specifies the `kind:1` text note for social media applications. + +This NIP defines one basic kind: + +- `0`: **user metadata**: the `content` is set to a stringified JSON object `{name: <nickname or full name>, about: <short bio>, picture: <url of the image>}` describing the user who created the event. [Extra metadata fields](24.md#kind-0) may be set. A relay may delete older events once it gets a new one for the same pubkey. + +And also a convention for kind ranges that allow for easier experimentation and flexibility of relay implementation: + +- for kind `n` such that `1000 <= n < 10000 || 4 <= n < 45 || n == 1 || n == 2`, events are **regular**, which means they're all expected to be stored by relays.
+- for kind `n` such that `10000 <= n < 20000 || n == 0 || n == 3`, events are **replaceable**, which means that, for each combination of `pubkey` and `kind`, only the latest event MUST be stored by relays, older versions MAY be discarded. +- for kind `n` such that `20000 <= n < 30000`, events are **ephemeral**, which means they are not expected to be stored by relays. +- for kind `n` such that `30000 <= n < 40000`, events are **addressable** by their `kind`, `pubkey` and `d` tag value -- which means that, for each combination of `kind`, `pubkey` and the `d` tag value, only the latest event MUST be stored by relays, older versions MAY be discarded. + +In case of replaceable events with the same timestamp, the event with the lowest id (first in lexical order) should be retained, and the other discarded. + +When answering to `REQ` messages for replaceable events such as `{"kinds":[0],"authors":[<hex-key>]}`, even if the relay has more than one version stored, it SHOULD return just the latest one. + +These are just conventions and relay implementations may differ. + +## Communication between clients and relays + +Relays expose a websocket endpoint to which clients can connect. Clients SHOULD open a single websocket connection to each relay and use it for all their subscriptions. Relays MAY limit number of connections from specific IP/client/etc. + +### From client to relay: sending events and creating subscriptions + +Clients can send 3 types of messages, which must be JSON arrays, according to the following patterns: + + * `["EVENT", <event JSON as defined above>]`, used to publish events. + * `["REQ", <subscription_id>, <filters1>, <filters2>, ...]`, used to request events and subscribe to new updates. + * `["CLOSE", <subscription_id>]`, used to stop previous subscriptions. + +`<subscription_id>` is an arbitrary, non-empty string of max length 64 chars. It represents a subscription per connection. Relays MUST manage `<subscription_id>`s independently for each WebSocket connection. `<subscription_id>`s are not guaranteed to be globally unique.
+ +`<filtersX>` is a JSON object that determines what events will be sent in that subscription, it can have the following attributes: + +```json +{ + "ids": <a list of event ids>, + "authors": <a list of lowercase pubkeys, the pubkey of an event must be one of these>, + "kinds": <a list of a kind numbers>, + "#<single-letter (a-zA-Z)>": <a list of tag values, for #e — a list of event ids, for #p — a list of pubkeys, etc.>, + "since": <an integer unix timestamp in seconds. Events must have a created_at >= to this to pass>, + "until": <an integer unix timestamp in seconds. Events must have a created_at <= to this to pass>, + "limit": <maximum number of events relays SHOULD return in the initial query> +} +``` + +Upon receiving a `REQ` message, the relay SHOULD return events that match the filter. Any new events it receives SHOULD be sent to that same websocket until the connection is closed, a `CLOSE` event is received with the same `<subscription_id>`, or a new `REQ` is sent using the same `<subscription_id>` (in which case a new subscription is created, replacing the old one). + +Filter attributes containing lists (`ids`, `authors`, `kinds` and tag filters like `#e`) are JSON arrays with one or more values. At least one of the arrays' values must match the relevant field in an event for the condition to be considered a match. For scalar event attributes such as `authors` and `kind`, the attribute from the event must be contained in the filter list. In the case of tag attributes such as `#e`, for which an event may have multiple values, the event and filter condition values must have at least one item in common. + +The `ids`, `authors`, `#e` and `#p` filter lists MUST contain exact 64-character lowercase hex values. + +The `since` and `until` properties can be used to specify the time range of events returned in the subscription. If a filter includes the `since` property, events with `created_at` greater than or equal to `since` are considered to match the filter. The `until` property is similar except that `created_at` must be less than or equal to `until`. In short, an event matches a filter if `since <= created_at <= until` holds. + +All conditions of a filter that are specified must match for an event for it to pass the filter, i.e., multiple conditions are interpreted as `&&` conditions. + +A `REQ` message may contain multiple filters.
In this case, events that match any of the filters are to be returned, i.e., multiple filters are to be interpreted as `||` conditions. + +The `limit` property of a filter is only valid for the initial query and MUST be ignored afterwards. When `limit: n` is present it is assumed that the events returned in the initial query will be the last `n` events ordered by the `created_at`. Newer events should appear first, and in the case of ties the event with the lowest id (first in lexical order) should be first. It is safe to return less events than `limit` specifies, but it is expected that relays do not return (much) more events than requested so clients don't get unnecessarily overwhelmed by data. + +### From relay to client: sending events and notices + +Relays can send 5 types of messages, which must also be JSON arrays, according to the following patterns: + + * `["EVENT", <subscription_id>, <event JSON as defined above>]`, used to send events requested by clients. + * `["OK", <event_id>, <true|false>, <message>]`, used to indicate acceptance or denial of an `EVENT` message. + * `["EOSE", <subscription_id>]`, used to indicate the _end of stored events_ and the beginning of events newly received in real-time. + * `["CLOSED", <subscription_id>, <message>]`, used to indicate that a subscription was ended on the server side. + * `["NOTICE", <message>]`, used to send human-readable error messages or other things to clients. + +This NIP defines no rules for how `NOTICE` messages should be sent or treated. + +- `EVENT` messages MUST be sent only with a subscription ID related to a subscription previously initiated by the client (using the `REQ` message above). +- `OK` messages MUST be sent in response to `EVENT` messages received from clients, they must have the 3rd parameter set to `true` when an event has been accepted by the relay, `false` otherwise. The 4th parameter MUST always be present, but MAY be an empty string when the 3rd is `true`, otherwise it MUST be a string formed by a machine-readable single-word prefix followed by a `:` and then a human-readable message.
Some examples: + * `["OK", "b1a649ebe8...", true, ""]` + * `["OK", "b1a649ebe8...", true, "pow: difficulty 25>=24"]` + * `["OK", "b1a649ebe8...", true, "duplicate: already have this event"]` + * `["OK", "b1a649ebe8...", false, "blocked: you are banned from posting here"]` + * `["OK", "b1a649ebe8...", false, "blocked: please register your pubkey at https://my-expensive-relay.example.com"]` + * `["OK", "b1a649ebe8...", false, "rate-limited: slow down there chief"]` + * `["OK", "b1a649ebe8...", false, "invalid: event creation date is too far off from the current time"]` + * `["OK", "b1a649ebe8...", false, "pow: difficulty 26 is less than 30"]` + * `["OK", "b1a649ebe8...", false, "restricted: not allowed to write."]` + * `["OK", "b1a649ebe8...", false, "error: could not connect to the database"]` +- `CLOSED` messages MUST be sent in response to a `REQ` when the relay refuses to fulfill it. It can also be sent when a relay decides to kill a subscription on its side before a client has disconnected or sent a `CLOSE`. This message uses the same pattern of `OK` messages with the machine-readable prefix and human-readable message. Some examples: + * `["CLOSED", "sub1", "unsupported: filter contains unknown elements"]` + * `["CLOSED", "sub1", "error: could not connect to the database"]` + * `["CLOSED", "sub1", "error: shutting down idle subscription"]` +- The standardized machine-readable prefixes for `OK` and `CLOSED` are: `duplicate`, `pow`, `blocked`, `rate-limited`, `invalid`, `restricted`, and `error` for when none of that fits. diff --git a/relay/.dockerignore b/relay/.dockerignore new file mode 100644 index 0000000..9e03c48 --- /dev/null +++ b/relay/.dockerignore @@ -0,0 +1,32 @@ +# Include any files or directories that you don't want to be copied to your +# container here (e.g., local build artifacts, temporary files, etc.). 
+# +# For more help, visit the .dockerignore file reference guide at +# https://docs.docker.com/go/build-context-dockerignore/ + +**/.DS_Store +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/bin +**/charts +**/docker-compose* +**/compose.y*ml +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md diff --git a/relay/Dockerfile b/relay/Dockerfile new file mode 100644 index 0000000..0c19d6f --- /dev/null +++ b/relay/Dockerfile @@ -0,0 +1,78 @@ +# syntax=docker/dockerfile:1 + +# Comments are provided throughout this file to help you get started. +# If you need more help, visit the Dockerfile reference guide at +# https://docs.docker.com/go/dockerfile-reference/ + +# Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7 + +################################################################################ +# Create a stage for building the application. +ARG GO_VERSION=1.23.4 +FROM --platform=$BUILDPLATFORM golang:${GO_VERSION} AS build +WORKDIR /src + +# Download dependencies as a separate step to take advantage of Docker's caching. +# Leverage a cache mount to /go/pkg/mod/ to speed up subsequent builds. +# Leverage bind mounts to go.sum and go.mod to avoid having to copy them into +# the container. +RUN --mount=type=cache,target=/go/pkg/mod/ \ + --mount=type=bind,source=go.sum,target=go.sum \ + --mount=type=bind,source=go.mod,target=go.mod \ + go mod download -x + +# This is the architecture you're building for, which is passed in by the builder. +# Placing it here allows the previous steps to be cached across architectures. +ARG TARGETARCH + +# Build the application. +# Leverage a cache mount to /go/pkg/mod/ to speed up subsequent builds. 
+# Leverage a bind mount to the current directory to avoid having to copy the +# source code into the container. +RUN --mount=type=cache,target=/go/pkg/mod/ \ + --mount=type=bind,target=. \ + CGO_ENABLED=0 GOARCH=$TARGETARCH go build -o /bin/server . + +################################################################################ +# Create a new stage for running the application that contains the minimal +# runtime dependencies for the application. This often uses a different base +# image from the build stage where the necessary files are copied from the build +# stage. +# +# The example below uses the alpine image as the foundation for running the app. +# By specifying the "latest" tag, it will also use whatever happens to be the +# most recent version of that image when you build your Dockerfile. If +# reproducibility is important, consider using a versioned tag +# (e.g., alpine:3.17.2) or SHA (e.g., alpine@sha256:c41ab5c992deb4fe7e5da09f67a8804a46bd0592bfdf0b1847dde0e0889d2bff). +FROM alpine:latest AS final + +# Install any runtime dependencies that are needed to run your application. +# Leverage a cache mount to /var/cache/apk/ to speed up subsequent builds. +RUN --mount=type=cache,target=/var/cache/apk \ + apk --update add \ + ca-certificates \ + tzdata \ + && \ + update-ca-certificates + +# Create a non-privileged user that the app will run under. +# See https://docs.docker.com/go/dockerfile-user-best-practices/ +ARG UID=10001 +RUN adduser \ + --disabled-password \ + --gecos "" \ + --home "/nonexistent" \ + --shell "/sbin/nologin" \ + --no-create-home \ + --uid "${UID}" \ + appuser +USER appuser + +# Copy the executable from the "build" stage. +COPY --from=build /bin/server /bin/ + +# Expose the port that the application listens on. +EXPOSE 3334 + +# What the container should run when it is started. 
+ENTRYPOINT [ "/bin/server" ] diff --git a/relay/go.mod b/relay/go.mod new file mode 100644 index 0000000..880fa3c --- /dev/null +++ b/relay/go.mod @@ -0,0 +1,36 @@ +module git.v0l.io/highperfocused/scrapestr/relay + +go 1.23.4 + +require ( + fiatjaf.com/lib v0.2.0 // indirect + github.com/andybalholm/brotli v1.0.5 // indirect + github.com/bep/debounce v1.2.1 // indirect + github.com/btcsuite/btcd/btcec/v2 v2.3.4 // indirect + github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 // indirect + github.com/coder/websocket v1.8.12 // indirect + github.com/decred/dcrd/crypto/blake256 v1.1.0 // indirect + github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 // indirect + github.com/fasthttp/websocket v1.5.7 // indirect + github.com/fiatjaf/eventstore v0.16.0 + github.com/fiatjaf/khatru v0.15.2 + github.com/jmoiron/sqlx v1.4.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/lib/pq v1.10.9 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/nbd-wtf/go-nostr v0.46.0 // indirect + github.com/puzpuzpuz/xsync/v3 v3.4.0 // indirect + github.com/rs/cors v1.11.1 // indirect + github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.51.0 // indirect + golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d // indirect + golang.org/x/net v0.32.0 // indirect +) diff --git a/relay/go.sum b/relay/go.sum new file mode 100644 index 0000000..5eb56b2 --- /dev/null +++ b/relay/go.sum @@ -0,0 +1,72 @@ +fiatjaf.com/lib v0.2.0 h1:TgIJESbbND6GjOgGHxF5jsO6EMjuAxIzZHPo5DXYexs= 
+fiatjaf.com/lib v0.2.0/go.mod h1:Ycqq3+mJ9jAWu7XjbQI1cVr+OFgnHn79dQR5oTII47g= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/bep/debounce v1.2.1 h1:v67fRdBA9UQu2NhLFXrSg0Brw7CexQekrBwDMM8bzeY= +github.com/bep/debounce v1.2.1/go.mod h1:H8yggRPQKLUhUoqrJC1bO2xNya7vanpDl7xR3ISbCJ0= +github.com/btcsuite/btcd/btcec/v2 v2.3.4 h1:3EJjcN70HCu/mwqlUsGK8GcNVyLVxFDlWurTXGPFfiQ= +github.com/btcsuite/btcd/btcec/v2 v2.3.4/go.mod h1:zYzJ8etWJQIv1Ogk7OzpWjowwOdXY1W/17j2MW85J04= +github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 h1:59Kx4K6lzOW5w6nFlA0v5+lk/6sjybR934QNHSJZPTQ= +github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc= +github.com/coder/websocket v1.8.12 h1:5bUXkEPPIbewrnkU8LTCLVaxi4N4J8ahufH2vlo4NAo= +github.com/coder/websocket v1.8.12/go.mod h1:LNVeNrXQZfe5qhS9ALED3uA+l5pPqvwXg3CKoDBB2gs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/decred/dcrd/crypto/blake256 v1.1.0 h1:zPMNGQCm0g4QTY27fOCorQW7EryeQ/U0x++OzVrdms8= +github.com/decred/dcrd/crypto/blake256 v1.1.0/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= +github.com/fasthttp/websocket v1.5.7 h1:0a6o2OfeATvtGgoMKleURhLT6JqWPg7fYfWnH4KHau4= +github.com/fasthttp/websocket v1.5.7/go.mod h1:bC4fxSono9czeXHQUVKxsC0sNjbm7lPJR04GDFqClfU= +github.com/fiatjaf/eventstore v0.15.0 h1:5UXe0+vIb30/cYcOWipks8nR3g+X8W224TFy5yPzivk= +github.com/fiatjaf/eventstore v0.15.0/go.mod 
h1:KAsld5BhkmSck48aF11Txu8X+OGNmoabw4TlYVWqInc= +github.com/fiatjaf/eventstore v0.16.0 h1:r26aJeOwJTCbEevU8RVqp9FlcAgzKKqUWFH//x+Y+7M= +github.com/fiatjaf/eventstore v0.16.0/go.mod h1:KAsld5BhkmSck48aF11Txu8X+OGNmoabw4TlYVWqInc= +github.com/fiatjaf/khatru v0.15.2 h1:4p0LGUFh+C0zFAPTQdzUdhZDabjmktyov9h5V32EdSw= +github.com/fiatjaf/khatru v0.15.2/go.mod h1:GBQJXZpitDatXF9RookRXcWB5zCJclCE4ufDK3jk80g= +github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= +github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 
h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/nbd-wtf/go-nostr v0.46.0 h1:aR+xXEC6MPutNMIRhNdi+2iBPEHW7SO10sFaOAVSz3Y= +github.com/nbd-wtf/go-nostr v0.46.0/go.mod h1:xVNOqkn0GImeTmaF6VDwgYsuSkfG3yrIbd0dT6NZDIQ= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/puzpuzpuz/xsync/v3 v3.4.0 h1:DuVBAdXuGFHv8adVXjWWZ63pJq+NRXOWVXlKDBZ+mJ4= +github.com/puzpuzpuz/xsync/v3 v3.4.0/go.mod h1:VjzYrABPabuM4KyBh1Ftq6u8nhwY5tBPKP9jpmh0nnA= +github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= +github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= +github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk= +github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.51.0 
h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA=
+github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g=
+golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d h1:0olWaB5pg3+oychR51GUVCEsGkeCU/2JxjBgIo4f3M0=
+golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c=
+golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
+golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
diff --git a/relay/main.go b/relay/main.go
new file mode 100644
index 0000000..02b4ca6
--- /dev/null
+++ b/relay/main.go
@@ -0,0 +1,75 @@
+package main
+
+import (
+	"fmt"
+	"net/http"
+	"os"
+	"time"
+
+	"github.com/fiatjaf/eventstore/postgresql"
+	"github.com/fiatjaf/khatru"
+	"github.com/fiatjaf/khatru/policies"
+)
+
+// getEnv returns the value of the environment variable key, or fallback
+// when the variable is not set. A variable set to the empty string is
+// returned as "" rather than replaced by fallback.
+func getEnv(key, fallback string) string {
+	if value, ok := os.LookupEnv(key); ok {
+		return value
+	}
+	return fallback
+}
+
+// main configures a khatru relay from environment variables, backs it with
+// a PostgreSQL event store and serves it over HTTP on port 3334.
+func main() {
+	fmt.Print(`
+ SCRAPESTR RELAY
+`)
+
+	// create the relay instance
+	relay := khatru.NewRelay()
+
+	// set up relay properties with environment variable configuration
+	relay.Info.Name = getEnv("RELAY_NAME", "Scrapestr Relay")
+	relay.Info.PubKey = getEnv("RELAY_PUBKEY", "79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798")
+	relay.Info.Description = getEnv("RELAY_DESCRIPTION", "Scrapestr Relay")
+	relay.Info.Icon = getEnv("RELAY_ICON", "https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fliquipedia.net%2Fcommons%2Fimages%2F3%2F35%2FSCProbe.jpg&f=1&nofb=1&ipt=0cbbfef25bce41da63d910e86c3c343e6c3b9d63194ca9755351bb7c2efa3359&ipo=images")
+
+	// Print relay information
+	fmt.Printf("Name: %s\n", relay.Info.Name)
+	fmt.Printf("Public Key: %s\n", relay.Info.PubKey)
+	fmt.Printf("Description: %s\n\n", relay.Info.Description)
+
+	// Configure PostgreSQL connection with environment variable
+	postgresURL := getEnv("POSTGRES_URL", "postgres://postgres:postgres@postgres/postgres?sslmode=disable")
+	db := postgresql.PostgresBackend{DatabaseURL: postgresURL}
+	if err := db.Init(); err != nil {
+		panic(err)
+	}
+
+	// Use the PostgreSQL backend to store, query, delete and replace events.
+	relay.StoreEvent = append(relay.StoreEvent, db.SaveEvent)
+	relay.QueryEvents = append(relay.QueryEvents, db.QueryEvents)
+	relay.DeleteEvent = append(relay.DeleteEvent, db.DeleteEvent)
+	relay.ReplaceEvent = append(relay.ReplaceEvent, db.ReplaceEvent)
+
+	// NOTE(review): PreventTimestampsInThePast presumably rejects events
+	// older than two hours; confirm that is intended for a relay that is fed
+	// by the scraper, which may forward older events.
+	relay.RejectEvent = append(
+		relay.RejectEvent,
+		policies.PreventLargeTags(120),
+		policies.PreventTimestampsInThePast(time.Hour*2),
+		policies.PreventTimestampsInTheFuture(time.Minute*30),
+	)
+
+	fmt.Println("running on :3334")
+	// http.ListenAndServe only returns when the server fails (for example
+	// when the port is already in use), so report the error instead of
+	// exiting silently with status 0.
+	if err := http.ListenAndServe(":3334", relay); err != nil {
+		panic(err)
+	}
+}
diff --git a/scraper/.dockerignore b/scraper/.dockerignore
new file mode 100644
index 0000000..03a268b
--- /dev/null
+++ b/scraper/.dockerignore
@@ -0,0 +1,34 @@
+# Include any files or directories that you don't want to be copied to your
+# container here (e.g., local build artifacts, temporary files, etc.).
+#
+# For more help, visit the .dockerignore file reference guide at
+# https://docs.docker.com/go/build-context-dockerignore/
+
+**/.DS_Store
+**/__pycache__
+**/.venv
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose.y*ml
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+LICENSE
+README.md
diff --git a/scraper/Dockerfile b/scraper/Dockerfile
new file mode 100644
index 0000000..541a2e8
--- /dev/null
+++ b/scraper/Dockerfile
@@ -0,0 +1,48 @@
+# syntax=docker/dockerfile:1
+
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Dockerfile reference guide at
+# https://docs.docker.com/go/dockerfile-reference/
+
+# Want to help us make this template better?
Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7 + +ARG PYTHON_VERSION=3.13.1 +FROM python:${PYTHON_VERSION}-slim as base + +# Prevents Python from writing pyc files. +ENV PYTHONDONTWRITEBYTECODE=1 + +# Keeps Python from buffering stdout and stderr to avoid situations where +# the application crashes without emitting any logs due to buffering. +ENV PYTHONUNBUFFERED=1 + +WORKDIR /app + +# Create a non-privileged user that the app will run under. +# See https://docs.docker.com/go/dockerfile-user-best-practices/ +ARG UID=10001 +RUN adduser \ + --disabled-password \ + --gecos "" \ + --home "/nonexistent" \ + --shell "/sbin/nologin" \ + --no-create-home \ + --uid "${UID}" \ + appuser + +# Download dependencies as a separate step to take advantage of Docker's caching. +# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. +# Leverage a bind mount to requirements.txt to avoid having to copy them into +# into this layer. +RUN --mount=type=cache,target=/root/.cache/pip \ + --mount=type=bind,source=requirements.txt,target=requirements.txt \ + python -m pip install -r requirements.txt + +# Switch to the non-privileged user to run the application. +USER appuser + +# Copy the source code into the container. +COPY . . + +# Run the application. 
+CMD ["python", "main.py"]
diff --git a/scraper/main.py b/scraper/main.py
new file mode 100644
index 0000000..811cdd8
--- /dev/null
+++ b/scraper/main.py
@@ -0,0 +1,124 @@
+import asyncio
+import os
+import websockets
+import json
+from datetime import datetime
+
+# Subscription id sent with every REQ to the input relay.
+SUBSCRIPTION_ID = "1337"
+# Seconds to wait before each connection retry.
+RETRY_DELAY = 1
+# "limit" used when re-subscribing after a dropped connection.
+RECONNECT_LIMIT = 10
+
+
+def build_req_message(kinds, limit=None):
+    """Return the REQ message for the given kinds as a JSON string.
+
+    kinds is the JSON array text read from the KINDS environment variable
+    (for example "[0,1,2,3,20]"). It is parsed and re-serialized so that a
+    missing or malformed value fails here instead of producing an invalid
+    request. When limit is not None it is added to the filter.
+
+    Raises:
+        ValueError: if kinds is empty, not valid JSON or not a JSON array.
+    """
+    if not kinds:
+        raise ValueError("Please set the KINDS environment variable")
+    try:
+        parsed_kinds = json.loads(kinds)
+    except json.JSONDecodeError as error:
+        raise ValueError(f"KINDS is not valid JSON: {error}") from error
+    if not isinstance(parsed_kinds, list):
+        raise ValueError("KINDS must be a JSON array")
+    subscription_filter = {"kinds": parsed_kinds}
+    if limit is not None:
+        subscription_filter["limit"] = limit
+    return json.dumps(["REQ", SUBSCRIPTION_ID, subscription_filter])
+
+
+async def _pump_events(input_websocket, output_websocket):
+    """Handle messages from input_websocket until its connection closes."""
+    while True:
+        try:
+            raw_message = await input_websocket.recv()
+        except websockets.ConnectionClosed:
+            return
+        try:
+            event = json.loads(raw_message)
+            if event[0] == "EVENT":
+                # TODO: Broadcast to output websocket
+                print("Got event: ", event)
+            elif event[0] == "EOSE":
+                print("End of stream")
+        except Exception as error:
+            # A single bad message must not stop the scraper: log and go on.
+            print(f"Failed to relay event: {error}")
+            # NOTE(review): presumably meant for errors raised while
+            # forwarding to the output relay, which is not implemented yet.
+            if "sent 1011" in str(error):
+                print("Got Code 1011 -> Closing websockets...")
+                # close() is a coroutine; without await it never runs.
+                await input_websocket.close()
+                await output_websocket.close()
+
+
+async def relay_websockets(input_websocket, output_websocket, kinds):
+    """Relay events from the input relay, reconnecting when it drops.
+
+    Processes the given websockets first. Each time the connection closes,
+    new connections to INPUT_RELAY and OUTPUT_RELAY are opened and the
+    subscription is re-sent with a limit of RECONNECT_LIMIT. Reconnecting is
+    a loop rather than a recursive call, so the call stack does not grow
+    with every disconnect. This coroutine does not return normally.
+    """
+    reconnect_message = build_req_message(kinds, limit=RECONNECT_LIMIT)
+    await _pump_events(input_websocket, output_websocket)
+    while True:
+        print("Connection closed, attempting to reconnect...")
+        await asyncio.sleep(RETRY_DELAY)
+        try:
+            async with websockets.connect(os.environ.get("INPUT_RELAY")) as new_input_websocket, \
+                    websockets.connect(os.environ.get("OUTPUT_RELAY")) as new_output_websocket:
+                await new_input_websocket.send(reconnect_message)
+                await _pump_events(new_input_websocket, new_output_websocket)
+        except Exception as error:
+            # If the reconnection attempt fails, repeat the loop and try again
+            print(f"Failed to reconnect: {error}")
+
+
+async def main():
+    """Validate the configuration, connect to both relays and relay events.
+
+    Raises:
+        ValueError: if INPUT_RELAY, OUTPUT_RELAY or KINDS is missing or
+            KINDS is not a JSON array.
+    """
+    print("Scraper started...")
+    # Read the websocket URLs from environment variables
+    input_url = os.environ.get("INPUT_RELAY")
+    output_url = os.environ.get("OUTPUT_RELAY")
+    kinds = os.environ.get("KINDS")
+
+    # If either relay URL is missing, raise an error
+    if not input_url:
+        raise ValueError("Please set the INPUT_RELAY environment variable")
+    if not output_url:
+        raise ValueError("Please set the OUTPUT_RELAY environment variable")
+    message = build_req_message(kinds)
+
+    # Retry the initial connection in a loop instead of recursing into main()
+    while True:
+        try:
+            async with websockets.connect(input_url) as input_websocket, \
+                    websockets.connect(output_url) as output_websocket:
+                await input_websocket.send(message)
+                await relay_websockets(input_websocket, output_websocket, kinds)
+        except Exception as error:
+            print(f"Failed to connect: {error}")
+            await asyncio.sleep(RETRY_DELAY)
+
+
+# Start the script
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/scraper/requirements.txt b/scraper/requirements.txt
new file mode 100644
index 0000000..7a38911
--- /dev/null
+++ b/scraper/requirements.txt
@@ -0,0 +1 @@
+websockets
\ No newline at end of file