From 1b88bf9e84330cb2d54ceb50b106af36efbe367e Mon Sep 17 00:00:00 2001 From: mr0x50 <24775431+mroxso@users.noreply.github.com> Date: Sun, 23 Feb 2025 21:58:12 +0100 Subject: [PATCH] initial code commit --- compose.yaml | 52 +++++++ relay/.dockerignore | 32 ++++ relay/Dockerfile | 78 ++++++++++ relay/go.mod | 36 +++++ relay/go.sum | 72 +++++++++ relay/main.go | 328 +++++++++++++++++++++++++++++++++++++++ scraper/.dockerignore | 34 ++++ scraper/Dockerfile | 48 ++++++ scraper/main.py | 66 ++++++++ scraper/requirements.txt | 1 + 10 files changed, 747 insertions(+) create mode 100644 compose.yaml create mode 100644 relay/.dockerignore create mode 100644 relay/Dockerfile create mode 100644 relay/go.mod create mode 100644 relay/go.sum create mode 100644 relay/main.go create mode 100644 scraper/.dockerignore create mode 100644 scraper/Dockerfile create mode 100644 scraper/main.py create mode 100644 scraper/requirements.txt diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..e14e118 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,52 @@ +services: + relay: + depends_on: + postgres: + condition: service_healthy + build: + context: ./relay + target: final + ports: + - 3334:3334 + environment: + RELAY_NAME: "LUMINA Relay" + RELAY_DESCRIPTION: "LUMINA Nostr Relay" + POSTGRES_URL: "postgres://postgres:postgres@postgres/postgres?sslmode=disable" + restart: unless-stopped + scraper: + depends_on: + - relay + build: + context: ./scraper + environment: + INPUT_RELAY: "wss://relay.nostr.band" + OUTPUT_RELAY: "ws://relay:3334" + KINDS: "[0,1,2,3,7,20,9735]" + restart: unless-stopped + postgres: + image: postgres:17 + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + volumes: + - ./postgres:/var/lib/postgresql/data + ports: + - 5432:5432 + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 5s + timeout: 5s + retries: 5 + start_period: 10s + restart: unless-stopped + # pg_admin: + # image: 
dpage/pgadmin4 + # environment: + # PGADMIN_DEFAULT_EMAIL: example@example.com + # PGADMIN_DEFAULT_PASSWORD: example + # ports: + # - 8080:80 + # depends_on: + # - postgres + # restart: unless-stopped diff --git a/relay/.dockerignore b/relay/.dockerignore new file mode 100644 index 0000000..9e03c48 --- /dev/null +++ b/relay/.dockerignore @@ -0,0 +1,32 @@ +# Include any files or directories that you don't want to be copied to your +# container here (e.g., local build artifacts, temporary files, etc.). +# +# For more help, visit the .dockerignore file reference guide at +# https://docs.docker.com/go/build-context-dockerignore/ + +**/.DS_Store +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/bin +**/charts +**/docker-compose* +**/compose.y*ml +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md diff --git a/relay/Dockerfile b/relay/Dockerfile new file mode 100644 index 0000000..0c19d6f --- /dev/null +++ b/relay/Dockerfile @@ -0,0 +1,78 @@ +# syntax=docker/dockerfile:1 + +# Comments are provided throughout this file to help you get started. +# If you need more help, visit the Dockerfile reference guide at +# https://docs.docker.com/go/dockerfile-reference/ + +# Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7 + +################################################################################ +# Create a stage for building the application. +ARG GO_VERSION=1.23.4 +FROM --platform=$BUILDPLATFORM golang:${GO_VERSION} AS build +WORKDIR /src + +# Download dependencies as a separate step to take advantage of Docker's caching. +# Leverage a cache mount to /go/pkg/mod/ to speed up subsequent builds. +# Leverage bind mounts to go.sum and go.mod to avoid having to copy them into +# the container. 
+RUN --mount=type=cache,target=/go/pkg/mod/ \ + --mount=type=bind,source=go.sum,target=go.sum \ + --mount=type=bind,source=go.mod,target=go.mod \ + go mod download -x + +# This is the architecture you're building for, which is passed in by the builder. +# Placing it here allows the previous steps to be cached across architectures. +ARG TARGETARCH + +# Build the application. +# Leverage a cache mount to /go/pkg/mod/ to speed up subsequent builds. +# Leverage a bind mount to the current directory to avoid having to copy the +# source code into the container. +RUN --mount=type=cache,target=/go/pkg/mod/ \ + --mount=type=bind,target=. \ + CGO_ENABLED=0 GOARCH=$TARGETARCH go build -o /bin/server . + +################################################################################ +# Create a new stage for running the application that contains the minimal +# runtime dependencies for the application. This often uses a different base +# image from the build stage where the necessary files are copied from the build +# stage. +# +# The example below uses the alpine image as the foundation for running the app. +# By specifying the "latest" tag, it will also use whatever happens to be the +# most recent version of that image when you build your Dockerfile. If +# reproducibility is important, consider using a versioned tag +# (e.g., alpine:3.17.2) or SHA (e.g., alpine@sha256:c41ab5c992deb4fe7e5da09f67a8804a46bd0592bfdf0b1847dde0e0889d2bff). +FROM alpine:latest AS final + +# Install any runtime dependencies that are needed to run your application. +# Leverage a cache mount to /var/cache/apk/ to speed up subsequent builds. +RUN --mount=type=cache,target=/var/cache/apk \ + apk --update add \ + ca-certificates \ + tzdata \ + && \ + update-ca-certificates + +# Create a non-privileged user that the app will run under. 
+# See https://docs.docker.com/go/dockerfile-user-best-practices/ +ARG UID=10001 +RUN adduser \ + --disabled-password \ + --gecos "" \ + --home "/nonexistent" \ + --shell "/sbin/nologin" \ + --no-create-home \ + --uid "${UID}" \ + appuser +USER appuser + +# Copy the executable from the "build" stage. +COPY --from=build /bin/server /bin/ + +# Expose the port that the application listens on. +EXPOSE 3334 + +# What the container should run when it is started. +ENTRYPOINT [ "/bin/server" ] diff --git a/relay/go.mod b/relay/go.mod new file mode 100644 index 0000000..880fa3c --- /dev/null +++ b/relay/go.mod @@ -0,0 +1,36 @@ +module git.v0l.io/highperfocused/scrapestr/relay + +go 1.23.4 + +require ( + fiatjaf.com/lib v0.2.0 // indirect + github.com/andybalholm/brotli v1.0.5 // indirect + github.com/bep/debounce v1.2.1 // indirect + github.com/btcsuite/btcd/btcec/v2 v2.3.4 // indirect + github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 // indirect + github.com/coder/websocket v1.8.12 // indirect + github.com/decred/dcrd/crypto/blake256 v1.1.0 // indirect + github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 // indirect + github.com/fasthttp/websocket v1.5.7 // indirect + github.com/fiatjaf/eventstore v0.16.0 + github.com/fiatjaf/khatru v0.15.2 + github.com/jmoiron/sqlx v1.4.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/lib/pq v1.10.9 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/nbd-wtf/go-nostr v0.46.0 // indirect + github.com/puzpuzpuz/xsync/v3 v3.4.0 // indirect + github.com/rs/cors v1.11.1 // indirect + github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + 
github.com/tidwall/pretty v1.2.1 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.51.0 // indirect + golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d // indirect + golang.org/x/net v0.32.0 // indirect +) diff --git a/relay/go.sum b/relay/go.sum new file mode 100644 index 0000000..5eb56b2 --- /dev/null +++ b/relay/go.sum @@ -0,0 +1,72 @@ +fiatjaf.com/lib v0.2.0 h1:TgIJESbbND6GjOgGHxF5jsO6EMjuAxIzZHPo5DXYexs= +fiatjaf.com/lib v0.2.0/go.mod h1:Ycqq3+mJ9jAWu7XjbQI1cVr+OFgnHn79dQR5oTII47g= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= +github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= +github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= +github.com/bep/debounce v1.2.1 h1:v67fRdBA9UQu2NhLFXrSg0Brw7CexQekrBwDMM8bzeY= +github.com/bep/debounce v1.2.1/go.mod h1:H8yggRPQKLUhUoqrJC1bO2xNya7vanpDl7xR3ISbCJ0= +github.com/btcsuite/btcd/btcec/v2 v2.3.4 h1:3EJjcN70HCu/mwqlUsGK8GcNVyLVxFDlWurTXGPFfiQ= +github.com/btcsuite/btcd/btcec/v2 v2.3.4/go.mod h1:zYzJ8etWJQIv1Ogk7OzpWjowwOdXY1W/17j2MW85J04= +github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 h1:59Kx4K6lzOW5w6nFlA0v5+lk/6sjybR934QNHSJZPTQ= +github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc= +github.com/coder/websocket v1.8.12 h1:5bUXkEPPIbewrnkU8LTCLVaxi4N4J8ahufH2vlo4NAo= +github.com/coder/websocket v1.8.12/go.mod h1:LNVeNrXQZfe5qhS9ALED3uA+l5pPqvwXg3CKoDBB2gs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/decred/dcrd/crypto/blake256 v1.1.0 h1:zPMNGQCm0g4QTY27fOCorQW7EryeQ/U0x++OzVrdms8= +github.com/decred/dcrd/crypto/blake256 v1.1.0/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 
h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= +github.com/fasthttp/websocket v1.5.7 h1:0a6o2OfeATvtGgoMKleURhLT6JqWPg7fYfWnH4KHau4= +github.com/fasthttp/websocket v1.5.7/go.mod h1:bC4fxSono9czeXHQUVKxsC0sNjbm7lPJR04GDFqClfU= +github.com/fiatjaf/eventstore v0.15.0 h1:5UXe0+vIb30/cYcOWipks8nR3g+X8W224TFy5yPzivk= +github.com/fiatjaf/eventstore v0.15.0/go.mod h1:KAsld5BhkmSck48aF11Txu8X+OGNmoabw4TlYVWqInc= +github.com/fiatjaf/eventstore v0.16.0 h1:r26aJeOwJTCbEevU8RVqp9FlcAgzKKqUWFH//x+Y+7M= +github.com/fiatjaf/eventstore v0.16.0/go.mod h1:KAsld5BhkmSck48aF11Txu8X+OGNmoabw4TlYVWqInc= +github.com/fiatjaf/khatru v0.15.2 h1:4p0LGUFh+C0zFAPTQdzUdhZDabjmktyov9h5V32EdSw= +github.com/fiatjaf/khatru v0.15.2/go.mod h1:GBQJXZpitDatXF9RookRXcWB5zCJclCE4ufDK3jk80g= +github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= +github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= 
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/nbd-wtf/go-nostr v0.46.0 h1:aR+xXEC6MPutNMIRhNdi+2iBPEHW7SO10sFaOAVSz3Y= +github.com/nbd-wtf/go-nostr v0.46.0/go.mod h1:xVNOqkn0GImeTmaF6VDwgYsuSkfG3yrIbd0dT6NZDIQ= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/puzpuzpuz/xsync/v3 v3.4.0 h1:DuVBAdXuGFHv8adVXjWWZ63pJq+NRXOWVXlKDBZ+mJ4= +github.com/puzpuzpuz/xsync/v3 v3.4.0/go.mod h1:VjzYrABPabuM4KyBh1Ftq6u8nhwY5tBPKP9jpmh0nnA= +github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= +github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= +github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1AvpV+7XmhI4r39LGNzwUL4YpMuL5vk= +github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod 
h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA= +github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g= +golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d h1:0olWaB5pg3+oychR51GUVCEsGkeCU/2JxjBgIo4f3M0= +golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= +golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= +golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= diff --git a/relay/main.go b/relay/main.go new file mode 100644 index 0000000..9f10e4d --- /dev/null +++ b/relay/main.go @@ -0,0 +1,328 @@ +package main + +import ( + "encoding/json" + "fmt" + "net/http" + "os" + "time" + + "github.com/fiatjaf/eventstore/postgresql" + "github.com/fiatjaf/khatru" + "github.com/fiatjaf/khatru/policies" +) + +func getEnv(key, fallback string) string { + if value, ok := os.LookupEnv(key); ok { + return value + } + return fallback +} + +func main() { + fmt.Print(` + LUMINA RELAY +`) + + // create the relay instance + relay := khatru.NewRelay() + + // set up relay properties with environment variable configuration + relay.Info.Name = getEnv("RELAY_NAME", "LUMINA Relay") + relay.Info.PubKey = getEnv("RELAY_PUBKEY", "79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798") + relay.Info.Description = getEnv("RELAY_DESCRIPTION", "LUMINA Relay") + relay.Info.Icon = getEnv("RELAY_ICON", 
"https://external-content.duckduckgo.com/iu/?u=https%3A%2F%2Fliquipedia.net%2Fcommons%2Fimages%2F3%2F35%2FSCProbe.jpg&f=1&nofb=1&ipt=0cbbfef25bce41da63d910e86c3c343e6c3b9d63194ca9755351bb7c2efa3359&ipo=images") + + // Print relay information + fmt.Printf("Name: %s\n", relay.Info.Name) + fmt.Printf("Public Key: %s\n", relay.Info.PubKey) + fmt.Printf("Description: %s\n\n", relay.Info.Description) + + // Configure PostgreSQL connection with environment variable + postgresURL := getEnv("POSTGRES_URL", "postgres://postgres:postgres@postgres/postgres?sslmode=disable") + db := postgresql.PostgresBackend{DatabaseURL: postgresURL} + if err := db.Init(); err != nil { + panic(err) + } + + relay.StoreEvent = append(relay.StoreEvent, db.SaveEvent) + relay.QueryEvents = append(relay.QueryEvents, db.QueryEvents) + relay.DeleteEvent = append(relay.DeleteEvent, db.DeleteEvent) + relay.ReplaceEvent = append(relay.ReplaceEvent, db.ReplaceEvent) + relay.CountEvents = append(relay.CountEvents, db.CountEvents) + + relay.RejectEvent = append( + relay.RejectEvent, + policies.PreventLargeTags(120), + policies.PreventTimestampsInThePast(time.Hour*2), + policies.PreventTimestampsInTheFuture(time.Minute*30), + ) + + mux := relay.Router() + // set up other http handlers + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "text/html") + + // Query the total number of events + count := 0 + row := db.DB.QueryRow("SELECT COUNT(*) FROM event") + if err := row.Scan(&count); err != nil { + fmt.Printf("Error counting events: %v\n", err) + } + + // Improved HTML content with link to stats page + fmt.Fprintf(w, ` + + + + + + Scrapestr Relay + + + +
+

Welcome to LUMINA Relay!

+

Number of events stored: %d

+

View Event Stats

+
+ + + `, count) + }) + + mux.HandleFunc("/stats", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("content-type", "text/html") + + // Query the number of events for each kind, sorted by kind + rows, err := db.DB.Query("SELECT kind, COUNT(*) FROM event GROUP BY kind ORDER BY kind") + if err != nil { + fmt.Printf("Error querying event kinds: %v\n", err) + return + } + defer rows.Close() + + stats := make(map[string]int) + for rows.Next() { + var kind string + var count int + if err := rows.Scan(&kind, &count); err != nil { + fmt.Printf("Error scanning row: %v\n", err) + return + } + stats[kind] = count + } + + // Improved HTML content for stats + fmt.Fprintf(w, ` + + + + + + Scrapestr Relay Stats + + + +
+

Event Stats

+ + + + + + `) + for kind, count := range stats { + fmt.Fprintf(w, ` + + + + + `, kind, count) + } + fmt.Fprintf(w, ` +
KindCount
%s%d
+
+ + + `) + }) + + mux.HandleFunc("/api/stats", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + + // Query the number of events for each kind, sorted by kind + rows, err := db.DB.Query("SELECT kind, COUNT(*) FROM event GROUP BY kind ORDER BY kind") + if err != nil { + http.Error(w, fmt.Sprintf("Error querying event kinds: %v", err), http.StatusInternalServerError) + return + } + defer rows.Close() + + stats := make(map[string]int) + totalCount := 0 + for rows.Next() { + var kind string + var count int + if err := rows.Scan(&kind, &count); err != nil { + http.Error(w, fmt.Sprintf("Error scanning row: %v", err), http.StatusInternalServerError) + return + } + stats[kind] = count + totalCount += count + } + + // Add total count to the stats + response := map[string]interface{}{ + "total": totalCount, + "kinds": stats, + } + + // Encode stats to JSON and write to response + if err := json.NewEncoder(w).Encode(response); err != nil { + http.Error(w, fmt.Sprintf("Error encoding JSON: %v", err), http.StatusInternalServerError) + } + }) + + mux.HandleFunc("/api/trending/kind20", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + + // This query joins kind 20 posts with their reactions (kind 7) + // and counts the number of reactions in the last 24 hours using lateral join + query := ` + WITH reactions AS ( + SELECT + tags_expanded.value->1 #>> '{}' AS original_event_id, + COUNT(*) as reaction_count + FROM event e + CROSS JOIN LATERAL jsonb_array_elements(tags) as tags_expanded(value) + WHERE e.kind::text = '7' + AND e.created_at >= extract(epoch from now() - interval '24 hours')::bigint + AND tags_expanded.value->0 #>> '{}' = 'e' + GROUP BY tags_expanded.value->1 #>> '{}' + ) + SELECT + e.id, + e.pubkey, + to_timestamp(e.created_at) as created_at, + e.kind, + e.content, + e.tags, + COALESCE(r.reaction_count, 0) as reaction_count + FROM event e + LEFT JOIN reactions r ON 
e.id = r.original_event_id + WHERE e.kind::text = '20' + AND e.created_at >= extract(epoch from now() - interval '24 hours')::bigint + ORDER BY reaction_count DESC, e.created_at DESC + LIMIT 20 + ` + + rows, err := db.DB.Query(query) + if err != nil { + http.Error(w, fmt.Sprintf("Error querying trending posts: %v", err), http.StatusInternalServerError) + return + } + defer rows.Close() + + type TrendingPost struct { + ID string `json:"id"` + PubKey string `json:"pubkey"` + CreatedAt time.Time `json:"created_at"` + Kind string `json:"kind"` + Content string `json:"content"` + Tags [][]string `json:"tags"` + ReactionCount int `json:"reaction_count"` + } + + var trendingPosts []TrendingPost + for rows.Next() { + var post TrendingPost + var tagsJSON []byte + if err := rows.Scan(&post.ID, &post.PubKey, &post.CreatedAt, &post.Kind, &post.Content, &tagsJSON, &post.ReactionCount); err != nil { + http.Error(w, fmt.Sprintf("Error scanning row: %v", err), http.StatusInternalServerError) + return + } + // Parse the tags JSON + if err := json.Unmarshal(tagsJSON, &post.Tags); err != nil { + http.Error(w, fmt.Sprintf("Error parsing tags: %v", err), http.StatusInternalServerError) + return + } + trendingPosts = append(trendingPosts, post) + } + + response := map[string]interface{}{ + "trending": trendingPosts, + } + + if err := json.NewEncoder(w).Encode(response); err != nil { + http.Error(w, fmt.Sprintf("Error encoding JSON: %v", err), http.StatusInternalServerError) + } + }) + + fmt.Println("running on :3334") + http.ListenAndServe(":3334", relay) +} diff --git a/scraper/.dockerignore b/scraper/.dockerignore new file mode 100644 index 0000000..03a268b --- /dev/null +++ b/scraper/.dockerignore @@ -0,0 +1,34 @@ +# Include any files or directories that you don't want to be copied to your +# container here (e.g., local build artifacts, temporary files, etc.). 
+# +# For more help, visit the .dockerignore file reference guide at +# https://docs.docker.com/go/build-context-dockerignore/ + +**/.DS_Store +**/__pycache__ +**/.venv +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/bin +**/charts +**/docker-compose* +**/compose.y*ml +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/obj +**/secrets.dev.yaml +**/values.dev.yaml +LICENSE +README.md diff --git a/scraper/Dockerfile b/scraper/Dockerfile new file mode 100644 index 0000000..541a2e8 --- /dev/null +++ b/scraper/Dockerfile @@ -0,0 +1,48 @@ +# syntax=docker/dockerfile:1 + +# Comments are provided throughout this file to help you get started. +# If you need more help, visit the Dockerfile reference guide at +# https://docs.docker.com/go/dockerfile-reference/ + +# Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7 + +ARG PYTHON_VERSION=3.13.1 +FROM python:${PYTHON_VERSION}-slim as base + +# Prevents Python from writing pyc files. +ENV PYTHONDONTWRITEBYTECODE=1 + +# Keeps Python from buffering stdout and stderr to avoid situations where +# the application crashes without emitting any logs due to buffering. +ENV PYTHONUNBUFFERED=1 + +WORKDIR /app + +# Create a non-privileged user that the app will run under. +# See https://docs.docker.com/go/dockerfile-user-best-practices/ +ARG UID=10001 +RUN adduser \ + --disabled-password \ + --gecos "" \ + --home "/nonexistent" \ + --shell "/sbin/nologin" \ + --no-create-home \ + --uid "${UID}" \ + appuser + +# Download dependencies as a separate step to take advantage of Docker's caching. +# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds. +# Leverage a bind mount to requirements.txt to avoid having to copy them into +# into this layer. 
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements.txt,target=requirements.txt \
+    python -m pip install -r requirements.txt
+
+# Switch to the non-privileged user to run the application.
+USER appuser
+
+# Copy the source code into the container.
+COPY . .
+
+# Run the application.
+CMD python main.py
diff --git a/scraper/main.py b/scraper/main.py
new file mode 100644
index 0000000..9f5bcd5
--- /dev/null
+++ b/scraper/main.py
@@ -0,0 +1,91 @@
+"""Forward Nostr events from an input relay to an output relay."""
+import asyncio
+import json
+import os
+from datetime import datetime
+from uuid import uuid4
+
+import websockets
+
+
+async def relay_websockets(input_websocket, output_websocket, kinds, sub_id):
+    """Forward EVENT messages from the input relay to the output relay.
+
+    Loops until either websocket is closed, then returns so the caller can
+    open fresh connections.
+
+    Args:
+        input_websocket: Open connection to the relay being read from.
+        output_websocket: Open connection to the relay being written to.
+        kinds: Subscribed event kinds; unused here, kept for callers.
+        sub_id: Subscription id of the REQ; unused here, kept for callers.
+    """
+    while True:
+        try:
+            message = json.loads(await input_websocket.recv())
+            if message[0] == "EVENT":
+                # Relays deliver events as ["EVENT", <sub_id>, <event>].
+                event = message[2]
+                print("Received ID: ", event['id'], " // Kind: ", event['kind'])
+                # Publishing uses ["EVENT", <event>] (NIP-01); the
+                # subscription id only has meaning on the input relay.
+                await output_websocket.send(json.dumps(["EVENT", event]))
+            elif message[0] == "EOSE":
+                print("End of stream")
+        except websockets.ConnectionClosed:
+            # Either side is gone (recv or send raised). Retrying on a dead
+            # socket would fail forever, so hand control back to main().
+            print("Connection closed, attempting to reconnect...")
+            await asyncio.sleep(1)
+            return
+        except (ValueError, LookupError, TypeError) as error:
+            # Malformed or unexpected message: skip it, keep the stream.
+            print(f"Failed to relay event: {error}")
+
+
+async def main():
+    """Read configuration from the environment and relay events forever.
+
+    Raises:
+        ValueError: If INPUT_RELAY, OUTPUT_RELAY or KINDS is unset, or if
+            KINDS is not a JSON array.
+        RuntimeError: If a RecursionError escapes the relay loop.
+    """
+    print("Scraper started...")
+    input_url = os.environ.get("INPUT_RELAY")
+    output_url = os.environ.get("OUTPUT_RELAY")
+    raw_kinds = os.environ.get("KINDS")
+
+    # Fail fast on missing configuration instead of sending a broken REQ.
+    if not input_url:
+        raise ValueError("Please set the INPUT_RELAY environment variable")
+    if not output_url:
+        raise ValueError("Please set the OUTPUT_RELAY environment variable")
+    if not raw_kinds:
+        raise ValueError("Please set the KINDS environment variable")
+    kinds = json.loads(raw_kinds)  # JSONDecodeError is a ValueError
+    if not isinstance(kinds, list):
+        raise ValueError("KINDS must be a JSON array, e.g. [0,1,7]")
+
+    while True:
+        # A fresh subscription id per connection attempt.
+        sub_id = str(uuid4())
+        try:
+            async with websockets.connect(input_url) as input_websocket, \
+                    websockets.connect(output_url) as output_websocket:
+                # json.dumps guarantees a well-formed REQ message.
+                request = json.dumps(["REQ", sub_id, {"kinds": kinds}])
+                await input_websocket.send(request)
+                await relay_websockets(input_websocket, output_websocket, kinds, sub_id)
+        except RecursionError as error:
+            raise RuntimeError(
+                "Maximum recursion depth exceeded, crashing application."
+            ) from error
+        except Exception as error:
+            # Connecting or subscribing failed: wait briefly, then retry.
+            print(f"Failed to connect: {error}")
+            await asyncio.sleep(1)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/scraper/requirements.txt b/scraper/requirements.txt
new file mode 100644
index 0000000..7a38911
--- /dev/null
+++ b/scraper/requirements.txt
@@ -0,0 +1 @@
+websockets
\ No newline at end of file