From 82afd54551ccee88d68021e6766e221da9d632a4 Mon Sep 17 00:00:00 2001 From: mroxso Date: Tue, 4 Jul 2023 07:35:52 +0200 Subject: [PATCH] add basic system --- .env | 29 ++++++ .gitignore | 5 + Dockerfile | 13 +++ README.md | 12 +++ compose.yaml | 237 +++++++++++++++++++++++++++++++++++++++++++++++ main.py | 77 +++++++++++++++ requirements.txt | 3 + 7 files changed, 376 insertions(+) create mode 100644 .env create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 compose.yaml create mode 100644 main.py create mode 100644 requirements.txt diff --git a/.env b/.env new file mode 100644 index 0000000..7af388e --- /dev/null +++ b/.env @@ -0,0 +1,29 @@ +# Password for the 'elastic' user (at least 6 characters) +ELASTIC_PASSWORD=elastic + +# Password for the 'kibana_system' user (at least 6 characters) +KIBANA_PASSWORD=kibana + +# Version of Elastic products +STACK_VERSION=8.8.2 + +# Set the cluster name +CLUSTER_NAME=docker-cluster + +# Set to 'basic' or 'trial' to automatically start the 30-day trial +LICENSE=basic +#LICENSE=trial + +# Port to expose Elasticsearch HTTP API to the host +ES_PORT=9200 +#ES_PORT=127.0.0.1:9200 + +# Port to expose Kibana to the host +KIBANA_PORT=5601 +#KIBANA_PORT=80 + +# Increase or decrease based on the available host memory (in bytes) +MEM_LIMIT=1073741824 + +# Project namespace (defaults to the current folder name if not set) +#COMPOSE_PROJECT_NAME=myproject \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d6bf7e --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +certs/ +kibanadata/ +esdata01/ +esdata02/ +esdata03/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cf1e8a6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.9-slim-buster + +WORKDIR /app + +COPY main.py . +COPY requirements.txt . 
+ +RUN pip install --no-cache-dir -r requirements.txt + +ENV INPUT_RELAY=wss://relay.nostr.band +ENV KINDS=[0,1] + +ENTRYPOINT [ "python", "-u", "./main.py" ] \ No newline at end of file diff --git a/README.md b/README.md index 7397fff..32e5a15 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,14 @@ # search Nostr Search with Elasticsearch and Kibana + +# Configuration +Please configure .env file to your needs! Change the Elasticsearch and Kibana Password for your own safety! + +# Run Docker Compose +`docker compose build && docker compose up` + +# Contribute +Feel free to contribute! + +# Buy me a coffee +⚡️ highperfocused@getalby.com \ No newline at end of file diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000..d22d3b8 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,237 @@ +version: "2.2" + +services: + setup: + image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} + volumes: + - ./certs:/usr/share/elasticsearch/config/certs + user: "0" + command: > + bash -c ' + if [ x${ELASTIC_PASSWORD} == x ]; then + echo "Set the ELASTIC_PASSWORD environment variable in the .env file"; + exit 1; + elif [ x${KIBANA_PASSWORD} == x ]; then + echo "Set the KIBANA_PASSWORD environment variable in the .env file"; + exit 1; + fi; + if [ ! -f config/certs/ca.zip ]; then + echo "Creating CA"; + bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip; + unzip config/certs/ca.zip -d config/certs; + fi; + if [ ! 
-f config/certs/certs.zip ]; then + echo "Creating certs"; + echo -ne \ + "instances:\n"\ + " - name: es01\n"\ + " dns:\n"\ + " - es01\n"\ + " - localhost\n"\ + " ip:\n"\ + " - 127.0.0.1\n"\ + " - name: es02\n"\ + " dns:\n"\ + " - es02\n"\ + " - localhost\n"\ + " ip:\n"\ + " - 127.0.0.1\n"\ + " - name: es03\n"\ + " dns:\n"\ + " - es03\n"\ + " - localhost\n"\ + " ip:\n"\ + " - 127.0.0.1\n"\ + > config/certs/instances.yml; + bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key; + unzip config/certs/certs.zip -d config/certs; + fi; + echo "Setting file permissions" + chown -R root:root config/certs; + find . -type d -exec chmod 750 \{\} \;; + find . -type f -exec chmod 640 \{\} \;; + echo "Waiting for Elasticsearch availability"; + until curl -s --cacert config/certs/ca/ca.crt https://es01:9200 | grep -q "missing authentication credentials"; do sleep 30; done; + echo "Setting kibana_system password"; + until curl -s -X POST --cacert config/certs/ca/ca.crt -u "elastic:${ELASTIC_PASSWORD}" -H "Content-Type: application/json" https://es01:9200/_security/user/kibana_system/_password -d "{\"password\":\"${KIBANA_PASSWORD}\"}" | grep -q "^{}"; do sleep 10; done; + echo "All done!"; + ' + healthcheck: + test: ["CMD-SHELL", "[ -f config/certs/es01/es01.crt ]"] + interval: 1s + timeout: 5s + retries: 120 + + es01: + depends_on: + setup: + condition: service_healthy + image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} + volumes: + - ./certs:/usr/share/elasticsearch/config/certs + - ./esdata01:/usr/share/elasticsearch/data + ports: + - ${ES_PORT}:9200 + environment: + - node.name=es01 + - cluster.name=${CLUSTER_NAME} + - cluster.initial_master_nodes=es01,es02,es03 + - discovery.seed_hosts=es02,es03 + - ELASTIC_PASSWORD=${ELASTIC_PASSWORD} + - bootstrap.memory_lock=true + - xpack.security.enabled=true + - xpack.security.http.ssl.enabled=true + 
- xpack.security.http.ssl.key=certs/es01/es01.key + - xpack.security.http.ssl.certificate=certs/es01/es01.crt + - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.enabled=true + - xpack.security.transport.ssl.key=certs/es01/es01.key + - xpack.security.transport.ssl.certificate=certs/es01/es01.crt + - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.verification_mode=certificate + - xpack.license.self_generated.type=${LICENSE} + mem_limit: ${MEM_LIMIT} + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: + [ + "CMD-SHELL", + "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'", + ] + interval: 10s + timeout: 10s + retries: 120 + + es02: + depends_on: + - es01 + image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} + volumes: + - ./certs:/usr/share/elasticsearch/config/certs + - ./esdata02:/usr/share/elasticsearch/data + environment: + - node.name=es02 + - cluster.name=${CLUSTER_NAME} + - cluster.initial_master_nodes=es01,es02,es03 + - discovery.seed_hosts=es01,es03 + - bootstrap.memory_lock=true + - xpack.security.enabled=true + - xpack.security.http.ssl.enabled=true + - xpack.security.http.ssl.key=certs/es02/es02.key + - xpack.security.http.ssl.certificate=certs/es02/es02.crt + - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.enabled=true + - xpack.security.transport.ssl.key=certs/es02/es02.key + - xpack.security.transport.ssl.certificate=certs/es02/es02.crt + - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.verification_mode=certificate + - xpack.license.self_generated.type=${LICENSE} + mem_limit: ${MEM_LIMIT} + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: + [ + "CMD-SHELL", + "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 
'missing authentication credentials'", + ] + interval: 10s + timeout: 10s + retries: 120 + + es03: + depends_on: + - es02 + image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION} + volumes: + - ./certs:/usr/share/elasticsearch/config/certs + - ./esdata03:/usr/share/elasticsearch/data + environment: + - node.name=es03 + - cluster.name=${CLUSTER_NAME} + - cluster.initial_master_nodes=es01,es02,es03 + - discovery.seed_hosts=es01,es02 + - bootstrap.memory_lock=true + - xpack.security.enabled=true + - xpack.security.http.ssl.enabled=true + - xpack.security.http.ssl.key=certs/es03/es03.key + - xpack.security.http.ssl.certificate=certs/es03/es03.crt + - xpack.security.http.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.enabled=true + - xpack.security.transport.ssl.key=certs/es03/es03.key + - xpack.security.transport.ssl.certificate=certs/es03/es03.crt + - xpack.security.transport.ssl.certificate_authorities=certs/ca/ca.crt + - xpack.security.transport.ssl.verification_mode=certificate + - xpack.license.self_generated.type=${LICENSE} + mem_limit: ${MEM_LIMIT} + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: + [ + "CMD-SHELL", + "curl -s --cacert config/certs/ca/ca.crt https://localhost:9200 | grep -q 'missing authentication credentials'", + ] + interval: 10s + timeout: 10s + retries: 120 + + kibana: + depends_on: + es01: + condition: service_healthy + es02: + condition: service_healthy + es03: + condition: service_healthy + image: docker.elastic.co/kibana/kibana:${STACK_VERSION} + volumes: + - ./certs:/usr/share/kibana/config/certs + - ./kibanadata:/usr/share/kibana/data + ports: + - ${KIBANA_PORT}:5601 + environment: + - SERVERNAME=kibana + - ELASTICSEARCH_HOSTS=https://es01:9200 + - ELASTICSEARCH_USERNAME=kibana_system + - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD} + - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt + mem_limit: ${MEM_LIMIT} + healthcheck: + test: + [ + "CMD-SHELL", + "curl -s 
"""Scrape Nostr events from a relay and index them into Elasticsearch.

Configuration is read from environment variables:

* ``INPUT_RELAY``      - websocket URL of the relay to read from (required)
* ``KINDS``            - JSON list of event kinds to request, e.g. ``[0,1]``
                         (required)
* ``ELASTIC_PASSWORD`` - password of the ``elastic`` user (default ``elastic``)
"""
import asyncio
import json
import os

import websockets
from elasticsearch import Elasticsearch

# Subscription id sent with every REQ message.
SUBSCRIPTION_ID = "1337"
# Seconds to wait before every (re)connection attempt.
RECONNECT_DELAY = 1
# Values of ``result`` in the index response that mean the document was stored.
# Documents are indexed under the event id, so an event that is delivered a
# second time is reported as "updated" rather than "created".
INDEXED_RESULTS = ("created", "updated")


def _build_request(kinds, limit=None):
    """Return the REQ message (a JSON string) subscribing to *kinds*.

    *kinds* is the JSON text of a list of event kinds, e.g. ``"[0,1]"``.
    When *limit* is given it is added to the filter.

    Raises:
        ValueError: if *kinds* is not valid JSON.
        TypeError: if *kinds* is not a string.
    """
    filters = {"kinds": json.loads(kinds)}
    if limit is not None:
        filters["limit"] = limit
    return json.dumps(["REQ", SUBSCRIPTION_ID, filters])


def _index_event(message, es):
    """Index the event carried by one decoded relay message, if any.

    EVENT messages are expected as ``["EVENT", <subscription id>, <event>]``;
    the event is stored in the ``nostr`` index under its own id.
    """
    if not isinstance(message, list) or not message:
        return

    label = message[0]
    if label == "EVENT":
        event = message[2]
        print("Sending event " + str(event['id']) + " (kind: "+str(event['kind'])+") to elasticsearch")
        # NOTE(review): this is a synchronous client call inside a coroutine,
        # so the event loop is blocked while the request is in flight.
        resp = es.index(index="nostr", id=str(event['id']), document=event)
        if resp['result'] not in INDEXED_RESULTS:
            print("Failed to send event to elasticsearch: "+str(resp))
    elif label == "EOSE":
        print("End of stream")


async def _consume(socket, es):
    """Index every event received on *socket* until the connection closes."""
    while True:
        try:
            raw = await socket.recv()
        except websockets.ConnectionClosed:
            return

        try:
            _index_event(json.loads(raw), es)
        except Exception as error:
            # Best effort per message: one bad frame or one failed index call
            # must not end the whole session.
            print(f"Failed to relay event: {error}")
            if "sent 1011" in str(error):
                print("Got Code 1011 -> Closing websockets...")
                # Close this connection; the next recv() then reports the
                # closed connection and the caller reconnects.
                await socket.close()


async def relay_websockets(inputWebsocket, kinds, es):
    """Relay events from the input relay into Elasticsearch, forever.

    Events are first read from *inputWebsocket*. Whenever the connection is
    closed, a new connection to the ``INPUT_RELAY`` URL is opened after a
    short delay and the subscription is re-sent with a limit of 10.

    Args:
        inputWebsocket: open websocket connection on which the subscription
            request has already been sent.
        kinds: JSON text of the list of event kinds to request on reconnect.
        es: Elasticsearch client used to index the events.

    Raises:
        ValueError: if *kinds* is not valid JSON.
    """
    # Built up front so that a malformed *kinds* fails before any work is done.
    request = _build_request(kinds, limit=10)
    relay_url = os.environ.get("INPUT_RELAY")

    await _consume(inputWebsocket, es)

    # Reconnect in a loop instead of re-entering this coroutine, so that a
    # flaky relay cannot grow the call stack without bound.
    while True:
        print("Connection closed, attempting to reconnect...")
        await asyncio.sleep(RECONNECT_DELAY)
        try:
            async with websockets.connect(relay_url) as socket:
                await socket.send(request)
                await _consume(socket, es)
        except Exception as error:
            # Top-level boundary of the reconnect loop: report and try again.
            print(f"Failed to reconnect: {error}")


async def main():
    """Validate the configuration, connect to the relay and start relaying.

    Raises:
        ValueError: if ``INPUT_RELAY`` or ``KINDS`` is unset, or ``KINDS`` is
            not valid JSON.
    """
    print("Scraper started...")
    # Read the configuration from environment variables
    inputUrl = os.environ.get("INPUT_RELAY")
    kinds = os.environ.get("KINDS")
    ELASTIC_PASSWORD = os.getenv("ELASTIC_PASSWORD", "elastic")

    # Fail fast on configuration errors; retrying cannot fix them.
    if not inputUrl:
        raise ValueError("Please set the INPUT_RELAY environment variable")
    if not kinds:
        raise ValueError("Please set the KINDS environment variable")
    request = _build_request(kinds)

    # Create the client instance once and reuse it across reconnects
    client = Elasticsearch(
        "https://es01:9200",
        ca_certs="/app/certs/ca/ca.crt",
        basic_auth=("elastic", ELASTIC_PASSWORD)
    )

    # relay_websockets() only comes back by raising, so each pass of this
    # loop is one connection attempt.
    while True:
        try:
            async with websockets.connect(inputUrl) as inputWebsocket:
                await inputWebsocket.send(request)
                await relay_websockets(inputWebsocket, kinds, es=client)
        except Exception as error:
            print(f"Failed to connect: {error}")
            await asyncio.sleep(RECONNECT_DELAY)


# Start the script
if __name__ == "__main__":
    asyncio.run(main())