initial code commit

Author: mr0x50
Date: 2025-02-23 21:58:12 +01:00
Commit: 1b88bf9e84
10 changed files with 747 additions and 0 deletions

scraper/.dockerignore (Normal file, 34 lines added)

@@ -0,0 +1,34 @@
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/go/build-context-dockerignore/
**/.DS_Store
**/__pycache__
**/.venv
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/bin
**/charts
**/docker-compose*
**/compose.y*ml
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
README.md

scraper/Dockerfile (Normal file, 48 lines added)

@@ -0,0 +1,48 @@
# syntax=docker/dockerfile:1
# Comments are provided throughout this file to help you get started.
# If you need more help, visit the Dockerfile reference guide at
# https://docs.docker.com/go/dockerfile-reference/
# Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7
ARG PYTHON_VERSION=3.13.1
FROM python:${PYTHON_VERSION}-slim AS base
# Prevents Python from writing pyc files.
ENV PYTHONDONTWRITEBYTECODE=1
# Keeps Python from buffering stdout and stderr to avoid situations where
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/go/dockerfile-user-best-practices/
ARG UID=10001
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid "${UID}" \
    appuser
# Download dependencies as a separate step to take advantage of Docker's caching.
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
# Leverage a bind mount to requirements.txt to avoid having to copy it into
# this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements.txt,target=requirements.txt \
    python -m pip install -r requirements.txt
# Switch to the non-privileged user to run the application.
USER appuser
# Copy the source code into the container.
COPY . .
# Run the application.
CMD ["python", "main.py"]

scraper/main.py (Normal file, 66 lines added)

@@ -0,0 +1,66 @@
import asyncio
import json
import os
from uuid import uuid4

import websockets


async def relay_websockets(input_websocket, output_websocket, sub_id):
    while True:
        try:
            # Wait for an event on the input websocket.
            event = json.loads(await input_websocket.recv())
            try:
                if event[0] == "EVENT":
                    print("Received ID:", event[2]["id"], "// Kind:", event[2]["kind"])
                    # Forward the event to the output websocket.
                    await output_websocket.send(json.dumps(["EVENT", sub_id, event[2]]))
                elif event[0] == "EOSE":
                    print("End of stream")
            except Exception as error:
                print(f"Failed to relay event: {error}")
                if "sent 1011" in str(error):
                    print("Got Code 1011 -> Closing websockets...")
                    await input_websocket.close()
                    await output_websocket.close()
        except websockets.ConnectionClosed:
            # If either websocket is closed, break out so main() can reconnect.
            print("Connection closed, attempting to reconnect...")
            await asyncio.sleep(1)
            break


async def main():
    print("Scraper started...")
    # Read the relay URLs and event kinds from environment variables.
    input_url = os.environ.get("INPUT_RELAY")
    output_url = os.environ.get("OUTPUT_RELAY")
    kinds = os.environ.get("KINDS")
    # If any required variable is missing, raise an error.
    if not input_url:
        raise ValueError("Please set the INPUT_RELAY environment variable")
    if not output_url:
        raise ValueError("Please set the OUTPUT_RELAY environment variable")
    if not kinds:
        raise ValueError("Please set the KINDS environment variable")
    while True:
        try:
            sub_id = str(uuid4())
            async with websockets.connect(input_url) as input_websocket, \
                    websockets.connect(output_url) as output_websocket:
                # Subscribe to the requested kinds on the input relay.
                # KINDS is expected to be a JSON array, e.g. [1,7].
                message = f'["REQ", "{sub_id}", {{"kinds": {kinds}}}]'
                await input_websocket.send(message)
                await relay_websockets(input_websocket, output_websocket, sub_id)
        except Exception as error:
            # If the connection attempt fails, wait briefly and reconnect.
            print(f"Failed to connect: {error}")
            await asyncio.sleep(1)
            if "maximum recursion depth exceeded" in str(error):
                raise RuntimeError("Maximum recursion depth exceeded, crashing application.")


# Start the script.
asyncio.run(main())
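
For reference, a minimal sketch of the NIP-01 message shapes this script exchanges; the kind filter, ids, and field values below are made-up examples, not output of this code:

import json
from uuid import uuid4

sub_id = str(uuid4())
# Subscription request sent to the input relay (KINDS=[1] assumed here).
req = json.dumps(["REQ", sub_id, {"kinds": [1]}])
# Per matching event, the input relay replies with (placeholder fields):
incoming = ["EVENT", sub_id, {"id": "aa" * 32, "pubkey": "bb" * 32,
                              "created_at": 0, "kind": 1, "tags": [],
                              "content": "hello", "sig": "cc" * 64}]
# The scraper re-wraps event[2] under its own sub_id and forwards it as-is:
outgoing = json.dumps(["EVENT", sub_id, incoming[2]])
# ["EOSE", sub_id] from the relay marks the end of stored events.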

scraper/requirements.txt (Normal file, 1 line added)

@@ -0,0 +1 @@
websockets
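
To exercise the scraper without real Nostr relays, a throwaway local relay along these lines could act as both INPUT_RELAY and OUTPUT_RELAY; the port, handler behavior, and canned event are assumptions for testing, not part of this commit:

import asyncio
import json

import websockets

# A canned kind-1 event with placeholder fields, for local testing only.
FAKE_EVENT = {"id": "00" * 32, "pubkey": "00" * 32, "created_at": 0,
              "kind": 1, "tags": [], "content": "hello", "sig": "00" * 64}

async def handler(ws):
    async for raw in ws:
        msg = json.loads(raw)
        if msg[0] == "REQ":
            sub_id = msg[1]
            # Answer the subscription with one stored event, then EOSE.
            await ws.send(json.dumps(["EVENT", sub_id, FAKE_EVENT]))
            await ws.send(json.dumps(["EOSE", sub_id]))
        elif msg[0] == "EVENT":
            # Print whatever the scraper forwards to us.
            print("received:", msg)

async def main():
    async with websockets.serve(handler, "localhost", 7777):
        await asyncio.Future()  # run forever

asyncio.run(main())

With this running, the scraper can be started with INPUT_RELAY=ws://localhost:7777, OUTPUT_RELAY=ws://localhost:7777, and KINDS=[1].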