initial code commit
34 scraper/.dockerignore Normal file
@@ -0,0 +1,34 @@
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/go/build-context-dockerignore/

**/.DS_Store
**/__pycache__
**/.venv
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/bin
**/charts
**/docker-compose*
**/compose.y*ml
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/obj
**/secrets.dev.yaml
**/values.dev.yaml
LICENSE
README.md
48 scraper/Dockerfile Normal file
@@ -0,0 +1,48 @@
# syntax=docker/dockerfile:1

# Comments are provided throughout this file to help you get started.
# If you need more help, visit the Dockerfile reference guide at
# https://docs.docker.com/go/dockerfile-reference/

# Want to help us make this template better? Share your feedback here: https://forms.gle/ybq9Krt8jtBL3iCk7

ARG PYTHON_VERSION=3.13.1
FROM python:${PYTHON_VERSION}-slim AS base

# Prevents Python from writing pyc files.
ENV PYTHONDONTWRITEBYTECODE=1

# Keeps Python from buffering stdout and stderr to avoid situations where
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/go/dockerfile-user-best-practices/
ARG UID=10001
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid "${UID}" \
    appuser

# Download dependencies as a separate step to take advantage of Docker's caching.
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
# Leverage a bind mount to requirements.txt to avoid having to copy it
# into this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements.txt,target=requirements.txt \
    python -m pip install -r requirements.txt

# Switch to the non-privileged user to run the application.
USER appuser

# Copy the source code into the container.
COPY . .

# Run the application (exec form, so signals reach the Python process).
CMD ["python", "main.py"]
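For reference, a minimal build-and-run sketch for this image (the tag name, relay URLs, and kinds value below are illustrative placeholders, not part of the commit):

# Build the image from the repository root.
docker build -t scraper ./scraper

# Run it; main.py requires INPUT_RELAY and OUTPUT_RELAY and also reads KINDS.
docker run --rm \
  -e INPUT_RELAY=wss://input.relay.example \
  -e OUTPUT_RELAY=wss://output.relay.example \
  -e KINDS='[1]' \
  scraper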
66 scraper/main.py Normal file
@@ -0,0 +1,66 @@
import asyncio
import os
import websockets
import json
from datetime import datetime
from uuid import uuid4


async def relay_websockets(input_websocket, output_websocket, kinds, sub_id):
    while True:
        try:
            # Wait for an event on the input websocket
            event = json.loads(await input_websocket.recv())
            try:
                if event[0] == "EVENT":
                    print("Received ID:", event[2]["id"], "// Kind:", event[2]["kind"])
                    # Forward the event to the output websocket
                    await output_websocket.send(json.dumps(["EVENT", sub_id, event[2]]))
                elif event[0] == "EOSE":
                    print("End of stream")

            except Exception as error:
                print(f"Failed to relay event: {error}")
                if "sent 1011" in str(error):
                    print("Got code 1011 -> closing websockets...")
                    # close() is a coroutine and must be awaited
                    await input_websocket.close()
                    await output_websocket.close()
                continue

        except websockets.ConnectionClosed:
            # If either websocket is closed, attempt to reconnect
            print("Connection closed, attempting to reconnect...")
            await asyncio.sleep(1)
            break


async def main():
    print("Scraper started...")
    # Read the websocket URLs from environment variables
    input_url = os.environ.get("INPUT_RELAY")
    output_url = os.environ.get("OUTPUT_RELAY")
    # KINDS is expected to be a JSON array of kind numbers, e.g. [0, 1]
    kinds = os.environ.get("KINDS")

    # If either relay URL is missing, raise an error
    if not input_url:
        raise ValueError("Please set the INPUT_RELAY environment variable")
    if not output_url:
        raise ValueError("Please set the OUTPUT_RELAY environment variable")

    while True:
        try:
            sub_id = str(uuid4())
            async with websockets.connect(input_url) as input_websocket, \
                       websockets.connect(output_url) as output_websocket:
                message = f'["REQ", "{sub_id}", {{"kinds": {kinds}}}]'
                await input_websocket.send(message)
                await relay_websockets(input_websocket, output_websocket, kinds, sub_id)

        except Exception as error:
            # If the connection attempt fails, wait briefly and reconnect
            print(f"Failed to connect: {error}")
            await asyncio.sleep(1)
            if "maximum recursion depth exceeded" in str(error):
                raise RuntimeError("Maximum recursion depth exceeded, crashing application.")
            continue


# Start the script
asyncio.run(main())
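The script speaks a Nostr-style protocol: it opens a subscription on the input relay with ["REQ", <sub_id>, {"kinds": ...}], forwards each "EVENT" frame to the output relay, and logs "End of stream" when "EOSE" arrives. For a quick run outside the container, something like the following should work (a sketch; the relay URLs are placeholders and KINDS must be a JSON array of kind numbers):

pip install -r requirements.txt
INPUT_RELAY=wss://input.relay.example \
OUTPUT_RELAY=wss://output.relay.example \
KINDS='[0, 1]' \
python main.py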
1 scraper/requirements.txt Normal file
@@ -0,0 +1 @@
websockets