mirror of
https://github.com/believethehype/nostrdvm.git
synced 2025-04-01 00:18:04 +02:00
add second task, pdf-to-text
example for kind 5000 event
This commit is contained in:
parent
c450c8544f
commit
535d5c2dd6
@ -5,4 +5,5 @@ USER_DB_PATH = nostrzaps.db
|
||||
LNBITS_INVOICE_KEY = lnbitswalletinvoicekey
|
||||
LNBITS_HOST = https://lnbits.com
|
||||
|
||||
TASK_TEXTEXTRACTION_NIP89_DTAG = "asdd"
|
||||
TASK_TRANSLATION_NIP89_DTAG = abcded
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Nostr Data Vending Machine Python Implementation
|
||||
|
||||
This example DVM implementation in Python currently supports simple translations using Google translate.
|
||||
This example DVM implementation in Python currently supports simple translations using Google Translate, as well as extracting text from links to PDF files.
|
||||
|
||||
At a later stage, additional example tasks will be added, as well as the integration into a larger Machine Learning backend
|
||||
|
||||
|
20
dvm.py
20
dvm.py
@ -4,7 +4,7 @@ import time
|
||||
import emoji
|
||||
from utils.definitions import EventDefinitions, DVMConfig, RequiredJobToWatch, JobToWatch, LOCAL_TASKS
|
||||
from utils.admin_utils import admin_make_database_updates
|
||||
from utils.ai_utils import GoogleTranslate
|
||||
from utils.ai_utils import extract_text_from_pdf, google_translate
|
||||
from utils.backend_utils import get_amount_per_task, check_task_is_supported, get_task
|
||||
from utils.database_utils import update_sql_table, get_from_sql_table, \
|
||||
create_sql_table, get_or_add_user, update_user_balance
|
||||
@ -38,6 +38,7 @@ def dvm(config):
|
||||
|
||||
dm_zap_filter = Filter().pubkey(pk).kinds([EventDefinitions.KIND_ZAP]).since(Timestamp.now())
|
||||
dvm_filter = (Filter().kinds([EventDefinitions.KIND_NIP90_GENERIC,
|
||||
EventDefinitions.KIND_NIP90_EXTRACT_TEXT,
|
||||
EventDefinitions.KIND_NIP90_TRANSLATE_TEXT,
|
||||
]).since(Timestamp.now()))
|
||||
client.subscribe([dm_zap_filter, dvm_filter])
|
||||
@ -60,13 +61,12 @@ def dvm(config):
|
||||
user = get_or_add_user(event.pubkey().to_hex())
|
||||
is_whitelisted = user[2]
|
||||
is_blacklisted = user[3]
|
||||
if is_whitelisted:
|
||||
task_supported, task, duration = check_task_is_supported(event, client=client, get_duration=False,
|
||||
config=dvm_config)
|
||||
print(task)
|
||||
else:
|
||||
task_supported, task, duration = check_task_is_supported(event, client=client, get_duration=True,
|
||||
config=dvm_config)
|
||||
|
||||
task_supported, task, duration = check_task_is_supported(event, client=client,
|
||||
get_duration=(not is_whitelisted),
|
||||
config=dvm_config)
|
||||
print(task)
|
||||
|
||||
if is_blacklisted:
|
||||
send_job_status_reaction(event, "error", client=client, config=dvm_config)
|
||||
print("[Nostr] Request by blacklisted user, skipped")
|
||||
@ -231,7 +231,9 @@ def dvm(config):
|
||||
options = dict(opts)
|
||||
|
||||
if task == "translation":
|
||||
result = GoogleTranslate(options["text"], options["translation_lang"])
|
||||
result = google_translate(options["text"], options["translation_lang"])
|
||||
elif task == "pdf-to-text":
|
||||
result = extract_text_from_pdf(options["url"])
|
||||
# TODO ADD FURTHER LOCAL TASKS HERE
|
||||
|
||||
check_and_return_event(result, str(job_event.as_json()),
|
||||
|
22
main.py
22
main.py
@ -11,21 +11,29 @@ def run_nostr_dvm_with_local_config():
|
||||
from dvm import dvm, DVMConfig
|
||||
from utils.nip89_utils import NIP89Announcement
|
||||
|
||||
|
||||
dvmconfig = DVMConfig()
|
||||
dvmconfig.PRIVATE_KEY = os.getenv(env.NOSTR_PRIVATE_KEY)
|
||||
dvmconfig.SUPPORTED_TASKS = ["translation"]
|
||||
dvmconfig.SUPPORTED_TASKS = ["translation", "pdf-to-text"]
|
||||
dvmconfig.LNBITS_INVOICE_KEY = os.getenv(env.LNBITS_INVOICE_KEY)
|
||||
dvmconfig.LNBITS_URL = os.getenv(env.LNBITS_HOST)
|
||||
|
||||
# In admin_utils, set rebroadcast_nip89 to true to (re)broadcast your DVM. You can create a valid dtag and the content on vendata.io
|
||||
# Add the dtag in your .env file so you can update your dvm later and change the content here as needed.
|
||||
|
||||
nip89extraction = NIP89Announcement()
|
||||
nip89extraction.kind = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
|
||||
nip89extraction.dtag = os.getenv(env.TASK_TRANSLATION_NIP89_DTAG)
|
||||
nip89extraction.pk = os.getenv(env.NOSTR_PRIVATE_KEY)
|
||||
nip89extraction.content = "{\"name\":\"Pdf Extractor\",\"image\":\"https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg\",\"about\":\"I extract Text from pdf documents\",\"nip90Params\":{}}"
|
||||
dvmconfig.NIP89s.append(nip89extraction)
|
||||
|
||||
nip89translation = NIP89Announcement()
|
||||
nip89translation.kind = EventDefinitions.KIND_NIP90_TRANSLATE_TEXT
|
||||
nip89translation.dtag = os.getenv(env.TASK_TRANSLATION_NIP89_DTAG)
|
||||
nip89translation.pk = os.getenv(env.NOSTR_PRIVATE_KEY)
|
||||
nip89translation.content = "{\"name\":\"NostrAI DVM Translator\",\"image\":\"https://cdn.nostr.build/i/feb98d8700abe7d6c67d9106a72a20354bf50805af79869638f5a32d24a5ac2a.jpg\",\"about\":\"Translates Text from given text/event/job, currently using Google Translation Services into language defined in param. \",\"nip90Params\":{\"language\":{\"required\":true,\"values\":[\"af\",\"am\",\"ar\",\"az\",\"be\",\"bg\",\"bn\",\"bs\",\"ca\",\"ceb\",\"co\",\"cs\",\"cy\",\"da\",\"de\",\"el\",\"eo\",\"es\",\"et\",\"eu\",\"fa\",\"fi\",\"fr\",\"fy\",\"ga\",\"gd\",\"gl\",\"gu\",\"ha\",\"haw\",\"hi\",\"hmn\",\"hr\",\"ht\",\"hu\",\"hy\",\"id\",\"ig\",\"is\",\"it\",\"he\",\"ja\",\"jv\",\"ka\",\"kk\",\"km\",\"kn\",\"ko\",\"ku\",\"ky\",\"la\",\"lb\",\"lo\",\"lt\",\"lv\",\"mg\",\"mi\",\"mk\",\"ml\",\"mn\",\"mr\",\"ms\",\"mt\",\"my\",\"ne\",\"nl\",\"no\",\"ny\",\"or\",\"pa\",\"pl\",\"ps\",\"pt\",\"ro\",\"ru\",\"sd\",\"si\",\"sk\",\"sl\",\"sm\",\"sn\",\"so\",\"sq\",\"sr\",\"st\",\"su\",\"sv\",\"sw\",\"ta\",\"te\",\"tg\",\"th\",\"tl\",\"tr\",\"ug\",\"uk\",\"ur\",\"uz\",\"vi\",\"xh\",\"yi\",\"yo\",\"zh\",\"zu\"]}}}"
|
||||
nip89translation.content = "{\"name\":\"Translator\",\"image\":\"https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg\",\"about\":\"I translate text from given text/event/job, currently using Google Translation Services into language defined in param. \",\"nip90Params\":{\"language\":{\"required\":true,\"values\":[\"af\",\"am\",\"ar\",\"az\",\"be\",\"bg\",\"bn\",\"bs\",\"ca\",\"ceb\",\"co\",\"cs\",\"cy\",\"da\",\"de\",\"el\",\"eo\",\"es\",\"et\",\"eu\",\"fa\",\"fi\",\"fr\",\"fy\",\"ga\",\"gd\",\"gl\",\"gu\",\"ha\",\"haw\",\"hi\",\"hmn\",\"hr\",\"ht\",\"hu\",\"hy\",\"id\",\"ig\",\"is\",\"it\",\"he\",\"ja\",\"jv\",\"ka\",\"kk\",\"km\",\"kn\",\"ko\",\"ku\",\"ky\",\"la\",\"lb\",\"lo\",\"lt\",\"lv\",\"mg\",\"mi\",\"mk\",\"ml\",\"mn\",\"mr\",\"ms\",\"mt\",\"my\",\"ne\",\"nl\",\"no\",\"ny\",\"or\",\"pa\",\"pl\",\"ps\",\"pt\",\"ro\",\"ru\",\"sd\",\"si\",\"sk\",\"sl\",\"sm\",\"sn\",\"so\",\"sq\",\"sr\",\"st\",\"su\",\"sv\",\"sw\",\"ta\",\"te\",\"tg\",\"th\",\"tl\",\"tr\",\"ug\",\"uk\",\"ur\",\"uz\",\"vi\",\"xh\",\"yi\",\"yo\",\"zh\",\"zu\"]}}}"
|
||||
dvmconfig.NIP89s.append(nip89translation)
|
||||
|
||||
|
||||
nostr_dvm_thread = Thread(target=dvm, args=[dvmconfig])
|
||||
nostr_dvm_thread.start()
|
||||
|
||||
@ -39,10 +47,4 @@ if __name__ == '__main__':
|
||||
else:
|
||||
raise FileNotFoundError(f'.env file not found at {env_path} ')
|
||||
|
||||
|
||||
|
||||
run_nostr_dvm_with_local_config()
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -30,3 +30,37 @@ translatepy==2.3
|
||||
tzdata==2023.3
|
||||
urllib3==2.1.0
|
||||
wcwidth==0.2.10
|
||||
(venv) tobias@Tobiass-MacBook-Pro-2 dvm % pip freeze
|
||||
beautifulsoup4==4.12.2
|
||||
bech32==1.2.0
|
||||
blessed==1.20.0
|
||||
certifi==2023.7.22
|
||||
charset-normalizer==3.3.2
|
||||
emoji==2.8.0
|
||||
ffmpegio==0.8.5
|
||||
ffmpegio-core==0.8.5
|
||||
idna==3.4
|
||||
inquirer==3.1.3
|
||||
install==1.3.5
|
||||
nostr-sdk==0.0.4
|
||||
numpy==1.26.2
|
||||
packaging==23.2
|
||||
pandas==2.1.3
|
||||
Pillow==10.1.0
|
||||
pluggy==1.3.0
|
||||
pycryptodome==3.19.0
|
||||
pypdf==3.17.1
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.0
|
||||
python-editor==1.0.4
|
||||
pytz==2023.3.post1
|
||||
pyuseragents==1.0.5
|
||||
readchar==4.0.5
|
||||
requests==2.31.0
|
||||
safeIO==1.2
|
||||
six==1.16.0
|
||||
soupsieve==2.5
|
||||
translatepy==2.3
|
||||
tzdata==2023.3
|
||||
urllib3==2.1.0
|
||||
wcwidth==0.2.10
|
||||
|
@ -3,8 +3,8 @@ import time
|
||||
|
||||
from nostr_sdk import Keys, EventBuilder, PublicKey
|
||||
|
||||
from utils.database_utils import get_from_sql_table, list_db, clear_db, delete_from_sql_table, update_sql_table, \
|
||||
get_or_add_user, update_user_metadata
|
||||
from utils.database_utils import get_from_sql_table, list_db, delete_from_sql_table, update_sql_table, \
|
||||
get_or_add_user, clean_db
|
||||
from utils.nip89_utils import nip89_announce_tasks
|
||||
from utils.nostr_utils import send_event
|
||||
|
||||
@ -14,7 +14,7 @@ def admin_make_database_updates(config=None, client=None):
|
||||
dvmconfig = config
|
||||
|
||||
rebroadcast_nip89 = False
|
||||
cleardb = False
|
||||
cleandb = False
|
||||
listdatabase = False
|
||||
deleteuser = False
|
||||
whitelistuser = False
|
||||
@ -57,8 +57,8 @@ def admin_make_database_updates(config=None, client=None):
|
||||
if deleteuser:
|
||||
delete_from_sql_table(publickey)
|
||||
|
||||
if cleardb:
|
||||
clear_db()
|
||||
if cleandb:
|
||||
clean_db()
|
||||
|
||||
if listdatabase:
|
||||
list_db()
|
||||
|
@ -1,3 +1,5 @@
|
||||
import os
|
||||
|
||||
|
||||
#We can add multiple Tasks here and call them in the do_work function.
|
||||
|
||||
@ -5,7 +7,7 @@
|
||||
#the according event type in the beginning of dvm.py and
|
||||
|
||||
|
||||
def GoogleTranslate(text, translation_lang):
|
||||
def google_translate(text, translation_lang):
|
||||
from translatepy.translators.google import GoogleTranslate
|
||||
gtranslate = GoogleTranslate()
|
||||
length = len(text)
|
||||
@ -36,3 +38,21 @@ def GoogleTranslate(text, translation_lang):
|
||||
|
||||
|
||||
return translated_text
|
||||
|
||||
def extract_text_from_pdf(url):
    """Download the PDF at ``url`` and return the text of all its pages.

    The document is parsed from memory instead of being written to a fixed
    ``temp.pdf`` in the working directory, so parallel jobs cannot overwrite
    each other's download and no file is left behind when parsing fails.

    Args:
        url: HTTP(S) location of the PDF document.

    Returns:
        The extracted text of every page, concatenated in page order.

    Raises:
        requests.RequestException: if the download fails, times out or the
            server answers with an HTTP error status.
    """
    from io import BytesIO

    import requests
    from pypdf import PdfReader

    # Without a timeout an unresponsive host would block the job forever.
    response = requests.get(url, timeout=30)
    # Fail early on 4xx/5xx instead of handing an error page to the PDF parser.
    response.raise_for_status()

    reader = PdfReader(BytesIO(response.content))
    # Guard against a page yielding no text so the join never sees a non-string.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
||||
|
||||
|
@ -19,6 +19,30 @@ def get_task(event, client, dvmconfig):
|
||||
else:
|
||||
return "unknown job: " + event.as_json()
|
||||
|
||||
elif event.kind() == EventDefinitions.KIND_NIP90_EXTRACT_TEXT:
|
||||
for tag in event.tags():
|
||||
if tag.as_vec()[0] == "i":
|
||||
if tag.as_vec()[2] == "url":
|
||||
file_type = check_url_is_readable(tag.as_vec()[1])
|
||||
if file_type == "pdf":
|
||||
return "pdf-to-text"
|
||||
else:
|
||||
return "unknown job"
|
||||
elif tag.as_vec()[2] == "event":
|
||||
evt = get_event_by_id(tag.as_vec()[1],config=dvmconfig)
|
||||
if evt is not None:
|
||||
if evt.kind() == 1063:
|
||||
for tag in evt.tags():
|
||||
if tag.as_vec()[0] == 'url':
|
||||
file_type = check_url_is_readable(tag.as_vec()[1])
|
||||
if file_type == "pdf":
|
||||
return "pdf-to-text"
|
||||
else:
|
||||
return "unknown job"
|
||||
else:
|
||||
return "unknown type"
|
||||
|
||||
|
||||
elif event.kind() == EventDefinitions.KIND_NIP90_TRANSLATE_TEXT:
|
||||
return "translation"
|
||||
|
||||
@ -58,6 +82,8 @@ def check_task_is_supported(event, client, get_duration = False, config=None):
|
||||
print("No output set")
|
||||
if task not in dvmconfig.SUPPORTED_TASKS: # The Tasks this DVM supports (can be extended)
|
||||
return False, task, duration
|
||||
|
||||
|
||||
elif task == "translation" and (
|
||||
input_type != "event" and input_type != "job" and input_type != "text"): # The input types per task
|
||||
return False, task, duration
|
||||
@ -88,12 +114,15 @@ def check_url_is_readable(url):
|
||||
".mp3") or content_type == 'audio/ogg' or str(url).endswith(".ogg"):
|
||||
return "audio"
|
||||
elif content_type == 'image/png' or str(url).endswith(".png") or content_type == 'image/jpg' or str(url).endswith(
|
||||
".jpg") or content_type == 'image/jpeg' or str(url).endswith(".jpeg") or str(url).endswith(".pdf") or content_type == 'image/png' or str(
|
||||
".jpg") or content_type == 'image/jpeg' or str(url).endswith(".jpeg") or content_type == 'image/png' or str(
|
||||
url).endswith(".png"):
|
||||
return "image"
|
||||
elif content_type == 'video/mp4' or str(url).endswith(".mp4") or content_type == 'video/avi' or str(url).endswith(
|
||||
".avi") or content_type == 'video/mov' or str(url).endswith(".mov"):
|
||||
return "video"
|
||||
elif (str(url)).endswith(".pdf"):
|
||||
return "pdf"
|
||||
|
||||
# Otherwise we will not offer to do the job.
|
||||
return None
|
||||
|
||||
@ -101,6 +130,9 @@ def get_amount_per_task(task, duration = 0, config=None):
|
||||
dvmconfig = config
|
||||
if task == "translation":
|
||||
amount = dvmconfig.COSTPERUNIT_TRANSLATION
|
||||
elif task == "pdf-to-text":
|
||||
amount = dvmconfig.COSTPERUNIT_TEXT_EXTRACTION
|
||||
|
||||
else:
|
||||
print("[Nostr] Task " + task + " is currently not supported by this instance, skipping")
|
||||
return None
|
||||
|
@ -101,7 +101,7 @@ def delete_from_sql_table(npub):
|
||||
print(e)
|
||||
|
||||
|
||||
def clear_db():
|
||||
def clean_db():
|
||||
try:
|
||||
con = sqlite3.connect(os.getenv(env.USER_DB_PATH))
|
||||
cur = con.cursor()
|
||||
|
@ -3,7 +3,7 @@ from dataclasses import dataclass
|
||||
from nostr_sdk import Event
|
||||
NEW_USER_BALANCE = 250
|
||||
|
||||
LOCAL_TASKS = ["conversion", "summarization","note-recommendation", "inactive-following", "image-upscale", "translation"]
|
||||
LOCAL_TASKS = ["pdf-to-text", "translation"]
|
||||
# Tasks performed by the DVM itself and not sent to nova-server (can change later)
|
||||
|
||||
RELAY_LIST = ["wss://relay.damus.io", "wss://nostr-pub.wellorder.net", "wss://nos.lol", "wss://nostr.wine",
|
||||
@ -54,7 +54,8 @@ class DVMConfig:
|
||||
SHOWRESULTBEFOREPAYMENT: bool = True # if this is true show results even when not paid right after autoprocess
|
||||
NEW_USER_BALANCE: int = 250 # Free credits for new users
|
||||
|
||||
COSTPERUNIT_TRANSLATION: int = 20 # Still need to multiply this by duration
|
||||
COSTPERUNIT_TRANSLATION: int = 20
|
||||
COSTPERUNIT_TEXT_EXTRACTION: int = 20
|
||||
|
||||
NIP89s: list = []
|
||||
|
||||
|
@ -7,5 +7,6 @@ LNBITS_INVOICE_KEY = "LNBITS_INVOICE_KEY"
|
||||
LNBITS_HOST = "LNBITS_HOST"
|
||||
|
||||
TASK_TRANSLATION_NIP89_DTAG = "TASK_TRANSLATION_NIP89_DTAG"
|
||||
TASK_TEXTEXTRACTION_NIP89_DTAG = "TASK_TEXTEXTRACTION_NIP89_DTAG"
|
||||
|
||||
|
||||
|
@ -62,4 +62,29 @@ def create_requestform_from_nostr_event(event, is_bot=False, client=None, dvmcon
|
||||
text.replace('\U0001f919', "").replace("=", "equals").
|
||||
replace(";", ","))
|
||||
|
||||
|
||||
|
||||
|
||||
elif task == "pdf-to-text":
|
||||
input_type = "url"
|
||||
input_content = ""
|
||||
url = ""
|
||||
for tag in event.tags():
|
||||
if tag.as_vec()[0] == 'i':
|
||||
input_type = tag.as_vec()[2]
|
||||
input_content = tag.as_vec()[1]
|
||||
|
||||
if input_type == "url":
|
||||
url = input_content
|
||||
elif input_type == "event":
|
||||
evt = get_event_by_id(input_content, config=dvmconfig)
|
||||
url = re.search("(?P<url>https?://[^\s]+)", evt.content()).group("url")
|
||||
elif input_type == "job":
|
||||
evt = get_referenced_event_by_id(input_content, [EventDefinitions.KIND_NIP90_RESULT_GENERATE_IMAGE],
|
||||
client, config=dvmconfig)
|
||||
|
||||
url = re.search("(?P<url>https?://[^\s]+)", evt.content()).group("url")
|
||||
|
||||
request_form["optStr"] = 'url=' + url
|
||||
|
||||
return request_form
|
||||
|
Loading…
x
Reference in New Issue
Block a user