diff --git a/backends/nova_server.py b/backends/nova_server.py
index 117295c..d8ba5c2 100644
--- a/backends/nova_server.py
+++ b/backends/nova_server.py
@@ -1,6 +1,7 @@
 import io
 import json
 import os
+import re
 import time
 import zipfile
 from pathlib import Path
@@ -96,10 +97,13 @@ def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
                 image = Image.open(io.BytesIO(response.content))
                 image.save("./outputs/image.jpg")
                 result = upload_media_to_hoster("./outputs/image.jpg")
-                return result
                 os.remove("./outputs/image.jpg")
+                return result
+
             elif content_type == 'text/plain; charset=utf-8':
                 result = response.content.decode('utf-8')
+                # TODO: This should not be necessary?
+                result = result.replace(" ", "#").replace(" ", "").replace("#", " ")
                 return result
             elif content_type == "application/x-zip-compressed":
                 zf = zipfile.ZipFile(io.BytesIO(response.content), "r")
diff --git a/bot/bot.py b/bot/bot.py
index 821d636..964f2b2 100644
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -272,6 +272,7 @@ class Bot:
                         send_event(evt, client=self.client, dvm_config=dvm_config)
                     else:
                         print("Bot payment-required")
+                        time.sleep(2.0)
                         evt = EventBuilder.new_encrypted_direct_msg(self.keys,
                                                                     PublicKey.from_hex(entry["npub"]),
                                                                     "Current balance: " + str(
diff --git a/tasks/imageinterrogator.py b/tasks/imageinterrogator.py
new file mode 100644
index 0000000..493be36
--- /dev/null
+++ b/tasks/imageinterrogator.py
@@ -0,0 +1,173 @@
+import json
+import os
+from multiprocessing.pool import ThreadPool
+from pathlib import Path
+
+import dotenv
+
+from backends.nova_server import check_nova_server_status, send_request_to_nova_server
+from interfaces.dvmtaskinterface import DVMTaskInterface
+from utils.admin_utils import AdminConfig
+from utils.backend_utils import keep_alive
+from utils.dvmconfig import DVMConfig
+from utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from utils.definitions import EventDefinitions
+from utils.nostr_utils import check_and_set_private_key
+
+"""
+This file contains a module that extracts a prompt from an image referenced by a URL.
+
+Accepted Inputs: link to image (url)
+Outputs: A textual description of the image
+
+"""
+
+
+class ImageInterrogator(DVMTaskInterface):
+    """DVM task that asks a NOVA-Server to describe an image given by URL."""
+
+    KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
+    TASK: str = "image-to-text"
+    FIX_COST: float = 80
+
+    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+                 admin_config: AdminConfig = None, options=None):
+        super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+    def is_input_supported(self, tags):
+        """Return True if at least one 'i' tag declares an input of type "url"."""
+        for tag in tags:
+            values = tag.as_vec()
+            # Tags with fewer than three entries carry no input type and are skipped.
+            if len(values) > 2 and values[0] == 'i' and values[2] == "url":
+                return True
+
+        return False
+
+    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+        """Build the NOVA-Server request form for the given job event.
+
+        The last 'i' tag of type "url" supplies the image; optional 'param' tags
+        named "method" and "mode" override the defaults "prompt" and "best".
+        """
+        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+        request_form["trainerFilePath"] = r'modules\image_interrogator\image_interrogator.trainer'
+        url = ""
+        method = "prompt"
+        mode = "best"
+
+        for tag in event.tags():
+            values = tag.as_vec()
+            if values[0] == 'i':
+                if values[2] == "url":
+                    url = values[1]
+            elif values[0] == 'param':
+                print("Param: " + values[1] + ": " + values[2])
+                if values[1] == "method":
+                    method = values[2]
+                elif values[1] == "mode":
+                    mode = values[2]
+
+        io_input_image = {
+            "id": "input_image",
+            "type": "input",
+            "src": "url:Image",
+            "uri": url
+        }
+
+        io_output = {
+            "id": "output",
+            "type": "output",
+            "src": "request:text"
+        }
+
+        request_form['data'] = json.dumps([io_input_image, io_output])
+
+        options = {
+            "kind": method,
+            "mode": mode
+        }
+        request_form['options'] = json.dumps(options)
+
+        return request_form
+
+    def process(self, request_form):
+        """Send the job to the NOVA-Server and block until its result is available.
+
+        Returns whatever check_nova_server_status returns for the job; any failure
+        is re-raised as a plain Exception with the original error chained as cause.
+        """
+        try:
+            # Call the process route of NOVA-Server with our request form.
+            response = send_request_to_nova_server(request_form, self.options['nova_server'])
+            if bool(json.loads(response)['success']):
+                print("Job " + request_form['jobID'] + " sent to NOVA-server")
+
+            # The context manager shuts the worker thread down once the result is in.
+            with ThreadPool(processes=1) as pool:
+                thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
+                print("Wait for results of NOVA-Server...")
+                result = thread.get()
+            return result
+
+        except Exception as e:
+            raise Exception(e) from e
+
+
+# We build an example here that we can call by either calling this file directly from the main directory,
+# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config, server_address):
+    """Create an ImageInterrogator that uses the NOVA-Server at server_address."""
+    dvm_config = DVMConfig()
+    dvm_config.PRIVATE_KEY = check_and_set_private_key(identifier)
+    dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
+    dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
+
+    nip90params = {
+        "method": {
+            "required": False,
+            "values": ["prompt", "analysis"]
+        },
+        "mode": {
+            "required": False,
+            "values": ["best", "classic", "fast", "negative"]
+        }
+    }
+    nip89info = {
+        "name": name,
+        "image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
+        "about": "I analyse Images an return a prompt or a prompt analysis",
+        "encryptionSupported": True,
+        "cashuAccepted": True,
+        "nip90Params": nip90params
+    }
+
+    # A module might have options it can be initialized with; here we only set the nova-server address
+    options = {'nova_server': server_address}
+
+    nip89config = NIP89Config()
+    nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY,
+                                           nip89info["image"])
+    nip89config.CONTENT = json.dumps(nip89info)
+    # The AdminConfig is passed through exactly as supplied by the caller
+    return ImageInterrogator(name=name, dvm_config=dvm_config, nip89config=nip89config,
+                             admin_config=admin_config, options=options)
+
+
+if __name__ == '__main__':
+    env_path = Path('.env')
+    if env_path.is_file():
+        print(f'loading environment from {env_path.resolve()}')
+        dotenv.load_dotenv(env_path, verbose=True, override=True)
+    else:
+        raise FileNotFoundError(f'.env file not found at {env_path} ')
+
+    admin_config = AdminConfig()
+    admin_config.REBROADCAST_NIP89 = False
+    admin_config.UPDATE_PROFILE = False
+    admin_config.LUD16 = ""
+    dvm = build_example("Image Interrogator", "imageinterrogator", admin_config, os.getenv("NOVA_SERVER"))
+    dvm.run()
+
+    keep_alive()
diff --git a/utils/backend_utils.py b/utils/backend_utils.py
index ce220eb..a6ee554 100644
--- a/utils/backend_utils.py
+++ b/utils/backend_utils.py
@@ -36,6 +36,8 @@ def get_task(event, client, dvm_config):
                             return "pdf-to-text"
                         elif file_type == "audio" or file_type == "video":
                             return "speech-to-text"
+                        elif file_type == "image":
+                            return "image-to-text"
                         else:
                             return "unknown job"
                     elif tag.as_vec()[2] == "event":