added image interrogator

2025-11-19 16:26:26 +01:00 · 2023-12-04 21:41:18 +01:00
parent 7006da9eec
commit 93e4e48b3f
4 changed files with 172 additions and 1 deletions
--- a/backends/nova_server.py
+++ b/backends/nova_server.py
@@ -1,6 +1,7 @@
 import io
 import json
 import os
 import re
 import time
 import zipfile
 from pathlib import Path
@@ -96,10 +97,13 @@ def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
                image = Image.open(io.BytesIO(response.content))
                image.save("./outputs/image.jpg")
                result = upload_media_to_hoster("./outputs/image.jpg")
                return result
                os.remove("./outputs/image.jpg")
                return result
            elif content_type == 'text/plain; charset=utf-8':
                result = response.content.decode('utf-8')
                # TODO: This should not be necessary?
                result = result.replace("  ", "#").replace(" ", "").replace("#", " ")
                return result
            elif content_type == "application/x-zip-compressed":
                zf = zipfile.ZipFile(io.BytesIO(response.content), "r")
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -272,6 +272,7 @@ class Bot:
                                    send_event(evt, client=self.client, dvm_config=dvm_config)
                                else:
                                    print("Bot payment-required")
                                    time.sleep(2.0)
                                    evt = EventBuilder.new_encrypted_direct_msg(self.keys,
                                                                                PublicKey.from_hex(entry["npub"]),
                                                                                "Current balance: " + str(
--- a/tasks/imageinterrogator.py
+++ b/tasks/imageinterrogator.py
@@ -0,0 +1,164 @@
 import json
 import os
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 import dotenv
 from backends.nova_server import check_nova_server_status, send_request_to_nova_server
 from interfaces.dvmtaskinterface import DVMTaskInterface
 from utils.admin_utils import AdminConfig
 from utils.backend_utils import keep_alive
 from utils.dvmconfig import DVMConfig
 from utils.nip89_utils import NIP89Config, check_and_set_d_tag
 from utils.definitions import EventDefinitions
 from utils.nostr_utils import check_and_set_private_key
 """
 This file contains a module that extracts a prompt from an image given by a URL.
 Accepted inputs: link to image (url)
 Outputs: a textual description of the image
 """
 class ImageInterrogator(DVMTaskInterface):
    """DVM task that turns an image URL into text via a NOVA-Server.

    The task builds a request form from the 'i' (input) and 'param' tags of a
    job event, submits it with ``send_request_to_nova_server`` and waits for
    ``check_nova_server_status`` to deliver the result.
    """
    KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
    TASK: str = "image-to-text"
    FIX_COST: float = 80

    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
                 admin_config: AdminConfig = None, options=None):
        """Forward all configuration unchanged to the DVMTaskInterface base."""
        super().__init__(name, dvm_config, nip89config, admin_config, options)

    def is_input_supported(self, tags):
        """Return True if any 'i' tag declares an input of type "url".

        Tags with fewer than three entries cannot carry an input type and are
        skipped instead of raising IndexError.
        """
        for tag in tags:
            vec = tag.as_vec()
            if len(vec) >= 3 and vec[0] == 'i' and vec[2] == "url":
                return True
        return False

    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
        """Build the NOVA-Server request form for a job event.

        Reads the image URL from the 'i' tag of type "url" and the optional
        "method" / "mode" values from 'param' tags (defaults: "prompt" and
        "best"). ``client`` and ``dvm_config`` are accepted but not used here.

        Returns a dict with the keys "jobID", "trainerFilePath", "data" and
        "options"; the latter two are JSON-encoded strings.
        """
        # NOTE(review): self.NAME is not defined in this class - presumably set
        # by DVMTaskInterface; confirm.
        request_form = {
            "jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", ""),
            "trainerFilePath": r'modules\image_interrogator\image_interrogator.trainer',
        }
        url = ""
        method = "prompt"
        mode = "best"
        for tag in event.tags():
            vec = tag.as_vec()
            # Both tag kinds handled below need a name/value at index 1 and 2.
            if len(vec) < 3:
                continue
            if vec[0] == 'i':
                if vec[2] == "url":
                    url = vec[1]
            elif vec[0] == 'param':
                print("Param: " + vec[1] + ": " + vec[2])
                if vec[1] == "method":
                    method = vec[2]
                elif vec[1] == "mode":
                    mode = vec[2]

        io_input_image = {
            "id": "input_image",
            "type": "input",
            "src": "url:Image",
            "uri": url
        }
        io_output = {
            "id": "output",
            "type": "output",
            "src": "request:text"
        }
        request_form['data'] = json.dumps([io_input_image, io_output])
        options = {
            "kind": method,
            "mode": mode
        }
        request_form['options'] = json.dumps(options)
        return request_form

    def process(self, request_form):
        """Submit the request form to the NOVA-Server and return its result.

        Returns whatever ``check_nova_server_status`` returns for the job.
        Raises Exception (chained to the original error) on any failure.
        """
        try:
            # Call the process route of NOVA-Server with our request form.
            response = send_request_to_nova_server(request_form, self.options['nova_server'])
            # NOTE(review): polling continues even if 'success' is falsy - confirm intended.
            if bool(json.loads(response)['success']):
                print("Job " + request_form['jobID'] + " sent to NOVA-server")
            # The context manager tears the worker thread down once the result
            # is in, so repeated jobs do not accumulate idle pools.
            with ThreadPool(processes=1) as pool:
                thread = pool.apply_async(check_nova_server_status,
                                          (request_form['jobID'], self.options['nova_server']))
                print("Wait for results of NOVA-Server...")
                return thread.get()
        except Exception as e:
            # Same exception type and message as before; keep the cause attached.
            raise Exception(e) from e
 # We build an example here that we can call by either calling this file directly from the main directory,
 # or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
 # playground or elsewhere
 def build_example(name, identifier, admin_config, server_address):
    """Create a configured ImageInterrogator instance.

    name:           display name, also published in the NIP89 content.
    identifier:     passed to check_and_set_private_key / check_and_set_d_tag.
    admin_config:   handed through to the ImageInterrogator constructor.
    server_address: stored as the 'nova_server' option of the task.
    """
    config = DVMConfig()
    config.PRIVATE_KEY = check_and_set_private_key(identifier)
    config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
    config.LNBITS_URL = os.getenv("LNBITS_HOST")

    # The image URL is used twice: in the NIP89 content and for the d tag.
    image_url = "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg"
    announcement = {
        "name": name,
        "image": image_url,
        "about": "I analyse Images an return a prompt or a prompt analysis",
        "encryptionSupported": True,
        "cashuAccepted": True,
        "nip90Params": {
            "method": {"required": False, "values": ["prompt", "analysis"]},
            "mode": {"required": False, "values": ["best", "classic", "fast", "negative"]},
        },
    }

    nip89 = NIP89Config()
    nip89.DTAG = check_and_set_d_tag(identifier, name, config.PRIVATE_KEY, image_url)
    nip89.CONTENT = json.dumps(announcement)

    # The only option this module is initialised with is the nova-server address.
    return ImageInterrogator(
        name=name,
        dvm_config=config,
        nip89config=nip89,
        admin_config=admin_config,
        options={'nova_server': server_address},
    )
 if __name__ == '__main__':
    # Script entry point: load .env, build the example DVM, run it and keep
    # the process alive.
    dotenv_file = Path('.env')
    if not dotenv_file.is_file():
        raise FileNotFoundError(f'.env file not found at {dotenv_file} ')
    print(f'loading environment from {dotenv_file.resolve()}')
    dotenv.load_dotenv(dotenv_file, verbose=True, override=True)

    # No NIP89 rebroadcast, no profile update, empty LUD16 for this example run.
    admin_config = AdminConfig()
    admin_config.REBROADCAST_NIP89 = False
    admin_config.UPDATE_PROFILE = False
    admin_config.LUD16 = ""

    nova_server_address = os.getenv("NOVA_SERVER")
    dvm = build_example("Image Interrogator", "imageinterrogator", admin_config, nova_server_address)
    dvm.run()
    keep_alive()
--- a/utils/backend_utils.py
+++ b/utils/backend_utils.py
@@ -36,6 +36,8 @@ def get_task(event, client, dvm_config):
                            return "pdf-to-text"
                        elif file_type == "audio" or file_type == "video":
                            return "speech-to-text"
                        elif file_type == "image":
                            return "image-to-text"
                        else:
                            return "unknown job"
                    elif tag.as_vec()[2] == "event":