add stable video diffusion, gif conversion

2025-11-18 14:06:55 +01:00 · 2023-12-07 13:58:24 +01:00
parent afde70c672
commit d2db32ed73
6 changed files with 198 additions and 25 deletions
--- a/main.py
+++ b/main.py
@@ -1,12 +1,7 @@
-import json
 import os
-from datetime import timedelta
 from pathlib import Path
 import dotenv
-from nostr_sdk import PublicKey, Options, Client, Keys
-
 from bot.bot import Bot
-from interfaces.dvmtaskinterface import DVMTaskInterface

 import tasks.convert_media as convert_media
 import tasks.discovery_inactive_follows as discovery_inactive_follows
@@ -15,7 +10,7 @@ import tasks.textextraction_pdf as textextraction_pdf
 import tasks.textextraction_google as textextraction_google
 import tasks.translation_google as translation_google
 import tasks.translation_libretranslate as translation_libretranslate
-from tasks import imagegeneration_replicate
+from tasks import imagegeneration_replicate_sdxl, videogeneration_replicate_svd

 from utils.admin_utils import AdminConfig
 from utils.backend_utils import keep_alive
@@ -77,10 +72,14 @@ def playground():
        dalle.run()

    if os.getenv("REPLICATE_API_TOKEN") is not None and os.getenv("REPLICATE_API_TOKEN") != "":
-        sdxlreplicate = imagegeneration_replicate.build_example("Stable Diffusion XL", "replicate_sdxl", admin_config)
+        sdxlreplicate = imagegeneration_replicate_sdxl.build_example("Stable Diffusion XL", "replicate_sdxl", admin_config)
        bot_config.SUPPORTED_DVMS.append(sdxlreplicate)
        sdxlreplicate.run()

+    if os.getenv("REPLICATE_API_TOKEN") is not None and os.getenv("REPLICATE_API_TOKEN") != "":
+        svdreplicate = videogeneration_replicate_svd.build_example("Stable Video Diffusion", "replicate_svd", admin_config)
+        bot_config.SUPPORTED_DVMS.append(svdreplicate)
+        svdreplicate.run()


    #Let's define a function so we can add external DVMs to our bot, we will instanciate it afterwards
--- a/tasks/imagegeneration_replicate_sdxl.py
+++ b/tasks/imagegeneration_replicate_sdxl.py
@@ -26,7 +26,7 @@ Params:
 """


-class ImageGenerationReplicate(DVMTaskInterface):
+class ImageGenerationReplicateSDXL(DVMTaskInterface):
    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
    TASK: str = "text-to-image"
    FIX_COST: float = 120
@@ -148,7 +148,7 @@ def build_example(name, identifier, admin_config):
                                                              nip89info["image"])
    nip89config.CONTENT = json.dumps(nip89info)
    # We add an optional AdminConfig for this one, and tell the dvm to rebroadcast its NIP89
-    return ImageGenerationReplicate(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)
+    return ImageGenerationReplicateSDXL(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)


 if __name__ == '__main__':
--- a/tasks/videogeneration_replicate_svd.py
+++ b/tasks/videogeneration_replicate_svd.py
@@ -0,0 +1,158 @@
+import json
+import os
+from io import BytesIO
+from pathlib import Path
+
+import dotenv
+import requests
+import urllib.request
+from PIL import Image
+
+from interfaces.dvmtaskinterface import DVMTaskInterface
+from utils.admin_utils import AdminConfig
+from utils.backend_utils import keep_alive
+from utils.definitions import EventDefinitions
+from utils.dvmconfig import DVMConfig
+from utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from utils.nostr_utils import check_and_set_private_key
+from utils.output_utils import upload_media_to_hoster
+from utils.zap_utils import get_price_per_sat
+
+"""
+This file contains a module that transforms an image into a short video clip using Stable Video Diffusion via Replicate
+
+Accepted Inputs: Image (url)
+Outputs: A url to the generated video clip (mp4)
+Params: 
+"""
+
+
+class VideoGenerationReplicateSVD(DVMTaskInterface):
+    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_VIDEO
+    TASK: str = "image-to-video"
+    FIX_COST: float = 120
+
+    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+                 admin_config: AdminConfig = None, options=None):
+        super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+    def is_input_supported(self, tags):
+        for tag in tags:
+            if tag.as_vec()[0] == 'i':
+                input_value = tag.as_vec()[1]
+                input_type = tag.as_vec()[2]
+                if input_type != "url":
+                    return False
+        return True
+
+    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+        url = ""
+        frames = 14  # 25
+        if frames == 25:
+            length = "25_frames_with_svd_xt"
+        else:
+            length = "14_frames_with_svd"
+        sizing_strategy = "maintain_aspect_ratio" #crop_to_16_9, use_image_dimensions
+        frames_per_second = 6
+        motion_bucket_id = 127  #Increase overall motion in the generated video
+        cond_aug = 0.02  # Amount of noise to add to input image
+
+        for tag in event.tags():
+            if tag.as_vec()[0] == 'i':
+                input_type = tag.as_vec()[2]
+                if input_type == "url":
+                    url = tag.as_vec()[1]
+        #TODO add params as defined above
+
+        options = {
+            "url": url,
+            "length": length,
+            "sizing_strategy": sizing_strategy,
+            "frames_per_second": frames_per_second,
+            "motion_bucket_id": motion_bucket_id,
+            "cond_aug": cond_aug
+
+        }
+        request_form['options'] = json.dumps(options)
+
+        return request_form
+
+    def process(self, request_form):
+        try:
+            options = DVMTaskInterface.set_options(request_form)
+            print(options["url"])
+            response = requests.get(options["url"])
+            image = Image.open(BytesIO(response.content)).convert("RGB")
+            image.save("./outputs/input.jpg")
+
+            import replicate
+            output = replicate.run(
+                "stability-ai/stable-video-diffusion:3f0457e4619daac51203dedb472816fd4af51f3149fa7a9e0b5ffcf1b8172438",
+                input={"input_image": open("./outputs/input.jpg", "rb"),
+                       "video_length": options["length"],
+                       "sizing_strategy": options["sizing_strategy"],
+                       "frames_per_second": options["frames_per_second"],
+                       "motion_bucket_id": options["motion_bucket_id"],
+                       "cond_aug": options["cond_aug"]
+                       }
+            )
+            print(output)
+
+            urllib.request.urlretrieve(output, "./outputs/svd.mp4")
+            result = upload_media_to_hoster("./outputs/svd.mp4")
+            return result
+
+        except Exception as e:
+            print("Error in Module")
+            raise Exception(e)
+
+# We build an example here that we can call by either calling this file directly from the main directory,
+# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config):
+    dvm_config = DVMConfig()
+    dvm_config.PRIVATE_KEY = check_and_set_private_key(identifier)
+    dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
+    dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
+    profit_in_sats = 10
+    cost_in_cent = 4.0
+    dvm_config.FIX_COST = int(((cost_in_cent / (get_price_per_sat("USD") * 100)) + profit_in_sats))
+
+    nip90params = {
+    }
+    nip89info = {
+        "name": name,
+        "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
+        "about": "I use Replicate to run StableDiffusion XL",
+        "encryptionSupported": True,
+        "cashuAccepted": True,
+        "nip90Params": nip90params
+    }
+
+
+    nip89config = NIP89Config()
+    nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY,
+                                                              nip89info["image"])
+    nip89config.CONTENT = json.dumps(nip89info)
+    # We add an optional AdminConfig for this one, and tell the dvm to rebroadcast its NIP89
+    return VideoGenerationReplicateSVD(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)
+
+
+if __name__ == '__main__':
+    env_path = Path('.env')
+    if env_path.is_file():
+        print(f'loading environment from {env_path.resolve()}')
+        dotenv.load_dotenv(env_path, verbose=True, override=True)
+    else:
+        raise FileNotFoundError(f'.env file not found at {env_path} ')
+
+    admin_config = AdminConfig()
+    admin_config.REBROADCAST_NIP89 = False
+    admin_config.UPDATE_PROFILE = False
+    admin_config.LUD16 = ""
+
+    dvm = build_example("Stable Video Diffusion", "replicate_svd", admin_config)
+    dvm.run()
+
+    keep_alive()
--- a/utils/backend_utils.py
+++ b/utils/backend_utils.py
@@ -69,9 +69,9 @@ def get_task(event, client, dvm_config):
                            print("found image tag")
                    elif tag.as_vec()[2] == "job":
                        evt = get_referenced_event_by_id(event_id=tag.as_vec()[1], kinds=
-                                                         [EventDefinitions.KIND_NIP90_RESULT_EXTRACT_TEXT,
-                                                          EventDefinitions.KIND_NIP90_RESULT_TRANSLATE_TEXT,
-                                                          EventDefinitions.KIND_NIP90_RESULT_SUMMARIZE_TEXT],
+                        [EventDefinitions.KIND_NIP90_RESULT_EXTRACT_TEXT,
+                         EventDefinitions.KIND_NIP90_RESULT_TRANSLATE_TEXT,
+                         EventDefinitions.KIND_NIP90_RESULT_SUMMARIZE_TEXT],
                                                         client=client,
                                                         dvm_config=dvm_config)
                        if evt is not None:
@@ -88,7 +88,6 @@ def get_task(event, client, dvm_config):
        #  TODO if a task can consist of multiple inputs add them here
        #  This is not ideal. Maybe such events should have their own kind

-
        #  else if kind is supported, simply return task
        else:

@@ -102,6 +101,7 @@ def get_task(event, client, dvm_config):


 def is_input_supported_generic(tags, client, dvm_config) -> bool:
+    # Handle malformed tags, missing events etc here.
    try:
        for tag in tags:
            if tag.as_vec()[0] == 'i':
@@ -129,17 +129,19 @@ def is_input_supported_generic(tags, client, dvm_config) -> bool:
 def check_task_is_supported(event: Event, client, config=None):
    try:
        dvm_config = config
+        # Check for generic issues, event malformed, referenced event not found etc.
+        if not is_input_supported_generic(event.tags(), client, dvm_config):
+            return False, ""
+
+        # See if current dvm supports the task
        task = get_task(event, client=client, dvm_config=dvm_config)
        if task not in (x.TASK for x in dvm_config.SUPPORTED_DVMS):
            return False, task
-
-        if not is_input_supported_generic(event.tags(), client, dvm_config):
-            return False, ""
+        # See if current dvm can handle input for given task
        for dvm in dvm_config.SUPPORTED_DVMS:
            if dvm.TASK == task:
                if not dvm.is_input_supported(event.tags()):
                    return False, task
-
        return True, task


@@ -158,14 +160,17 @@ def check_url_is_readable(url):
        # If link is comaptible with one of these file formats, move on.
        req = requests.get(url)
        content_type = req.headers['content-type']
-        if content_type == 'audio/x-wav' or str(url).endswith(".wav") or content_type == 'audio/mpeg' or str(url).endswith(
+        if content_type == 'audio/x-wav' or str(url).endswith(".wav") or content_type == 'audio/mpeg' or str(
+                url).endswith(
                ".mp3") or content_type == 'audio/ogg' or str(url).endswith(".ogg"):
            return "audio"
-        elif (content_type == 'image/png' or str(url).endswith(".png") or content_type == 'image/jpg' or str(url).endswith(
+        elif (content_type == 'image/png' or str(url).endswith(".png") or content_type == 'image/jpg' or str(
+                url).endswith(
                ".jpg") or content_type == 'image/jpeg' or str(url).endswith(".jpeg") or content_type == 'image/png' or
              str(url).endswith(".png")):
            return "image"
-        elif content_type == 'video/mp4' or str(url).endswith(".mp4") or content_type == 'video/avi' or str(url).endswith(
+        elif content_type == 'video/mp4' or str(url).endswith(".mp4") or content_type == 'video/avi' or str(
+                url).endswith(
                ".avi") or content_type == 'video/mov' or str(url).endswith(".mov"):
            return "video"
        elif (str(url)).endswith(".pdf"):
@@ -189,7 +194,6 @@ def get_amount_per_task(task, dvm_config, duration=1):
        return None


-
 def keep_alive():
    try:
        while True:
@@ -197,5 +201,3 @@ def keep_alive():
    except KeyboardInterrupt:
        os.kill(os.getpid(), signal.SIGKILL)
        exit(1)
-
-
--- a/utils/definitions.py
+++ b/utils/definitions.py
@@ -20,6 +20,8 @@ class EventDefinitions:
    KIND_NIP90_RESULT_GENERATE_IMAGE = 6100
    KIND_NIP90_CONVERT_VIDEO = 5200
    KIND_NIP90_RESULT_CONVERT_VIDEO = 6200
+    KIND_NIP90_GENERATE_VIDEO = 5202
+    KIND_NIP90_RESULT_GENERATE_VIDEO = 6202
    KIND_NIP90_CONTENT_DISCOVERY = 5300
    KIND_NIP90_RESULT_CONTENT_DISCOVERY = 6300
    KIND_NIP90_PEOPLE_DISCOVERY = 5301
@@ -38,6 +40,7 @@ class EventDefinitions:
                  KIND_NIP90_RESULT_CONVERT_VIDEO,
                  KIND_NIP90_RESULT_CONTENT_DISCOVERY,
                  KIND_NIP90_RESULT_PEOPLE_DISCOVERY,
+                  KIND_NIP90_RESULT_GENERATE_VIDEO,
                  KIND_NIP90_RESULT_GENERIC]


--- a/utils/mediasource_utils.py
+++ b/utils/mediasource_utils.py
@@ -3,7 +3,7 @@ import urllib
 from datetime import time
 from urllib.parse import urlparse
 import ffmpegio
-from decord import AudioReader, cpu
+from decord import AudioReader, VideoReader, cpu
 import requests
 from utils.nostr_utils import get_event_by_id

@@ -74,7 +74,13 @@ def organize_input_media_data(input_value, input_type, start, end, dvm_config, c
            duration = float(file_reader.duration())
        except Exception as e:
            print(e)
-            return ""
+            try:
+                from moviepy.editor import VideoFileClip
+                clip = VideoFileClip(filename)
+                duration = clip.duration
+            except Exception as e:
+                print(e)
+                return ""

        print("Original Duration of the Media file: " + str(duration))
        start_time, end_time, new_duration = (
@@ -95,6 +101,11 @@ def organize_input_media_data(input_value, input_type, start, end, dvm_config, c
            elif media_format.split('/')[0] == "video":
                print("Converting Video from " + str(start_time) + " until " + str(end_time))
                ffmpegio.transcode(filename, final_filename, overwrite=True, show_log=True)
+            elif media_format.split('/')[1] == "gif":
+                from moviepy.editor import VideoFileClip
+                print("Converting Video from " + str(start_time) + " until " + str(end_time))
+                videoClip = VideoFileClip(filename)
+                videoClip.write_gif(final_filename, program="ffmpeg")
            print(final_filename)
            return final_filename
        else: