add stable video diffusion, gif conversion

2025-06-06 18:39:12 +02:00 · 2023-12-07 13:58:24 +01:00 · 2023-12-07 13:58:24 +01:00 · d2db32ed73
commit d2db32ed73
parent afde70c672
6 changed files with 198 additions and 25 deletions
--- a/main.py
+++ b/main.py
@ -1,12 +1,7 @@
 import json
 import os
 from datetime import timedelta
 from pathlib import Path
 import dotenv
 from nostr_sdk import PublicKey, Options, Client, Keys
 from bot.bot import Bot
 from interfaces.dvmtaskinterface import DVMTaskInterface
 import tasks.convert_media as convert_media
 import tasks.discovery_inactive_follows as discovery_inactive_follows
@ -15,7 +10,7 @@ import tasks.textextraction_pdf as textextraction_pdf
 import tasks.textextraction_google as textextraction_google
 import tasks.translation_google as translation_google
 import tasks.translation_libretranslate as translation_libretranslate
-from tasks import imagegeneration_replicate
+from tasks import imagegeneration_replicate_sdxl, videogeneration_replicate_svd
 from utils.admin_utils import AdminConfig
 from utils.backend_utils import keep_alive
@ -77,10 +72,14 @@ def playground():
        dalle.run()
    if os.getenv("REPLICATE_API_TOKEN") is not None and os.getenv("REPLICATE_API_TOKEN") != "":
-        sdxlreplicate = imagegeneration_replicate.build_example("Stable Diffusion XL", "replicate_sdxl", admin_config)
+        sdxlreplicate = imagegeneration_replicate_sdxl.build_example("Stable Diffusion XL", "replicate_sdxl", admin_config)
        bot_config.SUPPORTED_DVMS.append(sdxlreplicate)
        sdxlreplicate.run()
    if os.getenv("REPLICATE_API_TOKEN") is not None and os.getenv("REPLICATE_API_TOKEN") != "":
        svdreplicate = videogeneration_replicate_svd.build_example("Stable Video Diffusion", "replicate_svd", admin_config)
        bot_config.SUPPORTED_DVMS.append(svdreplicate)
        svdreplicate.run()
    # Let's define a function so we can add external DVMs to our bot; we will instantiate it afterwards
--- a/tasks/imagegeneration_replicate_sdxl.py
+++ b/tasks/imagegeneration_replicate_sdxl.py
@ -26,7 +26,7 @@ Params:
 """
-class ImageGenerationReplicate(DVMTaskInterface):
+class ImageGenerationReplicateSDXL(DVMTaskInterface):
    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
    TASK: str = "text-to-image"
    FIX_COST: float = 120
@ -148,7 +148,7 @@ def build_example(name, identifier, admin_config):
                                                              nip89info["image"])
    nip89config.CONTENT = json.dumps(nip89info)
    # We add an optional AdminConfig for this one, and tell the dvm to rebroadcast its NIP89
-    return ImageGenerationReplicate(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)
+    return ImageGenerationReplicateSDXL(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)
 if __name__ == '__main__':
--- a/tasks/videogeneration_replicate_svd.py
+++ b/tasks/videogeneration_replicate_svd.py
@ -0,0 +1,158 @@
 import json
 import os
 from io import BytesIO
 from pathlib import Path
 import dotenv
 import requests
 import urllib.request
 from PIL import Image
 from interfaces.dvmtaskinterface import DVMTaskInterface
 from utils.admin_utils import AdminConfig
 from utils.backend_utils import keep_alive
 from utils.definitions import EventDefinitions
 from utils.dvmconfig import DVMConfig
 from utils.nip89_utils import NIP89Config, check_and_set_d_tag
 from utils.nostr_utils import check_and_set_private_key
 from utils.output_utils import upload_media_to_hoster
 from utils.zap_utils import get_price_per_sat
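 """
 This File contains a Module to transform an image to a short video clip using Stable Video Diffusion with replicate
 Accepted Inputs: An url to an Image
 Outputs: An url to a Video (mp4), uploaded to a media hoster
 Params: None yet (video length, sizing strategy, fps, motion and noise are currently fixed defaults)
 """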
 class VideoGenerationReplicateSVD(DVMTaskInterface):
    """DVM task that turns an image (given by url) into a short video clip.

    The image is downloaded, sent to the Stable Video Diffusion model on
    Replicate, and the resulting mp4 is uploaded via ``upload_media_to_hoster``.
    """
    # NIP90 job kind this DVM answers to
    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_VIDEO
    # Task name used to match incoming events to this DVM
    TASK: str = "image-to-video"
    # Default price; build_example overrides the price on the DVMConfig
    FIX_COST: float = 120

    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
                 admin_config: AdminConfig = None, options=None):
        """Forward all configuration unchanged to DVMTaskInterface."""
        super().__init__(name, dvm_config, nip89config, admin_config, options)

    def is_input_supported(self, tags):
        """Return False if any 'i' tag has an input type other than "url".

        Tags that are not 'i' tags are ignored, so an event without any
        'i' tag is reported as supported.
        """
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                input_type = tag.as_vec()[2]
                if input_type != "url":
                    return False
        return True

    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
        """Build the request form (job id + JSON-encoded options) for an event.

        The image url is taken from the event's 'i' tag of type "url"; if
        several are present the last one wins. All model parameters are
        currently fixed defaults and are not read from the event.
        """
        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
        url = ""
        # Manual toggle between the two supported clip lengths (14 or 25 frames)
        frames = 14  # 25
        if frames == 25:
            length = "25_frames_with_svd_xt"
        else:
            length = "14_frames_with_svd"
        sizing_strategy = "maintain_aspect_ratio" #crop_to_16_9, use_image_dimensions
        frames_per_second = 6
        motion_bucket_id = 127  #Increase overall motion in the generated video
        cond_aug = 0.02  # Amount of noise to add to input image

        for tag in event.tags():
            if tag.as_vec()[0] == 'i':
                input_type = tag.as_vec()[2]
                if input_type == "url":
                    url = tag.as_vec()[1]
        #TODO add params as defined above
        options = {
            "url": url,
            "length": length,
            "sizing_strategy": sizing_strategy,
            "frames_per_second": frames_per_second,
            "motion_bucket_id": motion_bucket_id,
            "cond_aug": cond_aug
        }
        request_form['options'] = json.dumps(options)
        return request_form

    def process(self, request_form):
        """Run the image-to-video job described by ``request_form``.

        Downloads the input image, stores it as ./outputs/input.jpg, runs
        Stable Video Diffusion on Replicate, downloads the result to
        ./outputs/svd.mp4 and returns whatever ``upload_media_to_hoster``
        returns for that file (presumably the hosted url).

        Raises:
            Exception: wraps any error raised while processing; the original
                error is kept as ``__cause__``.
        """
        try:
            # NOTE(review): set_options presumably decodes request_form['options'] - confirm
            options = DVMTaskInterface.set_options(request_form)
            print(options["url"])
            response = requests.get(options["url"])
            # Fail on HTTP errors here instead of handing an error page to PIL
            response.raise_for_status()
            image = Image.open(BytesIO(response.content)).convert("RGB")
            image.save("./outputs/input.jpg")
            # Imported lazily so the module can be loaded without replicate installed
            import replicate
            # Context manager makes sure the image handle is closed after the call
            with open("./outputs/input.jpg", "rb") as input_image:
                output = replicate.run(
                    "stability-ai/stable-video-diffusion:3f0457e4619daac51203dedb472816fd4af51f3149fa7a9e0b5ffcf1b8172438",
                    input={"input_image": input_image,
                           "video_length": options["length"],
                           "sizing_strategy": options["sizing_strategy"],
                           "frames_per_second": options["frames_per_second"],
                           "motion_bucket_id": options["motion_bucket_id"],
                           "cond_aug": options["cond_aug"]
                           }
                )
            print(output)
            urllib.request.urlretrieve(output, "./outputs/svd.mp4")
            result = upload_media_to_hoster("./outputs/svd.mp4")
            return result
        except Exception as e:
            print("Error in Module")
            raise Exception(e) from e
 # We build an example here that we can call by either calling this file directly from the main directory,
 # or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
 # playground or elsewhere
 def build_example(name, identifier, admin_config):
    """Create a ready-to-run VideoGenerationReplicateSVD instance.

    Args:
        name: Display name of the DVM, also used in its NIP89 announcement.
        identifier: Passed to check_and_set_private_key / check_and_set_d_tag
            to look up (or create) the key and d tag of this DVM.
        admin_config: Optional AdminConfig handed through to the DVM.

    Returns:
        The configured VideoGenerationReplicateSVD.
    """
    dvm_config = DVMConfig()
    dvm_config.PRIVATE_KEY = check_and_set_private_key(identifier)
    dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
    dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
    # Price = cost of one run (given in cent) converted via get_price_per_sat, plus a fixed profit in sats
    profit_in_sats = 10
    cost_in_cent = 4.0
    dvm_config.FIX_COST = int(((cost_in_cent / (get_price_per_sat("USD") * 100)) + profit_in_sats))

    # No user-adjustable parameters are announced yet
    nip90params = {
    }
    nip89info = {
        "name": name,
        "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
        "about": "I use Replicate to run Stable Video Diffusion",
        "encryptionSupported": True,
        "cashuAccepted": True,
        "nip90Params": nip90params
    }

    nip89config = NIP89Config()
    nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY,
                                                              nip89info["image"])
    nip89config.CONTENT = json.dumps(nip89info)
    # We add an optional AdminConfig for this one, and tell the dvm to rebroadcast its NIP89
    return VideoGenerationReplicateSVD(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)
 if __name__ == '__main__':
    # Standalone entry point: load .env from the working directory, then start the example DVM.
    dotenv_file = Path('.env')
    if not dotenv_file.is_file():
        raise FileNotFoundError(f'.env file not found at {dotenv_file} ')
    print(f'loading environment from {dotenv_file.resolve()}')
    dotenv.load_dotenv(dotenv_file, verbose=True, override=True)

    # Admin actions (NIP89 rebroadcast, profile update) stay switched off for a direct run
    admin_config = AdminConfig()
    admin_config.REBROADCAST_NIP89 = False
    admin_config.UPDATE_PROFILE = False
    admin_config.LUD16 = ""

    dvm = build_example("Stable Video Diffusion", "replicate_svd", admin_config)
    dvm.run()

    # Keep the process alive after the DVM has been started
    keep_alive()
--- a/utils/backend_utils.py
+++ b/utils/backend_utils.py
@ -88,7 +88,6 @@ def get_task(event, client, dvm_config):
        #  TODO if a task can consist of multiple inputs add them here
        #  This is not ideal. Maybe such events should have their own kind
        #  else if kind is supported, simply return task
        else:
@ -102,6 +101,7 @@ def get_task(event, client, dvm_config):
 def is_input_supported_generic(tags, client, dvm_config) -> bool:
    # Handle malformed tags, missing events etc here.
    try:
        for tag in tags:
            if tag.as_vec()[0] == 'i':
@ -129,17 +129,19 @@ def is_input_supported_generic(tags, client, dvm_config) -> bool:
 def check_task_is_supported(event: Event, client, config=None):
    try:
        dvm_config = config
        # Check for generic issues: event malformed, referenced event not found, etc.
        if not is_input_supported_generic(event.tags(), client, dvm_config):
            return False, ""
        # See if current dvm supports the task
        task = get_task(event, client=client, dvm_config=dvm_config)
        if task not in (x.TASK for x in dvm_config.SUPPORTED_DVMS):
            return False, task
-
+        # See if current dvm can handle input for given task
        if not is_input_supported_generic(event.tags(), client, dvm_config):
            return False, ""
        for dvm in dvm_config.SUPPORTED_DVMS:
            if dvm.TASK == task:
                if not dvm.is_input_supported(event.tags()):
                    return False, task
        return True, task
@ -158,14 +160,17 @@ def check_url_is_readable(url):
        # If link is compatible with one of these file formats, move on.
        req = requests.get(url)
        content_type = req.headers['content-type']
-        if content_type == 'audio/x-wav' or str(url).endswith(".wav") or content_type == 'audio/mpeg' or str(url).endswith(
+        if content_type == 'audio/x-wav' or str(url).endswith(".wav") or content_type == 'audio/mpeg' or str(
                url).endswith(
                ".mp3") or content_type == 'audio/ogg' or str(url).endswith(".ogg"):
            return "audio"
-        elif (content_type == 'image/png' or str(url).endswith(".png") or content_type == 'image/jpg' or str(url).endswith(
+        elif (content_type == 'image/png' or str(url).endswith(".png") or content_type == 'image/jpg' or str(
                url).endswith(
                ".jpg") or content_type == 'image/jpeg' or str(url).endswith(".jpeg") or content_type == 'image/png' or
              str(url).endswith(".png")):
            return "image"
-        elif content_type == 'video/mp4' or str(url).endswith(".mp4") or content_type == 'video/avi' or str(url).endswith(
+        elif content_type == 'video/mp4' or str(url).endswith(".mp4") or content_type == 'video/avi' or str(
                url).endswith(
                ".avi") or content_type == 'video/mov' or str(url).endswith(".mov"):
            return "video"
        elif (str(url)).endswith(".pdf"):
@ -189,7 +194,6 @@ def get_amount_per_task(task, dvm_config, duration=1):
        return None
 def keep_alive():
    try:
        while True:
@ -197,5 +201,3 @@ def keep_alive():
    except KeyboardInterrupt:
        os.kill(os.getpid(), signal.SIGKILL)
        exit(1)
--- a/utils/definitions.py
+++ b/utils/definitions.py
@ -20,6 +20,8 @@ class EventDefinitions:
    KIND_NIP90_RESULT_GENERATE_IMAGE = 6100
    KIND_NIP90_CONVERT_VIDEO = 5200
    KIND_NIP90_RESULT_CONVERT_VIDEO = 6200
    KIND_NIP90_GENERATE_VIDEO = 5202
    KIND_NIP90_RESULT_GENERATE_VIDEO = 6202
    KIND_NIP90_CONTENT_DISCOVERY = 5300
    KIND_NIP90_RESULT_CONTENT_DISCOVERY = 6300
    KIND_NIP90_PEOPLE_DISCOVERY = 5301
@ -38,6 +40,7 @@ class EventDefinitions:
                  KIND_NIP90_RESULT_CONVERT_VIDEO,
                  KIND_NIP90_RESULT_CONTENT_DISCOVERY,
                  KIND_NIP90_RESULT_PEOPLE_DISCOVERY,
                  KIND_NIP90_RESULT_GENERATE_VIDEO,
                  KIND_NIP90_RESULT_GENERIC]
--- a/utils/mediasource_utils.py
+++ b/utils/mediasource_utils.py
@ -3,7 +3,7 @@ import urllib
 from datetime import time
 from urllib.parse import urlparse
 import ffmpegio
-from decord import AudioReader, cpu
+from decord import AudioReader, VideoReader, cpu
 import requests
 from utils.nostr_utils import get_event_by_id
@ -72,6 +72,12 @@ def organize_input_media_data(input_value, input_type, start, end, dvm_config, c
        try:
            file_reader = AudioReader(filename, ctx=cpu(0), mono=False)
            duration = float(file_reader.duration())
        except Exception as e:
            print(e)
            try:
                from moviepy.editor import VideoFileClip
                clip = VideoFileClip(filename)
                duration = clip.duration
            except Exception as e:
                print(e)
                return ""
@ -95,6 +101,11 @@ def organize_input_media_data(input_value, input_type, start, end, dvm_config, c
            elif media_format.split('/')[0] == "video":
                print("Converting Video from " + str(start_time) + " until " + str(end_time))
                ffmpegio.transcode(filename, final_filename, overwrite=True, show_log=True)
            elif media_format.split('/')[1] == "gif":
                from moviepy.editor import VideoFileClip
                print("Converting Video from " + str(start_time) + " until " + str(end_time))
                videoClip = VideoFileClip(filename)
                videoClip.write_gif(final_filename, program="ffmpeg")
            print(final_filename)
            return final_filename
        else: