From 21372ff779b2dd6409994c2bf386dd5dd542dd5e Mon Sep 17 00:00:00 2001 From: Believethehype Date: Mon, 15 Jan 2024 09:24:03 +0100 Subject: [PATCH] added svd --- .../image_interrogator/requirements.txt | 2 +- .../modules/image_upscale/requirements.txt | 2 +- .../stablediffusionxl/requirements.txt | 2 +- .../stablevideodiffusion/requirements.txt | 7 + .../stablevideodiffusion.py | 100 ++++++++++++++ .../stablevideodiffusion.trainer | 9 ++ .../modules/stablevideodiffusion/version.py | 12 ++ .../modules/whisperx/requirements.txt | 2 +- nostr_dvm/tasks/videogeneration_svd.py | 123 ++++++++++++++++++ 9 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt create mode 100644 nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py create mode 100644 nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer create mode 100644 nostr_dvm/backends/nova_server/modules/stablevideodiffusion/version.py create mode 100644 nostr_dvm/tasks/videogeneration_svd.py diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt b/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt index a9b489d..7f625e1 100644 --- a/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt +++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt @@ -1,4 +1,4 @@ -hcai-nova-utils>=1.5.5 +hcai-nova-utils>=1.5.7 --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.1 clip_interrogator diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt b/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt index 0cf3e2b..cad359c 100644 --- a/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt +++ b/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt @@ -1,5 +1,5 @@ realesrgan 
@git+https://github.com/xinntao/Real-ESRGAN.git -hcai-nova-utils>=1.5.5 +hcai-nova-utils>=1.5.7 --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0 torchvision diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt index 9b9e167..c29cdeb 100644 --- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt @@ -1,4 +1,4 @@ -hcai-nova-utils>=1.5.5 +hcai-nova-utils>=1.5.7 --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0 compel~=2.0.2 diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt b/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt new file mode 100644 index 0000000..6a92be6 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt @@ -0,0 +1,7 @@ +hcai-nova-utils>=1.5.7 +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.1.0 +git+https://github.com/huggingface/diffusers.git +transformers +accelerate +opencv-python \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py b/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py new file mode 100644 index 0000000..62e6a66 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py @@ -0,0 +1,100 @@ +import gc +import sys +import os + +sys.path.insert(0, os.path.dirname(__file__)) + +from ssl import Options +from nova_utils.interfaces.server_module import Processor +import torch +from diffusers import StableVideoDiffusionPipeline +from diffusers.utils import load_image, export_to_video +from nova_utils.utils.cache_utils import get_file +import numpy as np +from PIL import Image as PILImage + + + + + +# Setting defaults 
# Default options; callers may override any of these via self.options.
# "seed" empty means "no fixed seed" (non-deterministic generation).
_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps": "7", "seed": ""}


class StableVideoDiffusion(Processor):
    """Nova-server processor that turns a single conditioning image into a
    short video clip using a Stable Video Diffusion (img2vid) pipeline.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Caller-supplied options take precedence over the module defaults.
        self.options = _default_options | self.options
        self.device = None
        self.ds_iter = None
        self.current_session = None

        # IO shortcuts: bind the first declared input and output streams.
        self.input = next(x for x in self.model_io if x.io_type == "input")
        self.output = next(x for x in self.model_io if x.io_type == "output")

    def process_data(self, ds_iter) -> dict:
        """Run the img2vid pipeline on the first session's input image.

        Returns a numpy array of stacked RGB frames on success, or the string
        "Error" on failure (kept for backward compatibility with callers that
        check for this sentinel).
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.ds_iter = ds_iter
        current_session_name = self.ds_iter.session_names[0]
        self.current_session = self.ds_iter.sessions[current_session_name]['manager']
        input_image = self.current_session.input_data['input_image'].data

        try:
            pipe = StableVideoDiffusionPipeline.from_pretrained(
                self.options["model"], torch_dtype=torch.float16, variant="fp16"
            )
            # Offload submodules to CPU between forward passes to fit on
            # smaller GPUs.
            pipe.enable_model_cpu_offload()

            # Load the conditioning image; SVD expects a 1024x576 input.
            image = PILImage.fromarray(input_image)
            image = image.resize((1024, 576))

            # Use a fixed seed when one is provided. strip() makes the check
            # robust against any stray whitespace (the old "" / " " checks let
            # e.g. "  " through and crashed int()).
            seed = str(self.options["seed"]).strip()
            if seed:
                generator = torch.manual_seed(int(seed))
                frames = pipe(image, decode_chunk_size=8, generator=generator).frames[0]
            else:
                frames = pipe(image, decode_chunk_size=8).frames[0]

            if torch.cuda.is_available():
                # Release GPU memory before returning the (large) result.
                del pipe
                gc.collect()
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

            np_video = np.stack([np.asarray(x) for x in frames])
            return np_video

        except Exception as e:
            # Best-effort error reporting: the server protocol expects the
            # "Error" sentinel rather than an exception.
            print(e)
            sys.stdout.flush()
            return "Error"

    def calculate_aspect(self, width: int, height: int):
        """Return the reduced aspect ratio (x, y) for a width/height pair."""
        from math import gcd  # stdlib GCD instead of a hand-rolled recursion

        r = gcd(width, height)
        return width // r, height // r

    def to_output(self, data: list):
        """Write the generated frames into the session's output template."""
        video = self.current_session.output_data_templates['output_video']
        video.data = data
        video.meta_data.sample_rate = int(self.options['fps'])
        video.meta_data.media_type = 'video'

        return self.current_session.output_data_templates
import json
from multiprocessing.pool import ThreadPool

from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface, process_venv
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions

"""
This File contains a module to transform an Image to a short Video Clip on n-server and receive results back.

Accepted Inputs: An url to an Image
Outputs: An url to a video
"""


class VideoGenerationSVD(DVMTaskInterface):
    """NIP-90 DVM task that forwards an image URL to a nova-server instance
    running Stable Video Diffusion and returns the resulting video."""

    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_VIDEO
    TASK: str = "image-to-video"
    FIX_COST: float = 120

    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
                 admin_config: AdminConfig = None, options=None):
        super().__init__(name, dvm_config, nip89config, admin_config, options)

    def is_input_supported(self, tags, client=None, dvm_config=None):
        """Accept the request only if every 'i' tag carries a url input."""
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                if tag.as_vec()[2] != "url":
                    return False
        return True

    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
        """Build the nova-server request form: job id, trainer file path,
        the input/output io spec, and the module options."""
        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
        # Backslash path is intentional: the trainer file lives on a Windows
        # nova-server host.
        request_form["trainerFilePath"] = r'modules\stablevideodiffusion\stablevideodiffusion.trainer'

        url = ""
        # Candidate models: stabilityai/stable-video-diffusion-img2vid-xt
        # (25 frames) or stabilityai/stable-video-diffusion-img2vid (14 frames).
        model = "stabilityai/stable-video-diffusion-img2vid-xt"

        for tag in event.tags():
            if tag.as_vec()[0] == 'i':
                input_type = tag.as_vec()[2]
                if input_type == "url":
                    # Drop an optional '#fragment' from the image url.
                    url = str(tag.as_vec()[1]).split('#')[0]
        # TODO: parse additional params (model, frames, seed) from event tags

        io_input = {
            "id": "input_image",
            "type": "input",
            "src": "url:Image",
            "uri": url
        }

        io_output = {
            "id": "output_video",
            "type": "output",
            "src": "stream:Video"
        }

        request_form['data'] = json.dumps([io_input, io_output])

        options = {
            "model": model,
            "fps": 14
        }
        request_form['options'] = json.dumps(options)

        return request_form

    def process(self, request_form):
        """Send the job to nova-server and block until a result is available."""
        try:
            # Call the process route of n-server with our request form.
            response = send_request_to_server(request_form, self.options['server'])
            if bool(json.loads(response)['success']):
                print("Job " + request_form['jobID'] + " sent to server")

            # Poll the server on a worker thread and wait for the result.
            pool = ThreadPool(processes=1)
            thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
            print("Wait for results of server...")
            result = thread.get()
            return result

        except Exception as e:
            # Chain the original exception so the traceback is preserved.
            raise Exception(e) from e


# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
def build_example(name, identifier, admin_config, server_address):
    """Assemble a ready-to-run example DVM pointing at *server_address*."""
    dvm_config = build_default_config(identifier)
    dvm_config.USE_OWN_VENV = False
    admin_config.LUD16 = dvm_config.LN_ADDRESS
    # A module might have options it can be initialized with, here we set the
    # server address it should use. These parameters can be freely defined in
    # the task component.
    options = {'server': server_address}

    nip89info = {
        "name": name,
        "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
        "about": "I create a short video based on an image",
        "encryptionSupported": True,
        "cashuAccepted": True,
        "nip90Params": {}
    }
    nip89config = NIP89Config()
    nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
    nip89config.CONTENT = json.dumps(nip89info)

    return VideoGenerationSVD(name=name, dvm_config=dvm_config, nip89config=nip89config,
                              admin_config=admin_config, options=options)


if __name__ == '__main__':
    process_venv(VideoGenerationSVD)