From 5485ba363813beff73c0e67ce9b991ff90991ce4 Mon Sep 17 00:00:00 2001 From: Believethehype <1097224+believethehype@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:17:26 +0200 Subject: [PATCH] cleanup backend examples --- .../{nova_server => discover}/__init__.py | 0 .../modules/__init__.py | 0 .../modules/image_interrogator/__init__.py | 0 .../image_interrogator/image_interrogator.py | 56 +++++++------ .../image_interrogator.trainer | 12 +++ .../modules/image_interrogator/readme.md | 0 .../image_interrogator/requirements.txt | 0 .../modules/image_interrogator/version.py | 0 .../modules/image_upscale/__init__.py | 0 .../image_upscale/image_upscale_realesrgan.py | 72 ++++++++--------- .../image_upscale_realesrgan.trainer | 10 +++ .../image_upscale/inference_realesrgan.py | 4 +- .../modules/image_upscale/requirements.txt | 0 .../modules/image_upscale/version.py | 0 .../modules/stablediffusionxl/__init__.py | 0 .../modules/stablediffusionxl/lora.py | 34 ++++---- .../modules/stablediffusionxl/readme.md | 2 +- .../stablediffusionxl/requirements.txt | 0 .../stablediffusionxl-img2img.py | 62 +++++++------- .../stablediffusionxl-img2img.trainer | 42 ++++++++++ .../stablediffusionxl/stablediffusionxl.py | 0 .../stablediffusionxl.trainer | 68 ++++++++++++++++ .../modules/stablediffusionxl/version.py | 0 .../stablevideodiffusion/requirements.txt | 0 .../stablevideodiffusion.py | 30 +++---- .../stablevideodiffusion.trainer | 11 +++ .../modules/stablevideodiffusion/version.py | 0 .../modules/whisperx/__init__.py | 0 .../modules/whisperx/readme.md | 5 +- .../modules/whisperx/requirements.txt | 0 .../modules/whisperx/version.py | 0 .../modules/whisperx/whisperx_transcript.py | 40 ++++++---- .../whisperx/whisperx_transcript.trainer | 10 +++ .../{nova_server => discover}/run_windows.cmd | 0 .../setup_windows.cmd | 0 .../{nova_server => discover}/utils.py | 10 ++- .../mlx/modules/stable_diffusion/__init__.py | 18 ++--- .../mlx/modules/stable_diffusion/config.py | 2 +- .../mlx/modules/stable_diffusion/model_io.py | 11 +-- .../mlx/modules/stable_diffusion/sampler.py | 8 +- .../mlx/modules/stable_diffusion/unet.py | 80 +++++++++---------- .../mlx/modules/stable_diffusion/vae.py | 38 ++++----- .../image_interrogator.trainer | 10 --- .../image_upscale_realesrgan.trainer | 9 --- .../stablediffusionxl-img2img.trainer | 26 ------ .../stablediffusionxl.trainer | 41 ---------- .../stablevideodiffusion.trainer | 9 --- .../whisperx/whisperx_transcript.trainer | 9 --- 48 files changed, 380 insertions(+), 349 deletions(-) rename nostr_dvm/backends/{nova_server => discover}/__init__.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/__init__.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/__init__.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/image_interrogator.py (68%) create mode 100644 nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/readme.md (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/requirements.txt (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/version.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/__init__.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/image_upscale_realesrgan.py (83%) create mode 100644 nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/inference_realesrgan.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/requirements.txt (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/version.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/__init__.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/lora.py (70%) rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/readme.md (99%) rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/requirements.txt (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/stablediffusionxl-img2img.py (80%) create mode 100644 nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/stablediffusionxl.py (100%) create mode 100644 nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/version.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/stablevideodiffusion/requirements.txt (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/stablevideodiffusion/stablevideodiffusion.py (86%) create mode 100644 nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer rename nostr_dvm/backends/{nova_server => discover}/modules/stablevideodiffusion/version.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/__init__.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/readme.md (94%) rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/requirements.txt (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/version.py (100%) rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/whisperx_transcript.py (83%) create mode 100644 nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer rename nostr_dvm/backends/{nova_server => discover}/run_windows.cmd (100%) rename nostr_dvm/backends/{nova_server => discover}/setup_windows.cmd (100%) rename nostr_dvm/backends/{nova_server => discover}/utils.py (98%) delete mode 100644 nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer delete mode 100644 nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer delete mode 100644 nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer delete mode 100644 nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer delete mode 100644 nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer delete mode 100644 nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer diff --git a/nostr_dvm/backends/nova_server/__init__.py b/nostr_dvm/backends/discover/__init__.py similarity index 100% rename from nostr_dvm/backends/nova_server/__init__.py rename to nostr_dvm/backends/discover/__init__.py diff --git a/nostr_dvm/backends/nova_server/modules/__init__.py b/nostr_dvm/backends/discover/modules/__init__.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/__init__.py rename to nostr_dvm/backends/discover/modules/__init__.py diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py b/nostr_dvm/backends/discover/modules/image_interrogator/__init__.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py rename to nostr_dvm/backends/discover/modules/image_interrogator/__init__.py diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.py similarity index 68% rename from nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py rename to nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.py index 217f5f3..7facb43 100644 --- a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py +++ b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.py @@ -1,18 +1,17 @@ """StableDiffusionXL Module """ -import gc -import sys import os +import sys sys.path.insert(0, os.path.dirname(__file__)) - from nova_utils.interfaces.server_module import Processor # Setting defaults -_default_options = {"kind": "prompt", "mode": "fast" } +_default_options = {"kind": "prompt", "mode": "fast"} -# TODO: add log infos, + +# TODO: add log infos, class ImageInterrogator(Processor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -20,7 +19,6 @@ class ImageInterrogator(Processor): self.device = None self.ds_iter = None self.current_session = None - # IO shortcuts self.input = [x for x in self.model_io if x.io_type == "input"] @@ -36,18 +34,17 @@ class ImageInterrogator(Processor): self.device = "cuda" if torch.cuda.is_available() else "cpu" self.ds_iter = ds_iter current_session_name = self.ds_iter.session_names[0] - self.current_session = self.ds_iter.sessions[current_session_name]['manager'] - #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512" - kind = self.options['kind'] #"prompt" #"analysis" #prompt + self.current_session = self.ds_iter.sessions[current_session_name]['manager'] + # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512" + kind = self.options['kind'] # "prompt" #"analysis" #prompt mode = self.options['mode'] - #url = self.current_session.input_data['input_image_url'].data[0] - #print(url) + # url = self.current_session.input_data['input_image_url'].data[0] + # print(url) input_image = self.current_session.input_data['input_image'].data - init_image = PILImage.fromarray(input_image) + init_image = PILImage.fromarray(input_image) mwidth = 256 mheight = 256 - w = mwidth h = mheight if init_image.width > init_image.height: @@ -68,11 +65,9 @@ class ImageInterrogator(Processor): config = Config(clip_model_name="ViT-L-14/openai", device="cuda") - if kind == "analysis": ci = Interrogator(config) - image_features = ci.image_to_features(init_image) top_mediums = ci.mediums.rank(image_features, 5) @@ -81,15 +76,20 @@ class ImageInterrogator(Processor): top_trendings = ci.trendings.rank(image_features, 5) top_flavors = ci.flavors.rank(image_features, 5) - medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))} - artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))} + medium_ranks = {medium: sim for medium, sim in + zip(top_mediums, ci.similarities(image_features, top_mediums))} + artist_ranks = {artist: sim for artist, sim in + zip(top_artists, ci.similarities(image_features, top_artists))} movement_ranks = {movement: sim for movement, sim in - zip(top_movements, ci.similarities(image_features, top_movements))} + zip(top_movements, ci.similarities(image_features, top_movements))} trending_ranks = {trending: sim for trending, sim in - zip(top_trendings, ci.similarities(image_features, top_trendings))} - flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))} + zip(top_trendings, ci.similarities(image_features, top_trendings))} + flavor_ranks = {flavor: sim for flavor, sim in + zip(top_flavors, ci.similarities(image_features, top_flavors))} - result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks) + result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str( + artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str( + trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks) print(result) return result @@ -100,8 +100,8 @@ class ImageInterrogator(Processor): ci.config.chunk_size = 2024 ci.config.clip_offload = True ci.config.apply_low_vram_defaults() - #MODELS = ['ViT-L (best for Stable Diffusion 1.*)'] - ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024 + # MODELS = ['ViT-L (best for Stable Diffusion 1.*)'] + ci.config.flavor_intermediate_count = 2024 # if clip_model_name == MODELS[0] else 1024 image = init_image if mode == 'best': @@ -113,17 +113,15 @@ class ImageInterrogator(Processor): elif mode == 'negative': prompt = ci.interrogate_negative(image) - #print(str(prompt)) + # print(str(prompt)) return prompt - # config = Config(clip_model_name=os.environ['TRANSFORMERS_CACHE'] + "ViT-L-14/openai", device="cuda")git # ci = Interrogator(config) - # "ViT-L-14/openai")) - # "ViT-g-14/laion2B-s34B-b88K")) + # "ViT-L-14/openai")) + # "ViT-g-14/laion2B-s34B-b88K")) - def to_output(self, data: dict): import numpy as np self.current_session.output_data_templates['output'].data = np.array([data]) - return self.current_session.output_data_templates \ No newline at end of file + return self.current_session.output_data_templates diff --git a/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer new file mode 100644 index 0000000..e218aa2 --- /dev/null +++ b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer @@ -0,0 +1,12 @@ + + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md b/nostr_dvm/backends/discover/modules/image_interrogator/readme.md similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md rename to nostr_dvm/backends/discover/modules/image_interrogator/readme.md diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt b/nostr_dvm/backends/discover/modules/image_interrogator/requirements.txt similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt rename to nostr_dvm/backends/discover/modules/image_interrogator/requirements.txt diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py b/nostr_dvm/backends/discover/modules/image_interrogator/version.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_interrogator/version.py rename to nostr_dvm/backends/discover/modules/image_interrogator/version.py diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py b/nostr_dvm/backends/discover/modules/image_upscale/__init__.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py rename to nostr_dvm/backends/discover/modules/image_upscale/__init__.py diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.py similarity index 83% rename from nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py rename to nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.py index 32ec7c8..a38dbdb 100644 --- a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py +++ b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.py @@ -2,25 +2,23 @@ """ import os -import glob import sys -from nova_utils.interfaces.server_module import Processor + +import cv2 +import numpy as np +from PIL import Image as PILImage from basicsr.archs.rrdbnet_arch import RRDBNet from basicsr.utils.download_util import load_file_from_url -import numpy as np - - - +from nova_utils.interfaces.server_module import Processor from realesrgan import RealESRGANer from realesrgan.archs.srvgg_arch import SRVGGNetCompact -import cv2 -from PIL import Image as PILImage - # Setting defaults -_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False } +_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0, "tile_pad": 10, + "pre_pad": 0, "compute_type": "fp32", "face_enhance": False} -# TODO: add log infos, + +# TODO: add log infos, class RealESRGan(Processor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -28,8 +26,7 @@ class RealESRGan(Processor): self.device = None self.ds_iter = None self.current_session = None - self.model_path = None #Maybe need this later for manual path - + self.model_path = None # Maybe need this later for manual path # IO shortcuts self.input = [x for x in self.model_io if x.io_type == "input"] @@ -42,12 +39,11 @@ class RealESRGan(Processor): current_session_name = self.ds_iter.session_names[0] self.current_session = self.ds_iter.sessions[current_session_name]['manager'] input_image = self.current_session.input_data['input_image'].data - try: model, netscale, file_url = self.manageModel(str(self.options['model'])) - if self.model_path is not None: + if self.model_path is not None: model_path = self.model_path else: model_path = os.path.join('weights', self.options['model'] + '.pth') @@ -58,7 +54,7 @@ class RealESRGan(Processor): model_path = load_file_from_url( url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None) - # use dni to control the denoise strength + # use dni to control the denoise strength dni_weight = None if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1: wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3') @@ -67,19 +63,18 @@ class RealESRGan(Processor): half = True if self.options["compute_type"] == "fp32": - half=False - + half = False upsampler = RealESRGANer( - scale=netscale, - model_path=model_path, - dni_weight=dni_weight, - model=model, - tile= int(self.options['tile']), - tile_pad=int(self.options['tile_pad']), - pre_pad=int(self.options['pre_pad']), - half=half, - gpu_id=None) #Can be set if multiple gpus are available + scale=netscale, + model_path=model_path, + dni_weight=dni_weight, + model=model, + tile=int(self.options['tile']), + tile_pad=int(self.options['tile_pad']), + pre_pad=int(self.options['pre_pad']), + half=half, + gpu_id=None) # Can be set if multiple gpus are available if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement from gfpgan import GFPGANer @@ -89,24 +84,24 @@ class RealESRGan(Processor): arch='clean', channel_multiplier=2, bg_upsampler=upsampler) - - - pilimage = PILImage.fromarray(input_image) + + pilimage = PILImage.fromarray(input_image) img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR) try: if bool(self.options['face_enhance']): - _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True) + _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, + paste_back=True) else: output, _ = upsampler.enhance(img, outscale=int(self.options['outscale'])) except RuntimeError as error: print('Error', error) print('If you encounter CUDA out of memory, try to set --tile with a smaller number.') - + output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB) return output - - + + except Exception as e: @@ -114,12 +109,10 @@ class RealESRGan(Processor): sys.stdout.flush() return "Error" - def to_output(self, data: dict): self.current_session.output_data_templates['output_image'].data = data return self.current_session.output_data_templates - def manageModel(self, model_name): if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) @@ -132,7 +125,8 @@ class RealESRGan(Processor): elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) netscale = 4 - file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth'] + file_url = [ + 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth'] elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) netscale = 2 @@ -148,5 +142,5 @@ class RealESRGan(Processor): 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth', 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth' ] - - return model, netscale, file_url \ No newline at end of file + + return model, netscale, file_url diff --git a/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer new file mode 100644 index 0000000..4c6e346 --- /dev/null +++ b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py b/nostr_dvm/backends/discover/modules/image_upscale/inference_realesrgan.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py rename to nostr_dvm/backends/discover/modules/image_upscale/inference_realesrgan.py index 0a8cc43..5185382 100644 --- a/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py +++ b/nostr_dvm/backends/discover/modules/image_upscale/inference_realesrgan.py @@ -1,10 +1,10 @@ import argparse -import cv2 import glob import os + +import cv2 from basicsr.archs.rrdbnet_arch import RRDBNet from basicsr.utils.download_util import load_file_from_url - from realesrgan import RealESRGANer from realesrgan.archs.srvgg_arch import SRVGGNetCompact diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt b/nostr_dvm/backends/discover/modules/image_upscale/requirements.txt similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt rename to nostr_dvm/backends/discover/modules/image_upscale/requirements.txt diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/version.py b/nostr_dvm/backends/discover/modules/image_upscale/version.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/image_upscale/version.py rename to nostr_dvm/backends/discover/modules/image_upscale/version.py diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/__init__.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py rename to nostr_dvm/backends/discover/modules/stablediffusionxl/__init__.py diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/lora.py similarity index 70% rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py rename to nostr_dvm/backends/discover/modules/stablediffusionxl/lora.py index 919e1b1..cf5f546 100644 --- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py +++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/lora.py @@ -3,98 +3,96 @@ def build_lora_xl(lora, prompt, lora_weight): if lora == "3drenderstyle": if lora_weight == "": lora_weight = "1" - prompt = "3d style, 3d render, " + prompt + " " + prompt = "3d style, 3d render, " + prompt + " " existing_lora = True if lora == "psychedelicnoir": if lora_weight == "": lora_weight = "1" - prompt = prompt + " >" + prompt = prompt + " >" existing_lora = True if lora == "wojak": if lora_weight == "": lora_weight = "1" - prompt = ", " + prompt + ", wojak" + prompt = ", " + prompt + ", wojak" existing_lora = True if lora == "dreamarts": if lora_weight == "": lora_weight = "1" - prompt = ", " + prompt + prompt = ", " + prompt existing_lora = True if lora == "voxel": if lora_weight == "": lora_weight = "1" - prompt = "voxel style, " + prompt + " " + prompt = "voxel style, " + prompt + " " existing_lora = True if lora == "kru3ger": if lora_weight == "": lora_weight = "1" - prompt = "kru3ger_style, " + prompt + "" + prompt = "kru3ger_style, " + prompt + "" existing_lora = True if lora == "inkpunk": if lora_weight == "": lora_weight = "0.5" - prompt = "inkpunk style, " + prompt + " " + prompt = "inkpunk style, " + prompt + " " existing_lora = True if lora == "inkscenery": if lora_weight == "": lora_weight = "1" - prompt = " ink scenery, " + prompt + " " + prompt = " ink scenery, " + prompt + " " existing_lora = True if lora == "inkpainting": if lora_weight == "": lora_weight = "0.7" - prompt = "painting style, " + prompt + " ," + prompt = "painting style, " + prompt + " ," existing_lora = True if lora == "timburton": if lora_weight == "": lora_weight = "1.27" pencil_weight = "1.15" - prompt = prompt + " (hand drawn with pencil"+pencil_weight+"), (tim burton style:"+lora_weight+")" + prompt = prompt + " (hand drawn with pencil" + pencil_weight + "), (tim burton style:" + lora_weight + ")" existing_lora = True if lora == "pixelart": if lora_weight == "": lora_weight = "1" - prompt = prompt + " (flat shading:1.2), (minimalist:1.4), " + prompt = prompt + " (flat shading:1.2), (minimalist:1.4), " existing_lora = True if lora == "pepe": if lora_weight == "": lora_weight = "0.8" - prompt = prompt + " , pepe" + prompt = prompt + " , pepe" existing_lora = True if lora == "bettertext": if lora_weight == "": lora_weight = "1" - prompt = prompt + " ," + prompt = prompt + " ," existing_lora = True if lora == "mspaint": if lora_weight == "": lora_weight = "1" - prompt = "MSPaint drawing " + prompt +">" + prompt = "MSPaint drawing " + prompt + ">" existing_lora = True if lora == "woodfigure": if lora_weight == "": lora_weight = "0.7" - prompt = prompt + ",woodfigurez,artistic style " + prompt = prompt + ",woodfigurez,artistic style " existing_lora = True if lora == "fireelement": prompt = prompt + ",composed of fire elements, fire element" existing_lora = True - - - return lora, prompt, existing_lora \ No newline at end of file + return lora, prompt, existing_lora diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md b/nostr_dvm/backends/discover/modules/stablediffusionxl/readme.md similarity index 99% rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md rename to nostr_dvm/backends/discover/modules/stablediffusionxl/readme.md index cccbe30..281942b 100644 --- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md +++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/readme.md @@ -14,7 +14,7 @@ This modules provides image generation based on prompts - `1-1` ,`4-3`, `16-9`, `16-10`, `3-4`,`9-16`,`10-16` - `high_noise_frac`: Denoising factor - + - `n_steps`: how many iterations should be performed ## Example payload diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt b/nostr_dvm/backends/discover/modules/stablediffusionxl/requirements.txt similarity index 100% rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt rename to nostr_dvm/backends/discover/modules/stablediffusionxl/requirements.txt diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.py similarity index 80% rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py rename to nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.py index bae89e8..08a90d3 100644 --- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py +++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.py @@ -2,26 +2,26 @@ """ import gc -import sys import os +import sys # Add local dir to path for relative imports sys.path.insert(0, os.path.dirname(__file__)) from nova_utils.interfaces.server_module import Processor from nova_utils.utils.cache_utils import get_file -from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler -from diffusers.utils import load_image +from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, \ + EulerAncestralDiscreteScheduler import numpy as np from PIL import Image as PILImage from lora import build_lora_xl - - # Setting defaults -_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" } +_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength": "0.58", + "guidance_scale": "11.0", "n_steps": "30", "lora": "", "lora_weight": "0.5"} -# TODO: add log infos, + +# TODO: add log infos, class StableDiffusionXL(Processor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -29,7 +29,6 @@ class StableDiffusionXL(Processor): self.device = None self.ds_iter = None self.current_session = None - # IO shortcuts self.input = [x for x in self.model_io if x.io_type == "input"] @@ -42,15 +41,15 @@ class StableDiffusionXL(Processor): self.device = "cuda" if torch.cuda.is_available() else "cpu" self.ds_iter = ds_iter current_session_name = self.ds_iter.session_names[0] - self.current_session = self.ds_iter.sessions[current_session_name]['manager'] - #input_image_url = self.current_session.input_data['input_image_url'].data - #input_image_url = ' '.join(input_image_url) + self.current_session = self.ds_iter.sessions[current_session_name]['manager'] + # input_image_url = self.current_session.input_data['input_image_url'].data + # input_image_url = ' '.join(input_image_url) input_image = self.current_session.input_data['input_image'].data input_prompt = self.current_session.input_data['input_prompt'].data input_prompt = ' '.join(input_prompt) negative_prompt = self.current_session.input_data['negative_prompt'].data negative_prompt = ' '.join(negative_prompt) - # print("Input Image: " + input_image_url) + # print("Input Image: " + input_image_url) print("Input prompt: " + input_prompt) print("Negative prompt: " + negative_prompt) @@ -58,8 +57,8 @@ class StableDiffusionXL(Processor): model = self.options['model'] lora = self.options['lora'] - #init_image = load_image(input_image_url).convert("RGB") - init_image = PILImage.fromarray(input_image) + # init_image = load_image(input_image_url).convert("RGB") + init_image = PILImage.fromarray(input_image) mwidth = 1024 mheight = 1024 @@ -82,44 +81,42 @@ class StableDiffusionXL(Processor): if lora != "" and lora != "None": print("Loading lora...") - lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" ) + lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "") from diffusers import AutoPipelineForImage2Image import torch - - - #init_image = init_image.resize((int(w/2), int(h/2))) + # init_image = init_image.resize((int(w/2), int(h/2))) pipe = AutoPipelineForImage2Image.from_pretrained( "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda") if existing_lora: - lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0] + lora_uri = [x for x in self.trainer.meta_uri if x.uri_id == lora][0] if str(lora_uri) == "": - return "Lora not found" + return "Lora not found" lora_path = get_file( fname=str(lora_uri.uri_id) + ".safetensors", origin=lora_uri.uri_url, file_hash=lora_uri.uri_hash, cache_dir=os.getenv("CACHE_DIR"), tmp_dir=os.getenv("TMP_DIR"), - ) + ) pipe.load_lora_weights(str(lora_path)) print("Loaded Lora: " + str(lora_path)) seed = 20000 generator = torch.manual_seed(seed) - #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512" - + # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512" + image = pipe( prompt=input_prompt, negative_prompt=negative_prompt, image=init_image, generator=generator, - num_inference_steps=int(self.options['n_steps']), + num_inference_steps=int(self.options['n_steps']), image_guidance_scale=float(self.options['guidance_scale']), strength=float(str(self.options['strength']))).images[0] @@ -137,19 +134,21 @@ class StableDiffusionXL(Processor): pipe = pipe.to(self.device) image = pipe(input_prompt, image=init_image, - negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0] - + negative_prompt=negative_prompt, num_inference_steps=n_steps, + strength=transformation_strength, guidance_scale=cfg_scale).images[0] + elif model == "timbrooks/instruct-pix2pix": pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16, - safety_checker=None) + safety_checker=None) pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) pipe.to(self.device) n_steps = int(self.options['n_steps']) cfg_scale = float(self.options['guidance_scale']) - image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0] - + image = \ + pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, + image_guidance_scale=cfg_scale).images[0] if torch.cuda.is_available(): del pipe @@ -157,7 +156,6 @@ class StableDiffusionXL(Processor): torch.cuda.empty_cache() torch.cuda.ipc_collect() - numpy_array = np.array(image) return numpy_array @@ -167,10 +165,6 @@ class StableDiffusionXL(Processor): sys.stdout.flush() return "Error" - def to_output(self, data: dict): self.current_session.output_data_templates['output_image'].data = data return self.current_session.output_data_templates - - - \ No newline at end of file diff --git a/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer new file mode 100644 index 0000000..d561da7 --- /dev/null +++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py rename to nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.py diff --git a/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer new file mode 100644 index 0000000..466a13c --- /dev/null +++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/version.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py rename to nostr_dvm/backends/discover/modules/stablediffusionxl/version.py diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt b/nostr_dvm/backends/discover/modules/stablevideodiffusion/requirements.txt similarity index 100% rename from nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt rename to nostr_dvm/backends/discover/modules/stablevideodiffusion/requirements.txt diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.py similarity index 86% rename from nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py rename to nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.py index 62e6a66..82042a4 100644 --- a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py +++ b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.py @@ -1,26 +1,20 @@ import gc -import sys import os +import sys sys.path.insert(0, os.path.dirname(__file__)) -from ssl import Options from nova_utils.interfaces.server_module import Processor import torch from diffusers import StableVideoDiffusionPipeline -from diffusers.utils import load_image, export_to_video -from nova_utils.utils.cache_utils import get_file import numpy as np from PIL import Image as PILImage - - - - # Setting defaults -_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps":"7", "seed":""} +_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps": "7", "seed": ""} -# TODO: add log infos, + +# TODO: add log infos, class StableVideoDiffusion(Processor): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -28,24 +22,21 @@ class StableVideoDiffusion(Processor): self.device = None self.ds_iter = None self.current_session = None - # IO shortcuts self.input = [x for x in self.model_io if x.io_type == "input"] self.output = [x for x in self.model_io if x.io_type == "output"] self.input = self.input[0] self.output = self.output[0] + def process_data(self, ds_iter) -> dict: - - self.device = "cuda" if torch.cuda.is_available() else "cpu" self.ds_iter = ds_iter current_session_name = self.ds_iter.session_names[0] - self.current_session = self.ds_iter.sessions[current_session_name]['manager'] + self.current_session = self.ds_iter.sessions[current_session_name]['manager'] input_image = self.current_session.input_data['input_image'].data - try: pipe = StableVideoDiffusionPipeline.from_pretrained( self.options["model"], torch_dtype=torch.float16, variant="fp16" @@ -53,7 +44,7 @@ class StableVideoDiffusion(Processor): pipe.enable_model_cpu_offload() # Load the conditioning image - image = PILImage.fromarray(input_image) + image = PILImage.fromarray(input_image) image = image.resize((1024, 576)) if self.options["seed"] != "" and self.options["seed"] != " ": @@ -68,7 +59,6 @@ class StableVideoDiffusion(Processor): torch.cuda.empty_cache() torch.cuda.ipc_collect() - np_video = np.stack([np.asarray(x) for x in frames]) return np_video @@ -77,7 +67,7 @@ class StableVideoDiffusion(Processor): print(e) sys.stdout.flush() return "Error" - + def calculate_aspect(self, width: int, height: int): def gcd(a, b): """The GCD (greatest common divisor) is the highest number that evenly divides both width and height.""" @@ -89,12 +79,10 @@ class StableVideoDiffusion(Processor): return x, y - - def to_output(self, data: list): video = self.current_session.output_data_templates['output_video'] video.data = data video.meta_data.sample_rate = int(self.options['fps']) video.meta_data.media_type = 'video' - return self.current_session.output_data_templates \ No newline at end of file + return self.current_session.output_data_templates diff --git a/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer new file mode 100644 index 0000000..d6b9123 --- /dev/null +++ b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer @@ -0,0 +1,11 @@ + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/version.py b/nostr_dvm/backends/discover/modules/stablevideodiffusion/version.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/stablevideodiffusion/version.py rename to nostr_dvm/backends/discover/modules/stablevideodiffusion/version.py diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/__init__.py b/nostr_dvm/backends/discover/modules/whisperx/__init__.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/whisperx/__init__.py rename to nostr_dvm/backends/discover/modules/whisperx/__init__.py diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/readme.md b/nostr_dvm/backends/discover/modules/whisperx/readme.md similarity index 94% rename from nostr_dvm/backends/nova_server/modules/whisperx/readme.md rename to nostr_dvm/backends/discover/modules/whisperx/readme.md index ffe67a3..32878d1 100644 --- a/nostr_dvm/backends/nova_server/modules/whisperx/readme.md +++ b/nostr_dvm/backends/discover/modules/whisperx/readme.md @@ -21,9 +21,10 @@ speaker diarization. - `word` Improved segmentation using separate alignment model. Equivalent to word alignment. - `language`: language code for transcription and alignment models. Supported languages: - - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh` + - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, + `te`, `tr`, `uk`, `ur`, `vi`, `zh` - `None`: auto-detect language from first 30 seconds of audio - + - `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption ## Examples diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt b/nostr_dvm/backends/discover/modules/whisperx/requirements.txt similarity index 100% rename from nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt rename to nostr_dvm/backends/discover/modules/whisperx/requirements.txt diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/version.py b/nostr_dvm/backends/discover/modules/whisperx/version.py similarity index 100% rename from nostr_dvm/backends/nova_server/modules/whisperx/version.py rename to nostr_dvm/backends/discover/modules/whisperx/version.py diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.py similarity index 83% rename from nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py rename to nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.py index f24e63e..e8148f0 100644 --- a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py +++ b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.py @@ -1,10 +1,13 @@ """WhisperX Module """ -from nova_utils.interfaces.server_module import Processor import sys +from nova_utils.interfaces.server_module import Processor + # Setting defaults -_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'} +_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, + 'compute_type': 'float16'} + # supported language codes, cf. whisperx/alignment.py # DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None} @@ -45,11 +48,14 @@ class WhisperX(Processor): sys.stdout.flush() model = whisperx.load_model(self.options["model"], self.device, compute_type='float32', language=self.options['language']) - + result = model.transcribe(audio, batch_size=int(self.options["batch_size"])) # delete model if low on GPU resources - import gc; gc.collect(); torch.cuda.empty_cache(); del model + import gc; + gc.collect(); + torch.cuda.empty_cache(); + del model if not self.options["alignment_mode"] == "raw": # load alignment model and metadata @@ -64,7 +70,10 @@ class WhisperX(Processor): result = result_aligned # delete model if low on GPU resources - import gc; gc.collect(); torch.cuda.empty_cache(); del model_a + import gc; + gc.collect(); + torch.cuda.empty_cache(); + del model_a return result @@ -83,26 +92,26 @@ class WhisperX(Processor): if "end" in w.keys(): last_end = w["end"] else: - #TODO: rethink lower bound for confidence; place word centred instead of left aligned + # TODO: rethink lower bound for confidence; place word centred instead of left aligned w["start"] = last_end last_end += 0.065 w["end"] = last_end - #w["score"] = 0.000 + # w["score"] = 0.000 w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4]) - + def _hmean(scores): if len(scores) > 0: prod = scores[0] for s in scores[1:]: prod *= s - prod = prod**(1/len(scores)) + prod = prod ** (1 / len(scores)) else: prod = 0 return prod - + if ( - self.options["alignment_mode"] == "word" - or self.options["alignment_mode"] == "segment" + self.options["alignment_mode"] == "word" + or self.options["alignment_mode"] == "segment" ): _fix_missing_timestamps(data) @@ -113,12 +122,13 @@ class WhisperX(Processor): ] else: anno_data = [ - #(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"] - (w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?) + # (w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"] + (w["start"], w["end"], w["text"], 1) for w in data["segments"] + # alignment 'raw' no longer contains a score(?) ] # convert to milliseconds - anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data] + anno_data = [(x[0] * 1000, x[1] * 1000, x[2], x[3]) for x in anno_data] out = self.session_manager.output_data_templates[self.output.io_id] out.data = anno_data return self.session_manager.output_data_templates diff --git a/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer new file mode 100644 index 0000000..423a1fd --- /dev/null +++ b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/run_windows.cmd b/nostr_dvm/backends/discover/run_windows.cmd similarity index 100% rename from nostr_dvm/backends/nova_server/run_windows.cmd rename to nostr_dvm/backends/discover/run_windows.cmd diff --git a/nostr_dvm/backends/nova_server/setup_windows.cmd b/nostr_dvm/backends/discover/setup_windows.cmd similarity index 100% rename from nostr_dvm/backends/nova_server/setup_windows.cmd rename to nostr_dvm/backends/discover/setup_windows.cmd diff --git a/nostr_dvm/backends/nova_server/utils.py b/nostr_dvm/backends/discover/utils.py similarity index 98% rename from nostr_dvm/backends/nova_server/utils.py rename to nostr_dvm/backends/discover/utils.py index 77e73b0..41bb317 100644 --- a/nostr_dvm/backends/nova_server/utils.py +++ b/nostr_dvm/backends/discover/utils.py @@ -4,9 +4,10 @@ import json import os import time import zipfile + +import PIL.Image as Image import pandas as pd import requests -import PIL.Image as Image from moviepy.video.io.VideoFileClip import VideoFileClip from nostr_dvm.utils.output_utils import upload_media_to_hoster @@ -24,6 +25,7 @@ in the module that is calling the server """ + def send_request_to_server(request_form, address): print("Sending job to Server") url = ('http://' + address + '/process') @@ -46,6 +48,7 @@ def send_file_to_server(filepath, address): return result + """ check_n_server_status(request_form, address) Function that requests the status of the current process with the jobID (we use the Nostr event as jobID). @@ -76,7 +79,6 @@ def check_server_status(jobID, address) -> str | pd.DataFrame: # WAITING = 0, RUNNING = 1, FINISHED = 2, ERROR = 3 time.sleep(1.0) - if status == 2: try: url_fetch = 'http://' + address + '/fetch_result' @@ -93,7 +95,7 @@ def check_server_status(jobID, address) -> str | pd.DataFrame: return result elif content_type == 'video/mp4': with open('./outputs/video.mp4', 'wb') as f: - f.write(response.content) + f.write(response.content) f.close() clip = VideoFileClip("./outputs/video.mp4") clip.write_videofile("./outputs/video2.mp4") @@ -121,4 +123,4 @@ def check_server_status(jobID, address) -> str | pd.DataFrame: print("Couldn't fetch result: " + str(e)) elif status == 3: - return "error" \ No newline at end of file + return "error" diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py index 079e10f..bc10cff 100644 --- a/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py +++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py @@ -42,14 +42,14 @@ class StableDiffusion: self.tokenizer = load_tokenizer(model) def generate_latents( - self, - text: str, - n_images: int = 1, - num_steps: int = 50, - cfg_weight: float = 7.5, - negative_text: str = "", - latent_size: Tuple[int] = (64, 64), - seed=None, + self, + text: str, + n_images: int = 1, + num_steps: int = 50, + cfg_weight: float = 7.5, + negative_text: str = "", + latent_size: Tuple[int] = (64, 64), + seed=None, ): # Set the PRNG state seed = seed or int(time.time()) @@ -94,4 +94,4 @@ class StableDiffusion: def decode(self, x_t): x = self.autoencoder.decode(x_t / self.autoencoder.scaling_factor) x = mx.minimum(1, mx.maximum(0, x / 2 + 0.5)) - return x \ No newline at end of file + return x diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py index 6fcf595..29d023a 100644 --- a/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py +++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py @@ -1,7 +1,7 @@ # Copyright © 2023 Apple Inc. from dataclasses import dataclass -from typing import Optional, Tuple +from typing import Tuple @dataclass diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py index 57879ef..6863910 100644 --- a/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py +++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py @@ -1,14 +1,12 @@ # Copyright © 2023 Apple Inc. import json -from functools import partial - -import numpy as np -from huggingface_hub import hf_hub_download -from safetensors import safe_open as safetensor_open import mlx.core as mx +import numpy as np +from huggingface_hub import hf_hub_download from mlx.utils import tree_unflatten +from safetensors import safe_open as safetensor_open from .clip import CLIPTextModel from .config import UNetConfig, CLIPTextModelConfig, AutoencoderConfig, DiffusionConfig @@ -16,7 +14,6 @@ from .tokenizer import Tokenizer from .unet import UNetModel from .vae import Autoencoder - _DEFAULT_MODEL = "stabilityai/stable-diffusion-2-1-base" _MODELS = { # See https://huggingface.co/stabilityai/stable-diffusion-2-1-base for the model details and license @@ -285,7 +282,7 @@ def load_tokenizer(key: str = _DEFAULT_MODEL): merges_file = hf_hub_download(key, _MODELS[key]["tokenizer_merges"]) with open(merges_file, encoding="utf-8") as f: - bpe_merges = f.read().strip().split("\n")[1 : 49152 - 256 - 2 + 1] + bpe_merges = f.read().strip().split("\n")[1: 49152 - 256 - 2 + 1] bpe_merges = [tuple(m.split()) for m in bpe_merges] bpe_ranks = dict(map(reversed, enumerate(bpe_merges))) diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py index a1edf93..ee80e6a 100644 --- a/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py +++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py @@ -1,9 +1,9 @@ # Copyright © 2023 Apple Inc. -from .config import DiffusionConfig - import mlx.core as mx +from .config import DiffusionConfig + def _linspace(a, b, num): x = mx.arange(0, num) / (num - 1) @@ -37,7 +37,7 @@ class SimpleEulerSampler: ) elif config.beta_schedule == "scaled_linear": betas = _linspace( - config.beta_start**0.5, config.beta_end**0.5, config.num_train_steps + config.beta_start ** 0.5, config.beta_end ** 0.5, config.num_train_steps ).square() else: raise NotImplementedError(f"{config.beta_schedule} is not implemented.") @@ -52,7 +52,7 @@ class SimpleEulerSampler: def sample_prior(self, shape, dtype=mx.float32, key=None): noise = mx.random.normal(shape, key=key) return ( - noise * self._sigmas[-1] * (self._sigmas[-1].square() + 1).rsqrt() + noise * self._sigmas[-1] * (self._sigmas[-1].square() + 1).rsqrt() ).astype(dtype) def sigmas(self, t): diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py index c1a3121..73ee31e 100644 --- a/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py +++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py @@ -34,11 +34,11 @@ class TimestepEmbedding(nn.Module): class TransformerBlock(nn.Module): def __init__( - self, - model_dims: int, - num_heads: int, - hidden_dims: Optional[int] = None, - memory_dims: Optional[int] = None, + self, + model_dims: int, + num_heads: int, + hidden_dims: Optional[int] = None, + memory_dims: Optional[int] = None, ): super().__init__() @@ -85,13 +85,13 @@ class Transformer2D(nn.Module): """A transformer model for inputs with 2 spatial dimensions.""" def __init__( - self, - in_channels: int, - model_dims: int, - encoder_dims: int, - num_heads: int, - num_layers: int = 1, - norm_num_groups: int = 32, + self, + in_channels: int, + model_dims: int, + encoder_dims: int, + num_heads: int, + num_layers: int = 1, + norm_num_groups: int = 32, ): super().__init__() @@ -125,11 +125,11 @@ class Transformer2D(nn.Module): class ResnetBlock2D(nn.Module): def __init__( - self, - in_channels: int, - out_channels: Optional[int] = None, - groups: int = 32, - temb_channels: Optional[int] = None, + self, + in_channels: int, + out_channels: Optional[int] = None, + groups: int = 32, + temb_channels: Optional[int] = None, ): super().__init__() @@ -169,19 +169,19 @@ class ResnetBlock2D(nn.Module): class UNetBlock2D(nn.Module): def __init__( - self, - in_channels: int, - out_channels: int, - temb_channels: int, - prev_out_channels: Optional[int] = None, - num_layers: int = 1, - transformer_layers_per_block: int = 1, - num_attention_heads: int = 8, - cross_attention_dim=1280, - resnet_groups: int = 32, - add_downsample=True, - add_upsample=True, - add_cross_attention=True, + self, + in_channels: int, + out_channels: int, + temb_channels: int, + prev_out_channels: Optional[int] = None, + num_layers: int = 1, + transformer_layers_per_block: int = 1, + num_attention_heads: int = 8, + cross_attention_dim=1280, + resnet_groups: int = 32, + add_downsample=True, + add_upsample=True, + add_cross_attention=True, ): super().__init__() @@ -232,13 +232,13 @@ class UNetBlock2D(nn.Module): ) def __call__( - self, - x, - encoder_x=None, - temb=None, - attn_mask=None, - encoder_attn_mask=None, - residual_hidden_states=None, + self, + x, + encoder_x=None, + temb=None, + attn_mask=None, + encoder_attn_mask=None, + residual_hidden_states=None, ): output_states = [] @@ -340,9 +340,9 @@ class UNetModel(nn.Module): # Make the upsampling blocks block_channels = ( - [config.block_out_channels[0]] - + list(config.block_out_channels) - + [config.block_out_channels[-1]] + [config.block_out_channels[0]] + + list(config.block_out_channels) + + [config.block_out_channels[-1]] ) self.up_blocks = [ UNetBlock2D( diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py index fe473d4..239b49d 100644 --- a/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py +++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py @@ -44,13 +44,13 @@ class Attention(nn.Module): class EncoderDecoderBlock2D(nn.Module): def __init__( - self, - in_channels: int, - out_channels: int, - num_layers: int = 1, - resnet_groups: int = 32, - add_downsample=True, - add_upsample=True, + self, + in_channels: int, + out_channels: int, + num_layers: int = 1, + resnet_groups: int = 32, + add_downsample=True, + add_upsample=True, ): super().__init__() @@ -93,12 +93,12 @@ class Encoder(nn.Module): """Implements the encoder side of the Autoencoder.""" def __init__( - self, - in_channels: int, - out_channels: int, - block_out_channels: List[int] = [64], - layers_per_block: int = 2, - resnet_groups: int = 32, + self, + in_channels: int, + out_channels: int, + block_out_channels: List[int] = [64], + layers_per_block: int = 2, + resnet_groups: int = 32, ): super().__init__() @@ -159,12 +159,12 @@ class Decoder(nn.Module): """Implements the decoder side of the Autoencoder.""" def __init__( - self, - in_channels: int, - out_channels: int, - block_out_channels: List[int] = [64], - layers_per_block: int = 2, - resnet_groups: int = 32, + self, + in_channels: int, + out_channels: int, + block_out_channels: List[int] = [64], + layers_per_block: int = 2, + resnet_groups: int = 32, ): super().__init__() diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer deleted file mode 100644 index 216205c..0000000 --- a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer deleted file mode 100644 index b3bf12f..0000000 --- a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer deleted file mode 100644 index b6f4167..0000000 --- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer deleted file mode 100644 index 0e86e7e..0000000 --- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer b/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer deleted file mode 100644 index 9e8dfcc..0000000 --- a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer deleted file mode 100644 index 44dae41..0000000 --- a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - -