From 5485ba363813beff73c0e67ce9b991ff90991ce4 Mon Sep 17 00:00:00 2001
From: Believethehype <1097224+believethehype@users.noreply.github.com>
Date: Fri, 11 Oct 2024 10:17:26 +0200
Subject: [PATCH] cleanup backend examples
---
.../{nova_server => discover}/__init__.py | 0
.../modules/__init__.py | 0
.../modules/image_interrogator/__init__.py | 0
.../image_interrogator/image_interrogator.py | 56 +++++++------
.../image_interrogator.trainer | 12 +++
.../modules/image_interrogator/readme.md | 0
.../image_interrogator/requirements.txt | 0
.../modules/image_interrogator/version.py | 0
.../modules/image_upscale/__init__.py | 0
.../image_upscale/image_upscale_realesrgan.py | 72 ++++++++---------
.../image_upscale_realesrgan.trainer | 10 +++
.../image_upscale/inference_realesrgan.py | 4 +-
.../modules/image_upscale/requirements.txt | 0
.../modules/image_upscale/version.py | 0
.../modules/stablediffusionxl/__init__.py | 0
.../modules/stablediffusionxl/lora.py | 34 ++++----
.../modules/stablediffusionxl/readme.md | 2 +-
.../stablediffusionxl/requirements.txt | 0
.../stablediffusionxl-img2img.py | 62 +++++++-------
.../stablediffusionxl-img2img.trainer | 42 ++++++++++
.../stablediffusionxl/stablediffusionxl.py | 0
.../stablediffusionxl.trainer | 68 ++++++++++++++++
.../modules/stablediffusionxl/version.py | 0
.../stablevideodiffusion/requirements.txt | 0
.../stablevideodiffusion.py | 30 +++----
.../stablevideodiffusion.trainer | 11 +++
.../modules/stablevideodiffusion/version.py | 0
.../modules/whisperx/__init__.py | 0
.../modules/whisperx/readme.md | 5 +-
.../modules/whisperx/requirements.txt | 0
.../modules/whisperx/version.py | 0
.../modules/whisperx/whisperx_transcript.py | 40 ++++++----
.../whisperx/whisperx_transcript.trainer | 10 +++
.../{nova_server => discover}/run_windows.cmd | 0
.../setup_windows.cmd | 0
.../{nova_server => discover}/utils.py | 10 ++-
.../mlx/modules/stable_diffusion/__init__.py | 18 ++---
.../mlx/modules/stable_diffusion/config.py | 2 +-
.../mlx/modules/stable_diffusion/model_io.py | 11 +--
.../mlx/modules/stable_diffusion/sampler.py | 8 +-
.../mlx/modules/stable_diffusion/unet.py | 80 +++++++++----------
.../mlx/modules/stable_diffusion/vae.py | 38 ++++-----
.../image_interrogator.trainer | 10 ---
.../image_upscale_realesrgan.trainer | 9 ---
.../stablediffusionxl-img2img.trainer | 26 ------
.../stablediffusionxl.trainer | 41 ----------
.../stablevideodiffusion.trainer | 9 ---
.../whisperx/whisperx_transcript.trainer | 9 ---
48 files changed, 380 insertions(+), 349 deletions(-)
rename nostr_dvm/backends/{nova_server => discover}/__init__.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/__init__.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/__init__.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/image_interrogator.py (68%)
create mode 100644 nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer
rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/readme.md (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/requirements.txt (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_interrogator/version.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/__init__.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/image_upscale_realesrgan.py (83%)
create mode 100644 nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer
rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/inference_realesrgan.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/requirements.txt (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/image_upscale/version.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/__init__.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/lora.py (70%)
rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/readme.md (99%)
rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/requirements.txt (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/stablediffusionxl-img2img.py (80%)
create mode 100644 nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/stablediffusionxl.py (100%)
create mode 100644 nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer
rename nostr_dvm/backends/{nova_server => discover}/modules/stablediffusionxl/version.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/stablevideodiffusion/requirements.txt (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/stablevideodiffusion/stablevideodiffusion.py (86%)
create mode 100644 nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer
rename nostr_dvm/backends/{nova_server => discover}/modules/stablevideodiffusion/version.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/__init__.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/readme.md (94%)
rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/requirements.txt (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/version.py (100%)
rename nostr_dvm/backends/{nova_server => discover}/modules/whisperx/whisperx_transcript.py (83%)
create mode 100644 nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer
rename nostr_dvm/backends/{nova_server => discover}/run_windows.cmd (100%)
rename nostr_dvm/backends/{nova_server => discover}/setup_windows.cmd (100%)
rename nostr_dvm/backends/{nova_server => discover}/utils.py (98%)
delete mode 100644 nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer
delete mode 100644 nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer
delete mode 100644 nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
delete mode 100644 nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer
delete mode 100644 nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer
delete mode 100644 nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer
diff --git a/nostr_dvm/backends/nova_server/__init__.py b/nostr_dvm/backends/discover/__init__.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/__init__.py
rename to nostr_dvm/backends/discover/__init__.py
diff --git a/nostr_dvm/backends/nova_server/modules/__init__.py b/nostr_dvm/backends/discover/modules/__init__.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/__init__.py
rename to nostr_dvm/backends/discover/modules/__init__.py
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py b/nostr_dvm/backends/discover/modules/image_interrogator/__init__.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py
rename to nostr_dvm/backends/discover/modules/image_interrogator/__init__.py
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.py
similarity index 68%
rename from nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py
rename to nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.py
index 217f5f3..7facb43 100644
--- a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py
+++ b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.py
@@ -1,18 +1,17 @@
"""StableDiffusionXL Module
"""
-import gc
-import sys
import os
+import sys
sys.path.insert(0, os.path.dirname(__file__))
-
from nova_utils.interfaces.server_module import Processor
# Setting defaults
-_default_options = {"kind": "prompt", "mode": "fast" }
+_default_options = {"kind": "prompt", "mode": "fast"}
-# TODO: add log infos,
+
+# TODO: add log infos,
class ImageInterrogator(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -20,7 +19,6 @@ class ImageInterrogator(Processor):
self.device = None
self.ds_iter = None
self.current_session = None
-
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
@@ -36,18 +34,17 @@ class ImageInterrogator(Processor):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
- self.current_session = self.ds_iter.sessions[current_session_name]['manager']
- #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
- kind = self.options['kind'] #"prompt" #"analysis" #prompt
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
+ kind = self.options['kind'] # "prompt" #"analysis" #prompt
mode = self.options['mode']
- #url = self.current_session.input_data['input_image_url'].data[0]
- #print(url)
+ # url = self.current_session.input_data['input_image_url'].data[0]
+ # print(url)
input_image = self.current_session.input_data['input_image'].data
- init_image = PILImage.fromarray(input_image)
+ init_image = PILImage.fromarray(input_image)
mwidth = 256
mheight = 256
-
w = mwidth
h = mheight
if init_image.width > init_image.height:
@@ -68,11 +65,9 @@ class ImageInterrogator(Processor):
config = Config(clip_model_name="ViT-L-14/openai", device="cuda")
-
if kind == "analysis":
ci = Interrogator(config)
-
image_features = ci.image_to_features(init_image)
top_mediums = ci.mediums.rank(image_features, 5)
@@ -81,15 +76,20 @@ class ImageInterrogator(Processor):
top_trendings = ci.trendings.rank(image_features, 5)
top_flavors = ci.flavors.rank(image_features, 5)
- medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
- artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
+ medium_ranks = {medium: sim for medium, sim in
+ zip(top_mediums, ci.similarities(image_features, top_mediums))}
+ artist_ranks = {artist: sim for artist, sim in
+ zip(top_artists, ci.similarities(image_features, top_artists))}
movement_ranks = {movement: sim for movement, sim in
- zip(top_movements, ci.similarities(image_features, top_movements))}
+ zip(top_movements, ci.similarities(image_features, top_movements))}
trending_ranks = {trending: sim for trending, sim in
- zip(top_trendings, ci.similarities(image_features, top_trendings))}
- flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
+ zip(top_trendings, ci.similarities(image_features, top_trendings))}
+ flavor_ranks = {flavor: sim for flavor, sim in
+ zip(top_flavors, ci.similarities(image_features, top_flavors))}
- result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
+ result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(
+ artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(
+ trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
print(result)
return result
@@ -100,8 +100,8 @@ class ImageInterrogator(Processor):
ci.config.chunk_size = 2024
ci.config.clip_offload = True
ci.config.apply_low_vram_defaults()
- #MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
- ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024
+ # MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
+ ci.config.flavor_intermediate_count = 2024 # if clip_model_name == MODELS[0] else 1024
image = init_image
if mode == 'best':
@@ -113,17 +113,15 @@ class ImageInterrogator(Processor):
elif mode == 'negative':
prompt = ci.interrogate_negative(image)
- #print(str(prompt))
+ # print(str(prompt))
return prompt
-
# config = Config(clip_model_name=os.environ['TRANSFORMERS_CACHE'] + "ViT-L-14/openai", device="cuda")git
# ci = Interrogator(config)
- # "ViT-L-14/openai"))
- # "ViT-g-14/laion2B-s34B-b88K"))
+ # "ViT-L-14/openai"))
+ # "ViT-g-14/laion2B-s34B-b88K"))
-
def to_output(self, data: dict):
import numpy as np
self.current_session.output_data_templates['output'].data = np.array([data])
- return self.current_session.output_data_templates
\ No newline at end of file
+ return self.current_session.output_data_templates
diff --git a/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer
new file mode 100644
index 0000000..e218aa2
--- /dev/null
+++ b/nostr_dvm/backends/discover/modules/image_interrogator/image_interrogator.trainer
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md b/nostr_dvm/backends/discover/modules/image_interrogator/readme.md
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md
rename to nostr_dvm/backends/discover/modules/image_interrogator/readme.md
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt b/nostr_dvm/backends/discover/modules/image_interrogator/requirements.txt
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt
rename to nostr_dvm/backends/discover/modules/image_interrogator/requirements.txt
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py b/nostr_dvm/backends/discover/modules/image_interrogator/version.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_interrogator/version.py
rename to nostr_dvm/backends/discover/modules/image_interrogator/version.py
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py b/nostr_dvm/backends/discover/modules/image_upscale/__init__.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py
rename to nostr_dvm/backends/discover/modules/image_upscale/__init__.py
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.py
similarity index 83%
rename from nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py
rename to nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.py
index 32ec7c8..a38dbdb 100644
--- a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py
+++ b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.py
@@ -2,25 +2,23 @@
"""
import os
-import glob
import sys
-from nova_utils.interfaces.server_module import Processor
+
+import cv2
+import numpy as np
+from PIL import Image as PILImage
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
-import numpy as np
-
-
-
+from nova_utils.interfaces.server_module import Processor
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
-import cv2
-from PIL import Image as PILImage
-
# Setting defaults
-_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False }
+_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0, "tile_pad": 10,
+ "pre_pad": 0, "compute_type": "fp32", "face_enhance": False}
-# TODO: add log infos,
+
+# TODO: add log infos,
class RealESRGan(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -28,8 +26,7 @@ class RealESRGan(Processor):
self.device = None
self.ds_iter = None
self.current_session = None
- self.model_path = None #Maybe need this later for manual path
-
+ self.model_path = None # Maybe need this later for manual path
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
@@ -42,12 +39,11 @@ class RealESRGan(Processor):
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
input_image = self.current_session.input_data['input_image'].data
-
try:
model, netscale, file_url = self.manageModel(str(self.options['model']))
- if self.model_path is not None:
+ if self.model_path is not None:
model_path = self.model_path
else:
model_path = os.path.join('weights', self.options['model'] + '.pth')
@@ -58,7 +54,7 @@ class RealESRGan(Processor):
model_path = load_file_from_url(
url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
- # use dni to control the denoise strength
+ # use dni to control the denoise strength
dni_weight = None
if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1:
wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
@@ -67,19 +63,18 @@ class RealESRGan(Processor):
half = True
if self.options["compute_type"] == "fp32":
- half=False
-
+ half = False
upsampler = RealESRGANer(
- scale=netscale,
- model_path=model_path,
- dni_weight=dni_weight,
- model=model,
- tile= int(self.options['tile']),
- tile_pad=int(self.options['tile_pad']),
- pre_pad=int(self.options['pre_pad']),
- half=half,
- gpu_id=None) #Can be set if multiple gpus are available
+ scale=netscale,
+ model_path=model_path,
+ dni_weight=dni_weight,
+ model=model,
+ tile=int(self.options['tile']),
+ tile_pad=int(self.options['tile_pad']),
+ pre_pad=int(self.options['pre_pad']),
+ half=half,
+ gpu_id=None) # Can be set if multiple gpus are available
if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement
from gfpgan import GFPGANer
@@ -89,24 +84,24 @@ class RealESRGan(Processor):
arch='clean',
channel_multiplier=2,
bg_upsampler=upsampler)
-
-
- pilimage = PILImage.fromarray(input_image)
+
+ pilimage = PILImage.fromarray(input_image)
img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR)
try:
if bool(self.options['face_enhance']):
- _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
+ _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False,
+ paste_back=True)
else:
output, _ = upsampler.enhance(img, outscale=int(self.options['outscale']))
except RuntimeError as error:
print('Error', error)
print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
-
+
output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
return output
-
-
+
+
except Exception as e:
@@ -114,12 +109,10 @@ class RealESRGan(Processor):
sys.stdout.flush()
return "Error"
-
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates
-
def manageModel(self, model_name):
if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
@@ -132,7 +125,8 @@ class RealESRGan(Processor):
elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
netscale = 4
- file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
+ file_url = [
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
netscale = 2
@@ -148,5 +142,5 @@ class RealESRGan(Processor):
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
]
-
- return model, netscale, file_url
\ No newline at end of file
+
+ return model, netscale, file_url
diff --git a/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer
new file mode 100644
index 0000000..4c6e346
--- /dev/null
+++ b/nostr_dvm/backends/discover/modules/image_upscale/image_upscale_realesrgan.trainer
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py b/nostr_dvm/backends/discover/modules/image_upscale/inference_realesrgan.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py
rename to nostr_dvm/backends/discover/modules/image_upscale/inference_realesrgan.py
index 0a8cc43..5185382 100644
--- a/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py
+++ b/nostr_dvm/backends/discover/modules/image_upscale/inference_realesrgan.py
@@ -1,10 +1,10 @@
import argparse
-import cv2
import glob
import os
+
+import cv2
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
-
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt b/nostr_dvm/backends/discover/modules/image_upscale/requirements.txt
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt
rename to nostr_dvm/backends/discover/modules/image_upscale/requirements.txt
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/version.py b/nostr_dvm/backends/discover/modules/image_upscale/version.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/image_upscale/version.py
rename to nostr_dvm/backends/discover/modules/image_upscale/version.py
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/__init__.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py
rename to nostr_dvm/backends/discover/modules/stablediffusionxl/__init__.py
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/lora.py
similarity index 70%
rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py
rename to nostr_dvm/backends/discover/modules/stablediffusionxl/lora.py
index 919e1b1..cf5f546 100644
--- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py
+++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/lora.py
@@ -3,98 +3,96 @@ def build_lora_xl(lora, prompt, lora_weight):
if lora == "3drenderstyle":
if lora_weight == "":
lora_weight = "1"
- prompt = "3d style, 3d render, " + prompt + " "
+ prompt = "3d style, 3d render, " + prompt + " "
existing_lora = True
if lora == "psychedelicnoir":
if lora_weight == "":
lora_weight = "1"
- prompt = prompt + " >"
+ prompt = prompt + " >"
existing_lora = True
if lora == "wojak":
if lora_weight == "":
lora_weight = "1"
- prompt = ", " + prompt + ", wojak"
+ prompt = ", " + prompt + ", wojak"
existing_lora = True
if lora == "dreamarts":
if lora_weight == "":
lora_weight = "1"
- prompt = ", " + prompt
+ prompt = ", " + prompt
existing_lora = True
if lora == "voxel":
if lora_weight == "":
lora_weight = "1"
- prompt = "voxel style, " + prompt + " "
+ prompt = "voxel style, " + prompt + " "
existing_lora = True
if lora == "kru3ger":
if lora_weight == "":
lora_weight = "1"
- prompt = "kru3ger_style, " + prompt + ""
+ prompt = "kru3ger_style, " + prompt + ""
existing_lora = True
if lora == "inkpunk":
if lora_weight == "":
lora_weight = "0.5"
- prompt = "inkpunk style, " + prompt + " "
+ prompt = "inkpunk style, " + prompt + " "
existing_lora = True
if lora == "inkscenery":
if lora_weight == "":
lora_weight = "1"
- prompt = " ink scenery, " + prompt + " "
+ prompt = " ink scenery, " + prompt + " "
existing_lora = True
if lora == "inkpainting":
if lora_weight == "":
lora_weight = "0.7"
- prompt = "painting style, " + prompt + " ,"
+ prompt = "painting style, " + prompt + " ,"
existing_lora = True
if lora == "timburton":
if lora_weight == "":
lora_weight = "1.27"
pencil_weight = "1.15"
- prompt = prompt + " (hand drawn with pencil"+pencil_weight+"), (tim burton style:"+lora_weight+")"
+ prompt = prompt + " (hand drawn with pencil" + pencil_weight + "), (tim burton style:" + lora_weight + ")"
existing_lora = True
if lora == "pixelart":
if lora_weight == "":
lora_weight = "1"
- prompt = prompt + " (flat shading:1.2), (minimalist:1.4), "
+ prompt = prompt + " (flat shading:1.2), (minimalist:1.4), "
existing_lora = True
if lora == "pepe":
if lora_weight == "":
lora_weight = "0.8"
- prompt = prompt + " , pepe"
+ prompt = prompt + " , pepe"
existing_lora = True
if lora == "bettertext":
if lora_weight == "":
lora_weight = "1"
- prompt = prompt + " ,"
+ prompt = prompt + " ,"
existing_lora = True
if lora == "mspaint":
if lora_weight == "":
lora_weight = "1"
- prompt = "MSPaint drawing " + prompt +">"
+ prompt = "MSPaint drawing " + prompt + ">"
existing_lora = True
if lora == "woodfigure":
if lora_weight == "":
lora_weight = "0.7"
- prompt = prompt + ",woodfigurez,artistic style "
+ prompt = prompt + ",woodfigurez,artistic style "
existing_lora = True
if lora == "fireelement":
prompt = prompt + ",composed of fire elements, fire element"
existing_lora = True
-
-
- return lora, prompt, existing_lora
\ No newline at end of file
+ return lora, prompt, existing_lora
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md b/nostr_dvm/backends/discover/modules/stablediffusionxl/readme.md
similarity index 99%
rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md
rename to nostr_dvm/backends/discover/modules/stablediffusionxl/readme.md
index cccbe30..281942b 100644
--- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md
+++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/readme.md
@@ -14,7 +14,7 @@ This modules provides image generation based on prompts
- `1-1` ,`4-3`, `16-9`, `16-10`, `3-4`,`9-16`,`10-16`
- `high_noise_frac`: Denoising factor
-
+
- `n_steps`: how many iterations should be performed
## Example payload
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt b/nostr_dvm/backends/discover/modules/stablediffusionxl/requirements.txt
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt
rename to nostr_dvm/backends/discover/modules/stablediffusionxl/requirements.txt
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.py
similarity index 80%
rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py
rename to nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.py
index bae89e8..08a90d3 100644
--- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py
+++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.py
@@ -2,26 +2,26 @@
"""
import gc
-import sys
import os
+import sys
# Add local dir to path for relative imports
sys.path.insert(0, os.path.dirname(__file__))
from nova_utils.interfaces.server_module import Processor
from nova_utils.utils.cache_utils import get_file
-from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
-from diffusers.utils import load_image
+from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, \
+ EulerAncestralDiscreteScheduler
import numpy as np
from PIL import Image as PILImage
from lora import build_lora_xl
-
-
# Setting defaults
-_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" }
+_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength": "0.58",
+ "guidance_scale": "11.0", "n_steps": "30", "lora": "", "lora_weight": "0.5"}
-# TODO: add log infos,
+
+# TODO: add log infos,
class StableDiffusionXL(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -29,7 +29,6 @@ class StableDiffusionXL(Processor):
self.device = None
self.ds_iter = None
self.current_session = None
-
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
@@ -42,15 +41,15 @@ class StableDiffusionXL(Processor):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
- self.current_session = self.ds_iter.sessions[current_session_name]['manager']
- #input_image_url = self.current_session.input_data['input_image_url'].data
- #input_image_url = ' '.join(input_image_url)
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ # input_image_url = self.current_session.input_data['input_image_url'].data
+ # input_image_url = ' '.join(input_image_url)
input_image = self.current_session.input_data['input_image'].data
input_prompt = self.current_session.input_data['input_prompt'].data
input_prompt = ' '.join(input_prompt)
negative_prompt = self.current_session.input_data['negative_prompt'].data
negative_prompt = ' '.join(negative_prompt)
- # print("Input Image: " + input_image_url)
+ # print("Input Image: " + input_image_url)
print("Input prompt: " + input_prompt)
print("Negative prompt: " + negative_prompt)
@@ -58,8 +57,8 @@ class StableDiffusionXL(Processor):
model = self.options['model']
lora = self.options['lora']
- #init_image = load_image(input_image_url).convert("RGB")
- init_image = PILImage.fromarray(input_image)
+ # init_image = load_image(input_image_url).convert("RGB")
+ init_image = PILImage.fromarray(input_image)
mwidth = 1024
mheight = 1024
@@ -82,44 +81,42 @@ class StableDiffusionXL(Processor):
if lora != "" and lora != "None":
print("Loading lora...")
- lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" )
+ lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "")
from diffusers import AutoPipelineForImage2Image
import torch
-
-
- #init_image = init_image.resize((int(w/2), int(h/2)))
+ # init_image = init_image.resize((int(w/2), int(h/2)))
pipe = AutoPipelineForImage2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
torch_dtype=torch.float16).to("cuda")
if existing_lora:
- lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0]
+ lora_uri = [x for x in self.trainer.meta_uri if x.uri_id == lora][0]
if str(lora_uri) == "":
- return "Lora not found"
+ return "Lora not found"
lora_path = get_file(
fname=str(lora_uri.uri_id) + ".safetensors",
origin=lora_uri.uri_url,
file_hash=lora_uri.uri_hash,
cache_dir=os.getenv("CACHE_DIR"),
tmp_dir=os.getenv("TMP_DIR"),
- )
+ )
pipe.load_lora_weights(str(lora_path))
print("Loaded Lora: " + str(lora_path))
seed = 20000
generator = torch.manual_seed(seed)
- #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
-
+ # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
+
image = pipe(
prompt=input_prompt,
negative_prompt=negative_prompt,
image=init_image,
generator=generator,
- num_inference_steps=int(self.options['n_steps']),
+ num_inference_steps=int(self.options['n_steps']),
image_guidance_scale=float(self.options['guidance_scale']),
strength=float(str(self.options['strength']))).images[0]
@@ -137,19 +134,21 @@ class StableDiffusionXL(Processor):
pipe = pipe.to(self.device)
image = pipe(input_prompt, image=init_image,
- negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0]
-
+ negative_prompt=negative_prompt, num_inference_steps=n_steps,
+ strength=transformation_strength, guidance_scale=cfg_scale).images[0]
+
elif model == "timbrooks/instruct-pix2pix":
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16,
- safety_checker=None)
+ safety_checker=None)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to(self.device)
n_steps = int(self.options['n_steps'])
cfg_scale = float(self.options['guidance_scale'])
- image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0]
-
+ image = \
+ pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps,
+ image_guidance_scale=cfg_scale).images[0]
if torch.cuda.is_available():
del pipe
@@ -157,7 +156,6 @@ class StableDiffusionXL(Processor):
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
-
numpy_array = np.array(image)
return numpy_array
@@ -167,10 +165,6 @@ class StableDiffusionXL(Processor):
sys.stdout.flush()
return "Error"
-
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates
-
-
-
\ No newline at end of file
diff --git a/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
new file mode 100644
index 0000000..d561da7
--- /dev/null
+++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py
rename to nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.py
diff --git a/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer
new file mode 100644
index 0000000..466a13c
--- /dev/null
+++ b/nostr_dvm/backends/discover/modules/stablediffusionxl/stablediffusionxl.trainer
@@ -0,0 +1,68 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py b/nostr_dvm/backends/discover/modules/stablediffusionxl/version.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py
rename to nostr_dvm/backends/discover/modules/stablediffusionxl/version.py
diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt b/nostr_dvm/backends/discover/modules/stablevideodiffusion/requirements.txt
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/stablevideodiffusion/requirements.txt
rename to nostr_dvm/backends/discover/modules/stablevideodiffusion/requirements.txt
diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.py
similarity index 86%
rename from nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py
rename to nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.py
index 62e6a66..82042a4 100644
--- a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.py
+++ b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.py
@@ -1,26 +1,20 @@
import gc
-import sys
import os
+import sys
sys.path.insert(0, os.path.dirname(__file__))
-from ssl import Options
from nova_utils.interfaces.server_module import Processor
import torch
from diffusers import StableVideoDiffusionPipeline
-from diffusers.utils import load_image, export_to_video
-from nova_utils.utils.cache_utils import get_file
import numpy as np
from PIL import Image as PILImage
-
-
-
-
# Setting defaults
-_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps":"7", "seed":""}
+_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps": "7", "seed": ""}
-# TODO: add log infos,
+
+# TODO: add log infos,
class StableVideoDiffusion(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -28,24 +22,21 @@ class StableVideoDiffusion(Processor):
self.device = None
self.ds_iter = None
self.current_session = None
-
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
+
def process_data(self, ds_iter) -> dict:
-
-
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
- self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
input_image = self.current_session.input_data['input_image'].data
-
try:
pipe = StableVideoDiffusionPipeline.from_pretrained(
self.options["model"], torch_dtype=torch.float16, variant="fp16"
@@ -53,7 +44,7 @@ class StableVideoDiffusion(Processor):
pipe.enable_model_cpu_offload()
# Load the conditioning image
- image = PILImage.fromarray(input_image)
+ image = PILImage.fromarray(input_image)
image = image.resize((1024, 576))
if self.options["seed"] != "" and self.options["seed"] != " ":
@@ -68,7 +59,6 @@ class StableVideoDiffusion(Processor):
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
-
np_video = np.stack([np.asarray(x) for x in frames])
return np_video
@@ -77,7 +67,7 @@ class StableVideoDiffusion(Processor):
print(e)
sys.stdout.flush()
return "Error"
-
+
def calculate_aspect(self, width: int, height: int):
def gcd(a, b):
"""The GCD (greatest common divisor) is the highest number that evenly divides both width and height."""
@@ -89,12 +79,10 @@ class StableVideoDiffusion(Processor):
return x, y
-
-
def to_output(self, data: list):
video = self.current_session.output_data_templates['output_video']
video.data = data
video.meta_data.sample_rate = int(self.options['fps'])
video.meta_data.media_type = 'video'
- return self.current_session.output_data_templates
\ No newline at end of file
+ return self.current_session.output_data_templates
diff --git a/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer
new file mode 100644
index 0000000..d6b9123
--- /dev/null
+++ b/nostr_dvm/backends/discover/modules/stablevideodiffusion/stablevideodiffusion.trainer
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/version.py b/nostr_dvm/backends/discover/modules/stablevideodiffusion/version.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/stablevideodiffusion/version.py
rename to nostr_dvm/backends/discover/modules/stablevideodiffusion/version.py
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/__init__.py b/nostr_dvm/backends/discover/modules/whisperx/__init__.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/whisperx/__init__.py
rename to nostr_dvm/backends/discover/modules/whisperx/__init__.py
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/readme.md b/nostr_dvm/backends/discover/modules/whisperx/readme.md
similarity index 94%
rename from nostr_dvm/backends/nova_server/modules/whisperx/readme.md
rename to nostr_dvm/backends/discover/modules/whisperx/readme.md
index ffe67a3..32878d1 100644
--- a/nostr_dvm/backends/nova_server/modules/whisperx/readme.md
+++ b/nostr_dvm/backends/discover/modules/whisperx/readme.md
@@ -21,9 +21,10 @@ speaker diarization.
- `word` Improved segmentation using separate alignment model. Equivalent to word alignment.
- `language`: language code for transcription and alignment models. Supported languages:
- - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh`
+ - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`,
+ `te`, `tr`, `uk`, `ur`, `vi`, `zh`
- `None`: auto-detect language from first 30 seconds of audio
-
+
- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption
## Examples
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt b/nostr_dvm/backends/discover/modules/whisperx/requirements.txt
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt
rename to nostr_dvm/backends/discover/modules/whisperx/requirements.txt
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/version.py b/nostr_dvm/backends/discover/modules/whisperx/version.py
similarity index 100%
rename from nostr_dvm/backends/nova_server/modules/whisperx/version.py
rename to nostr_dvm/backends/discover/modules/whisperx/version.py
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.py
similarity index 83%
rename from nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py
rename to nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.py
index f24e63e..e8148f0 100644
--- a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py
+++ b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.py
@@ -1,10 +1,13 @@
"""WhisperX Module
"""
-from nova_utils.interfaces.server_module import Processor
import sys
+from nova_utils.interfaces.server_module import Processor
+
# Setting defaults
-_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'}
+_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None,
+ 'compute_type': 'float16'}
+
# supported language codes, cf. whisperx/alignment.py
# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None}
@@ -45,11 +48,14 @@ class WhisperX(Processor):
sys.stdout.flush()
model = whisperx.load_model(self.options["model"], self.device, compute_type='float32',
language=self.options['language'])
-
+
result = model.transcribe(audio, batch_size=int(self.options["batch_size"]))
# delete model if low on GPU resources
- import gc; gc.collect(); torch.cuda.empty_cache(); del model
+ import gc;
+ gc.collect();
+ torch.cuda.empty_cache();
+ del model
if not self.options["alignment_mode"] == "raw":
# load alignment model and metadata
@@ -64,7 +70,10 @@ class WhisperX(Processor):
result = result_aligned
# delete model if low on GPU resources
- import gc; gc.collect(); torch.cuda.empty_cache(); del model_a
+ import gc;
+ gc.collect();
+ torch.cuda.empty_cache();
+ del model_a
return result
@@ -83,26 +92,26 @@ class WhisperX(Processor):
if "end" in w.keys():
last_end = w["end"]
else:
- #TODO: rethink lower bound for confidence; place word centred instead of left aligned
+ # TODO: rethink lower bound for confidence; place word centred instead of left aligned
w["start"] = last_end
last_end += 0.065
w["end"] = last_end
- #w["score"] = 0.000
+ # w["score"] = 0.000
w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4])
-
+
def _hmean(scores):
if len(scores) > 0:
prod = scores[0]
for s in scores[1:]:
prod *= s
- prod = prod**(1/len(scores))
+ prod = prod ** (1 / len(scores))
else:
prod = 0
return prod
-
+
if (
- self.options["alignment_mode"] == "word"
- or self.options["alignment_mode"] == "segment"
+ self.options["alignment_mode"] == "word"
+ or self.options["alignment_mode"] == "segment"
):
_fix_missing_timestamps(data)
@@ -113,12 +122,13 @@ class WhisperX(Processor):
]
else:
anno_data = [
- #(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
- (w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?)
+ # (w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
+ (w["start"], w["end"], w["text"], 1) for w in data["segments"]
+ # alignment 'raw' no longer contains a score(?)
]
# convert to milliseconds
- anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data]
+ anno_data = [(x[0] * 1000, x[1] * 1000, x[2], x[3]) for x in anno_data]
out = self.session_manager.output_data_templates[self.output.io_id]
out.data = anno_data
return self.session_manager.output_data_templates
diff --git a/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer
new file mode 100644
index 0000000..423a1fd
--- /dev/null
+++ b/nostr_dvm/backends/discover/modules/whisperx/whisperx_transcript.trainer
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/run_windows.cmd b/nostr_dvm/backends/discover/run_windows.cmd
similarity index 100%
rename from nostr_dvm/backends/nova_server/run_windows.cmd
rename to nostr_dvm/backends/discover/run_windows.cmd
diff --git a/nostr_dvm/backends/nova_server/setup_windows.cmd b/nostr_dvm/backends/discover/setup_windows.cmd
similarity index 100%
rename from nostr_dvm/backends/nova_server/setup_windows.cmd
rename to nostr_dvm/backends/discover/setup_windows.cmd
diff --git a/nostr_dvm/backends/nova_server/utils.py b/nostr_dvm/backends/discover/utils.py
similarity index 98%
rename from nostr_dvm/backends/nova_server/utils.py
rename to nostr_dvm/backends/discover/utils.py
index 77e73b0..41bb317 100644
--- a/nostr_dvm/backends/nova_server/utils.py
+++ b/nostr_dvm/backends/discover/utils.py
@@ -4,9 +4,10 @@ import json
import os
import time
import zipfile
+
+import PIL.Image as Image
import pandas as pd
import requests
-import PIL.Image as Image
from moviepy.video.io.VideoFileClip import VideoFileClip
from nostr_dvm.utils.output_utils import upload_media_to_hoster
@@ -24,6 +25,7 @@ in the module that is calling the server
"""
+
def send_request_to_server(request_form, address):
print("Sending job to Server")
url = ('http://' + address + '/process')
@@ -46,6 +48,7 @@ def send_file_to_server(filepath, address):
return result
+
"""
check_n_server_status(request_form, address)
Function that requests the status of the current process with the jobID (we use the Nostr event as jobID).
@@ -76,7 +79,6 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
# WAITING = 0, RUNNING = 1, FINISHED = 2, ERROR = 3
time.sleep(1.0)
-
if status == 2:
try:
url_fetch = 'http://' + address + '/fetch_result'
@@ -93,7 +95,7 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
return result
elif content_type == 'video/mp4':
with open('./outputs/video.mp4', 'wb') as f:
- f.write(response.content)
+ f.write(response.content)
f.close()
clip = VideoFileClip("./outputs/video.mp4")
clip.write_videofile("./outputs/video2.mp4")
@@ -121,4 +123,4 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
print("Couldn't fetch result: " + str(e))
elif status == 3:
- return "error"
\ No newline at end of file
+ return "error"
diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py
index 079e10f..bc10cff 100644
--- a/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py
+++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py
@@ -42,14 +42,14 @@ class StableDiffusion:
self.tokenizer = load_tokenizer(model)
def generate_latents(
- self,
- text: str,
- n_images: int = 1,
- num_steps: int = 50,
- cfg_weight: float = 7.5,
- negative_text: str = "",
- latent_size: Tuple[int] = (64, 64),
- seed=None,
+ self,
+ text: str,
+ n_images: int = 1,
+ num_steps: int = 50,
+ cfg_weight: float = 7.5,
+ negative_text: str = "",
+ latent_size: Tuple[int] = (64, 64),
+ seed=None,
):
# Set the PRNG state
seed = seed or int(time.time())
@@ -94,4 +94,4 @@ class StableDiffusion:
def decode(self, x_t):
x = self.autoencoder.decode(x_t / self.autoencoder.scaling_factor)
x = mx.minimum(1, mx.maximum(0, x / 2 + 0.5))
- return x
\ No newline at end of file
+ return x
diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py
index 6fcf595..29d023a 100644
--- a/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py
+++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py
@@ -1,7 +1,7 @@
# Copyright © 2023 Apple Inc.
from dataclasses import dataclass
-from typing import Optional, Tuple
+from typing import Tuple
@dataclass
diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py
index 57879ef..6863910 100644
--- a/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py
+++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py
@@ -1,14 +1,12 @@
# Copyright © 2023 Apple Inc.
import json
-from functools import partial
-
-import numpy as np
-from huggingface_hub import hf_hub_download
-from safetensors import safe_open as safetensor_open
import mlx.core as mx
+import numpy as np
+from huggingface_hub import hf_hub_download
from mlx.utils import tree_unflatten
+from safetensors import safe_open as safetensor_open
from .clip import CLIPTextModel
from .config import UNetConfig, CLIPTextModelConfig, AutoencoderConfig, DiffusionConfig
@@ -16,7 +14,6 @@ from .tokenizer import Tokenizer
from .unet import UNetModel
from .vae import Autoencoder
-
_DEFAULT_MODEL = "stabilityai/stable-diffusion-2-1-base"
_MODELS = {
# See https://huggingface.co/stabilityai/stable-diffusion-2-1-base for the model details and license
@@ -285,7 +282,7 @@ def load_tokenizer(key: str = _DEFAULT_MODEL):
merges_file = hf_hub_download(key, _MODELS[key]["tokenizer_merges"])
with open(merges_file, encoding="utf-8") as f:
- bpe_merges = f.read().strip().split("\n")[1 : 49152 - 256 - 2 + 1]
+ bpe_merges = f.read().strip().split("\n")[1: 49152 - 256 - 2 + 1]
bpe_merges = [tuple(m.split()) for m in bpe_merges]
bpe_ranks = dict(map(reversed, enumerate(bpe_merges)))
diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py
index a1edf93..ee80e6a 100644
--- a/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py
+++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py
@@ -1,9 +1,9 @@
# Copyright © 2023 Apple Inc.
-from .config import DiffusionConfig
-
import mlx.core as mx
+from .config import DiffusionConfig
+
def _linspace(a, b, num):
x = mx.arange(0, num) / (num - 1)
@@ -37,7 +37,7 @@ class SimpleEulerSampler:
)
elif config.beta_schedule == "scaled_linear":
betas = _linspace(
- config.beta_start**0.5, config.beta_end**0.5, config.num_train_steps
+ config.beta_start ** 0.5, config.beta_end ** 0.5, config.num_train_steps
).square()
else:
raise NotImplementedError(f"{config.beta_schedule} is not implemented.")
@@ -52,7 +52,7 @@ class SimpleEulerSampler:
def sample_prior(self, shape, dtype=mx.float32, key=None):
noise = mx.random.normal(shape, key=key)
return (
- noise * self._sigmas[-1] * (self._sigmas[-1].square() + 1).rsqrt()
+ noise * self._sigmas[-1] * (self._sigmas[-1].square() + 1).rsqrt()
).astype(dtype)
def sigmas(self, t):
diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py
index c1a3121..73ee31e 100644
--- a/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py
+++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py
@@ -34,11 +34,11 @@ class TimestepEmbedding(nn.Module):
class TransformerBlock(nn.Module):
def __init__(
- self,
- model_dims: int,
- num_heads: int,
- hidden_dims: Optional[int] = None,
- memory_dims: Optional[int] = None,
+ self,
+ model_dims: int,
+ num_heads: int,
+ hidden_dims: Optional[int] = None,
+ memory_dims: Optional[int] = None,
):
super().__init__()
@@ -85,13 +85,13 @@ class Transformer2D(nn.Module):
"""A transformer model for inputs with 2 spatial dimensions."""
def __init__(
- self,
- in_channels: int,
- model_dims: int,
- encoder_dims: int,
- num_heads: int,
- num_layers: int = 1,
- norm_num_groups: int = 32,
+ self,
+ in_channels: int,
+ model_dims: int,
+ encoder_dims: int,
+ num_heads: int,
+ num_layers: int = 1,
+ norm_num_groups: int = 32,
):
super().__init__()
@@ -125,11 +125,11 @@ class Transformer2D(nn.Module):
class ResnetBlock2D(nn.Module):
def __init__(
- self,
- in_channels: int,
- out_channels: Optional[int] = None,
- groups: int = 32,
- temb_channels: Optional[int] = None,
+ self,
+ in_channels: int,
+ out_channels: Optional[int] = None,
+ groups: int = 32,
+ temb_channels: Optional[int] = None,
):
super().__init__()
@@ -169,19 +169,19 @@ class ResnetBlock2D(nn.Module):
class UNetBlock2D(nn.Module):
def __init__(
- self,
- in_channels: int,
- out_channels: int,
- temb_channels: int,
- prev_out_channels: Optional[int] = None,
- num_layers: int = 1,
- transformer_layers_per_block: int = 1,
- num_attention_heads: int = 8,
- cross_attention_dim=1280,
- resnet_groups: int = 32,
- add_downsample=True,
- add_upsample=True,
- add_cross_attention=True,
+ self,
+ in_channels: int,
+ out_channels: int,
+ temb_channels: int,
+ prev_out_channels: Optional[int] = None,
+ num_layers: int = 1,
+ transformer_layers_per_block: int = 1,
+ num_attention_heads: int = 8,
+ cross_attention_dim=1280,
+ resnet_groups: int = 32,
+ add_downsample=True,
+ add_upsample=True,
+ add_cross_attention=True,
):
super().__init__()
@@ -232,13 +232,13 @@ class UNetBlock2D(nn.Module):
)
def __call__(
- self,
- x,
- encoder_x=None,
- temb=None,
- attn_mask=None,
- encoder_attn_mask=None,
- residual_hidden_states=None,
+ self,
+ x,
+ encoder_x=None,
+ temb=None,
+ attn_mask=None,
+ encoder_attn_mask=None,
+ residual_hidden_states=None,
):
output_states = []
@@ -340,9 +340,9 @@ class UNetModel(nn.Module):
# Make the upsampling blocks
block_channels = (
- [config.block_out_channels[0]]
- + list(config.block_out_channels)
- + [config.block_out_channels[-1]]
+ [config.block_out_channels[0]]
+ + list(config.block_out_channels)
+ + [config.block_out_channels[-1]]
)
self.up_blocks = [
UNetBlock2D(
diff --git a/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py
index fe473d4..239b49d 100644
--- a/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py
+++ b/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py
@@ -44,13 +44,13 @@ class Attention(nn.Module):
class EncoderDecoderBlock2D(nn.Module):
def __init__(
- self,
- in_channels: int,
- out_channels: int,
- num_layers: int = 1,
- resnet_groups: int = 32,
- add_downsample=True,
- add_upsample=True,
+ self,
+ in_channels: int,
+ out_channels: int,
+ num_layers: int = 1,
+ resnet_groups: int = 32,
+ add_downsample=True,
+ add_upsample=True,
):
super().__init__()
@@ -93,12 +93,12 @@ class Encoder(nn.Module):
"""Implements the encoder side of the Autoencoder."""
def __init__(
- self,
- in_channels: int,
- out_channels: int,
- block_out_channels: List[int] = [64],
- layers_per_block: int = 2,
- resnet_groups: int = 32,
+ self,
+ in_channels: int,
+ out_channels: int,
+ block_out_channels: List[int] = [64],
+ layers_per_block: int = 2,
+ resnet_groups: int = 32,
):
super().__init__()
@@ -159,12 +159,12 @@ class Decoder(nn.Module):
"""Implements the decoder side of the Autoencoder."""
def __init__(
- self,
- in_channels: int,
- out_channels: int,
- block_out_channels: List[int] = [64],
- layers_per_block: int = 2,
- resnet_groups: int = 32,
+ self,
+ in_channels: int,
+ out_channels: int,
+ block_out_channels: List[int] = [64],
+ layers_per_block: int = 2,
+ resnet_groups: int = 32,
):
super().__init__()
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer
deleted file mode 100644
index 216205c..0000000
--- a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer
deleted file mode 100644
index b3bf12f..0000000
--- a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-
-
-
-
-
-
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
deleted file mode 100644
index b6f4167..0000000
--- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer
deleted file mode 100644
index 0e86e7e..0000000
--- a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer
+++ /dev/null
@@ -1,41 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer b/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer
deleted file mode 100644
index 9e8dfcc..0000000
--- a/nostr_dvm/backends/nova_server/modules/stablevideodiffusion/stablevideodiffusion.trainer
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-
-
-
-
-
-
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer
deleted file mode 100644
index 44dae41..0000000
--- a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-
-
-
-
-
-