diff --git a/main.py b/main.py
index 5157144..7f410cd 100644
--- a/main.py
+++ b/main.py
@@ -1,12 +1,13 @@
import os
from pathlib import Path
import dotenv
+from sys import platform
from nostr_dvm.bot import Bot
from nostr_dvm.tasks import videogeneration_replicate_svd, imagegeneration_replicate_sdxl, textgeneration_llmlite, \
trending_notes_nostrband, discovery_inactive_follows, translation_google, textextraction_pdf, \
translation_libretranslate, textextraction_google, convert_media, imagegeneration_openai_dalle, texttospeech, \
- imagegeneration_mlx, advanced_search, textextraction_whisper_mlx
+ imagegeneration_sd21_mlx, advanced_search
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
@@ -139,10 +140,10 @@ def playground():
bot_config.SUPPORTED_DVMS.append(tts)
tts.run()
- from sys import platform
+
if platform == "darwin":
# Test with MLX for OSX M1/M2/M3 chips
- mlx = imagegeneration_mlx.build_example("SD with MLX", "mlx_sd", admin_config)
+ mlx = imagegeneration_sd21_mlx.build_example("SD with MLX", "mlx_sd", admin_config)
bot_config.SUPPORTED_DVMS.append(mlx)
mlx.run()
diff --git a/backends/__init__.py b/nostr_dvm/backends/mlx/__init__.py
similarity index 100%
rename from backends/__init__.py
rename to nostr_dvm/backends/mlx/__init__.py
diff --git a/backends/mlx/__init__.py b/nostr_dvm/backends/mlx/modules/__init__.py
similarity index 100%
rename from backends/mlx/__init__.py
rename to nostr_dvm/backends/mlx/modules/__init__.py
diff --git a/backends/mlx/stable_diffusion/__init__.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py
similarity index 100%
rename from backends/mlx/stable_diffusion/__init__.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py
diff --git a/backends/mlx/stable_diffusion/clip.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/clip.py
similarity index 100%
rename from backends/mlx/stable_diffusion/clip.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/clip.py
diff --git a/backends/mlx/stable_diffusion/config.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py
similarity index 100%
rename from backends/mlx/stable_diffusion/config.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/config.py
diff --git a/backends/mlx/stable_diffusion/model_io.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py
similarity index 100%
rename from backends/mlx/stable_diffusion/model_io.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py
diff --git a/backends/mlx/stable_diffusion/sampler.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py
similarity index 100%
rename from backends/mlx/stable_diffusion/sampler.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py
diff --git a/backends/mlx/stable_diffusion/tokenizer.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/tokenizer.py
similarity index 100%
rename from backends/mlx/stable_diffusion/tokenizer.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/tokenizer.py
diff --git a/backends/mlx/stable_diffusion/unet.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py
similarity index 100%
rename from backends/mlx/stable_diffusion/unet.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py
diff --git a/backends/mlx/stable_diffusion/vae.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py
similarity index 100%
rename from backends/mlx/stable_diffusion/vae.py
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py
diff --git a/nostr_dvm/backends/nova_server/modules/__init__.py b/nostr_dvm/backends/nova_server/modules/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py b/nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py
new file mode 100644
index 0000000..217f5f3
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py
@@ -0,0 +1,129 @@
+"""StableDiffusionXL Module
+"""
+import gc
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+
+from nova_utils.interfaces.server_module import Processor
+
+# Setting defaults
+_default_options = {"kind": "prompt", "mode": "fast" }
+
+# TODO: add logging
+class ImageInterrogator(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+
+ from PIL import Image as PILImage
+ import torch
+
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
+        kind = self.options['kind']  # "prompt" or "analysis"
+ mode = self.options['mode']
+ #url = self.current_session.input_data['input_image_url'].data[0]
+ #print(url)
+ input_image = self.current_session.input_data['input_image'].data
+ init_image = PILImage.fromarray(input_image)
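+        # fit the image into a 256x256 box while preserving its aspect ratio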
+ mwidth = 256
+ mheight = 256
+
+
+ w = mwidth
+ h = mheight
+ if init_image.width > init_image.height:
+ scale = float(init_image.height / init_image.width)
+ w = mwidth
+ h = int(mheight * scale)
+ elif init_image.width < init_image.height:
+ scale = float(init_image.width / init_image.height)
+ w = int(mwidth * scale)
+ h = mheight
+ else:
+ w = mwidth
+ h = mheight
+
+ init_image = init_image.resize((w, h))
+
+ from clip_interrogator import Config, Interrogator
+
+        config = Config(clip_model_name="ViT-L-14/openai", device=self.device)
+
+
+ if kind == "analysis":
+ ci = Interrogator(config)
+
+
+ image_features = ci.image_to_features(init_image)
+
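+            # rank the image against CLIP Interrogator's built-in category tables (top 5 each)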
+ top_mediums = ci.mediums.rank(image_features, 5)
+ top_artists = ci.artists.rank(image_features, 5)
+ top_movements = ci.movements.rank(image_features, 5)
+ top_trendings = ci.trendings.rank(image_features, 5)
+ top_flavors = ci.flavors.rank(image_features, 5)
+
+ medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
+ artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
+ movement_ranks = {movement: sim for movement, sim in
+ zip(top_movements, ci.similarities(image_features, top_movements))}
+ trending_ranks = {trending: sim for trending, sim in
+ zip(top_trendings, ci.similarities(image_features, top_trendings))}
+ flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
+
+ result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
+
+ print(result)
+ return result
+ else:
+
+ ci = Interrogator(config)
+ ci.config.blip_num_beams = 64
+ ci.config.chunk_size = 2024
+ ci.config.clip_offload = True
+ ci.config.apply_low_vram_defaults()
+ #MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
+ ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024
+
+ image = init_image
+ if mode == 'best':
+ prompt = ci.interrogate(image)
+ elif mode == 'classic':
+ prompt = ci.interrogate_classic(image)
+ elif mode == 'fast':
+ prompt = ci.interrogate_fast(image)
+        elif mode == 'negative':
+            prompt = ci.interrogate_negative(image)
+        else:
+            # fall back to fast interrogation so `prompt` is always bound
+            prompt = ci.interrogate_fast(image)
+
+ #print(str(prompt))
+ return prompt
+
+
+
+
+ def to_output(self, data: dict):
+ import numpy as np
+ self.current_session.output_data_templates['output'].data = np.array([data])
+ return self.current_session.output_data_templates
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer
new file mode 100644
index 0000000..216205c
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md b/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md
new file mode 100644
index 0000000..ec092db
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md
@@ -0,0 +1,11 @@
+# CLIP Interrogator
+
+This module provides prompt generation based on images.
+
+* https://huggingface.co/spaces/pharmapsychotic/CLIP-Interrogator
+
+## Options
+
+- `kind`: string, identifier of the kind of processing
+  - `prompt`: generates a prompt from the image (default)
+  - `analysis`: generates a categorical analysis (mediums, artists, movements, trendings, flavors)
+
+- `mode`: string, interrogation mode used when `kind` is `prompt`
+  - `best`, `classic`, `fast` (default), `negative`
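+
+## Example payload
+
+A minimal request sketch, assuming the module is reachable through the same nova-server `/predict` endpoint and trainer-file convention as the Stable Diffusion XL module; the port, the `src` types and the `inputfile` variable are assumptions, not tested values:
+
+```python
+import requests
+
+inputfile = 'path/to/my/image.jpg'  # placeholder path to the image to interrogate
+
+payload = {
+    'trainerFilePath': 'modules\\image_interrogator\\image_interrogator.trainer',
+    'server': '127.0.0.1',
+    'data': '[{"id":"input_image","type":"input","src":"file:image","uri":"' + inputfile + '","active":"True"},{"id":"output","type":"output","src":"user:text","active":"True"}]',
+    'optStr': 'kind=prompt;mode=fast'
+}
+
+url = 'http://127.0.0.1:53770/predict'
+headers = {'Content-type': 'application/x-www-form-urlencoded'}
+requests.post(url, headers=headers, data=payload)
+```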
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt b/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt
new file mode 100644
index 0000000..a9b489d
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt
@@ -0,0 +1,5 @@
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.1
+clip_interrogator
+git+https://github.com/huggingface/diffusers.git
diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py b/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py
new file mode 100644
index 0000000..adf3132
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py
@@ -0,0 +1,12 @@
+""" Clip Interrorgator
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '0'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py b/nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py
new file mode 100644
index 0000000..32ec7c8
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py
@@ -0,0 +1,152 @@
+"""RealESRGan Module
+"""
+
+import os
+import glob
+import sys
+from nova_utils.interfaces.server_module import Processor
+from basicsr.archs.rrdbnet_arch import RRDBNet
+from basicsr.utils.download_util import load_file_from_url
+import numpy as np
+
+
+
+from realesrgan import RealESRGANer
+from realesrgan.archs.srvgg_arch import SRVGGNetCompact
+import cv2
+from PIL import Image as PILImage
+
+
+# Setting defaults
+_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False }
+
+# TODO: add logging
+class RealESRGan(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+ self.model_path = None #Maybe need this later for manual path
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ input_image = self.current_session.input_data['input_image'].data
+
+
+ try:
+ model, netscale, file_url = self.manageModel(str(self.options['model']))
+
+ if self.model_path is not None:
+ model_path = self.model_path
+ else:
+ model_path = os.path.join('weights', self.options['model'] + '.pth')
+ if not os.path.isfile(model_path):
+ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+ for url in file_url:
+ # model_path will be updated
+ model_path = load_file_from_url(
+ url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
+
+ # use dni to control the denoise strength
+ dni_weight = None
+ if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1:
+ wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
+ model_path = [model_path, wdn_model_path]
+ dni_weight = [float(self.options['denoise_strength']), 1 - float(self.options['denoise_strength'])]
+
+ half = True
+ if self.options["compute_type"] == "fp32":
+ half=False
+
+
+ upsampler = RealESRGANer(
+ scale=netscale,
+ model_path=model_path,
+ dni_weight=dni_weight,
+ model=model,
+ tile= int(self.options['tile']),
+ tile_pad=int(self.options['tile_pad']),
+ pre_pad=int(self.options['pre_pad']),
+ half=half,
+ gpu_id=None) #Can be set if multiple gpus are available
+
+ if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement
+ from gfpgan import GFPGANer
+ face_enhancer = GFPGANer(
+ model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
+ upscale=int(self.options['outscale']),
+ arch='clean',
+ channel_multiplier=2,
+ bg_upsampler=upsampler)
+
+
+ pilimage = PILImage.fromarray(input_image)
+ img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR)
+ try:
+ if bool(self.options['face_enhance']):
+ _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
+ else:
+ output, _ = upsampler.enhance(img, outscale=int(self.options['outscale']))
+            except RuntimeError as error:
+                print('Error', error)
+                print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
+                return "Error"
+
+            output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
+
+ return output
+
+
+
+
+ except Exception as e:
+ print(e)
+ sys.stdout.flush()
+ return "Error"
+
+
+ def to_output(self, data: dict):
+ self.current_session.output_data_templates['output_image'].data = data
+ return self.current_session.output_data_templates
+
+
+ def manageModel(self, model_name):
+ if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
+ elif model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
+ elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
+ elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
+ netscale = 2
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
+ elif model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
+ elif model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = [
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
+            ]
+        else:
+            raise ValueError('Unknown model name: ' + model_name)
+
+        return model, netscale, file_url
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer
new file mode 100644
index 0000000..b3bf12f
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py b/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py
new file mode 100644
index 0000000..0a8cc43
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py
@@ -0,0 +1,166 @@
+import argparse
+import cv2
+import glob
+import os
+from basicsr.archs.rrdbnet_arch import RRDBNet
+from basicsr.utils.download_util import load_file_from_url
+
+from realesrgan import RealESRGANer
+from realesrgan.archs.srvgg_arch import SRVGGNetCompact
+
+
+def main():
+ """Inference demo for Real-ESRGAN.
+ """
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
+ parser.add_argument(
+ '-n',
+ '--model_name',
+ type=str,
+ default='RealESRGAN_x4plus',
+ help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | '
+ 'realesr-animevideov3 | realesr-general-x4v3'))
+ parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
+ parser.add_argument(
+ '-dn',
+ '--denoise_strength',
+ type=float,
+ default=0.5,
+ help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
+ 'Only used for the realesr-general-x4v3 model'))
+ parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
+ parser.add_argument(
+ '--model_path', type=str, default=None, help='[Option] Model path. Usually, you do not need to specify it')
+ parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
+ parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
+ parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
+ parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
+ parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
+ parser.add_argument(
+ '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
+ parser.add_argument(
+ '--alpha_upsampler',
+ type=str,
+ default='realesrgan',
+ help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
+ parser.add_argument(
+ '--ext',
+ type=str,
+ default='auto',
+ help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
+ parser.add_argument(
+ '-g', '--gpu-id', type=int, default=None, help='gpu device to use (default=None) can be 0,1,2 for multi-gpu')
+
+ args = parser.parse_args()
+
+ # determine models according to model names
+ args.model_name = args.model_name.split('.')[0]
+ if args.model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
+ elif args.model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
+ elif args.model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
+ elif args.model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
+ netscale = 2
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
+ elif args.model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
+ elif args.model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = [
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
+ ]
+
+ # determine model paths
+ if args.model_path is not None:
+ model_path = args.model_path
+ else:
+ model_path = os.path.join('weights', args.model_name + '.pth')
+ if not os.path.isfile(model_path):
+ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+ for url in file_url:
+ # model_path will be updated
+ model_path = load_file_from_url(
+ url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
+
+ # use dni to control the denoise strength
+ dni_weight = None
+ if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
+ wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
+ model_path = [model_path, wdn_model_path]
+ dni_weight = [args.denoise_strength, 1 - args.denoise_strength]
+
+ # restorer
+ upsampler = RealESRGANer(
+ scale=netscale,
+ model_path=model_path,
+ dni_weight=dni_weight,
+ model=model,
+ tile=args.tile,
+ tile_pad=args.tile_pad,
+ pre_pad=args.pre_pad,
+ half=not args.fp32,
+ gpu_id=args.gpu_id)
+
+ if args.face_enhance: # Use GFPGAN for face enhancement
+ from gfpgan import GFPGANer
+ face_enhancer = GFPGANer(
+ model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
+ upscale=args.outscale,
+ arch='clean',
+ channel_multiplier=2,
+ bg_upsampler=upsampler)
+ os.makedirs(args.output, exist_ok=True)
+
+ if os.path.isfile(args.input):
+ paths = [args.input]
+ else:
+ paths = sorted(glob.glob(os.path.join(args.input, '*')))
+
+ for idx, path in enumerate(paths):
+ imgname, extension = os.path.splitext(os.path.basename(path))
+ print('Testing', idx, imgname)
+
+ img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+ if len(img.shape) == 3 and img.shape[2] == 4:
+ img_mode = 'RGBA'
+ else:
+ img_mode = None
+
+ try:
+ if args.face_enhance:
+ _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
+ else:
+ output, _ = upsampler.enhance(img, outscale=args.outscale)
+ except RuntimeError as error:
+ print('Error', error)
+ print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
+ else:
+ if args.ext == 'auto':
+ extension = extension[1:]
+ else:
+ extension = args.ext
+ if img_mode == 'RGBA': # RGBA images should be saved in png format
+ extension = 'png'
+ if args.suffix == '':
+ save_path = os.path.join(args.output, f'{imgname}.{extension}')
+ else:
+ save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
+ cv2.imwrite(save_path, output)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt b/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt
new file mode 100644
index 0000000..0cf3e2b
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt
@@ -0,0 +1,13 @@
+realesrgan @git+https://github.com/xinntao/Real-ESRGAN.git
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.0
+torchvision
+basicsr>=1.4.2
+facexlib>=0.2.5
+gfpgan>=1.3.5
+numpy
+opencv-python
+Pillow
+tqdm
+git+https://github.com/huggingface/diffusers.git
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/version.py b/nostr_dvm/backends/nova_server/modules/image_upscale/version.py
new file mode 100644
index 0000000..7963e09
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/image_upscale/version.py
@@ -0,0 +1,12 @@
+""" RealESRGan
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '0'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py
new file mode 100644
index 0000000..919e1b1
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py
@@ -0,0 +1,100 @@
+def build_lora_xl(lora, prompt, lora_weight):
+    # NOTE: the exact <lora:...> tag identifiers below are assumptions; they may need
+    # to be replaced with the actual trigger names of the .safetensors files.
+    existing_lora = False
+    if lora == "3drenderstyle":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = "3d style, 3d render, " + prompt + " <lora:3drenderstyle:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "psychedelicnoir":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = prompt + " <lora:psychedelicnoir:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "wojak":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = "<lora:wojak:" + lora_weight + ">, " + prompt + ", wojak"
+        existing_lora = True
+
+    if lora == "dreamarts":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = "<lora:dreamarts:" + lora_weight + ">, " + prompt
+        existing_lora = True
+
+    if lora == "voxel":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = "voxel style, " + prompt + " <lora:voxel:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "kru3ger":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = "kru3ger_style, " + prompt + " <lora:kru3ger:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "inkpunk":
+        if lora_weight == "":
+            lora_weight = "0.5"
+        prompt = "inkpunk style, " + prompt + " <lora:inkpunk:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "inkscenery":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = "ink scenery, " + prompt + " <lora:inkscenery:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "inkpainting":
+        if lora_weight == "":
+            lora_weight = "0.7"
+        prompt = "painting style, " + prompt + ", <lora:inkpainting:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "timburton":
+        if lora_weight == "":
+            lora_weight = "1.27"
+        pencil_weight = "1.15"
+        prompt = prompt + " (hand drawn with pencil:" + pencil_weight + "), (tim burton style:" + lora_weight + ")"
+        existing_lora = True
+
+    if lora == "pixelart":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = prompt + " (flat shading:1.2), (minimalist:1.4), <lora:pixelart:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "pepe":
+        if lora_weight == "":
+            lora_weight = "0.8"
+        prompt = prompt + " <lora:pepe:" + lora_weight + ">, pepe"
+        existing_lora = True
+
+    if lora == "bettertext":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = prompt + ", <lora:bettertext:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "mspaint":
+        if lora_weight == "":
+            lora_weight = "1"
+        prompt = "MSPaint drawing " + prompt + " <lora:mspaint:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "woodfigure":
+        if lora_weight == "":
+            lora_weight = "0.7"
+        prompt = prompt + ", woodfigurez, artistic style <lora:woodfigure:" + lora_weight + ">"
+        existing_lora = True
+
+    if lora == "fireelement":
+        prompt = prompt + ", composed of fire elements, fire element"
+        existing_lora = True
+
+    return lora, prompt, existing_lora
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md
new file mode 100644
index 0000000..cccbe30
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md
@@ -0,0 +1,35 @@
+# Stable Diffusion XL
+
+This module provides image generation based on prompts.
+
+* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
+
+## Options
+
+- `model`: string, identifier of the model to choose
+ - `stabilityai/stable-diffusion-xl-base-1.0`: Default Stable Diffusion XL model
+
+
+- `ratio`: aspect ratio of the output image
+  - `1-1`, `4-3`, `16-9`, `16-10`, `3-4`, `9-16`, `10-16`
+
+- `width`, `height`: explicit output dimensions; when both are set they override `ratio`
+
+- `high_noise_frac`: fraction of the denoising steps run on the base model before the latents are handed to the refiner
+
+- `n_steps`: number of inference steps
+
+## Example payload
+
+```python
+payload = {
+    'trainerFilePath': 'modules\\stablediffusionxl\\stablediffusionxl.trainer',
+    'server': '127.0.0.1',
+    'data': '[{"id":"input_prompt","type":"input","src":"user:text","prompt":"' + prompt + '","active":"True"},{"id":"negative_prompt","type":"input","src":"user:text","prompt":"' + negative_prompt + '","active":"True"},{"id":"output_image","type":"output","src":"file:image","uri":"' + outputfile + '","active":"True"}]',
+    'optStr': 'model=stabilityai/stable-diffusion-xl-base-1.0;ratio=4-3'
+}
+
+import requests
+
+url = 'http://127.0.0.1:53770/predict'
+headers = {'Content-type': 'application/x-www-form-urlencoded'}
+requests.post(url, headers=headers, data=payload)
+```
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt
new file mode 100644
index 0000000..9b9e167
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt
@@ -0,0 +1,9 @@
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.0
+compel~=2.0.2
+git+https://github.com/huggingface/diffusers.git
+transformers
+accelerate
+numpy
+omegaconf
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py
new file mode 100644
index 0000000..bae89e8
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py
@@ -0,0 +1,176 @@
+"""StableDiffusionXL Module
+"""
+
+import gc
+import sys
+import os
+
+# Add local dir to path for relative imports
+sys.path.insert(0, os.path.dirname(__file__))
+
+from nova_utils.interfaces.server_module import Processor
+from nova_utils.utils.cache_utils import get_file
+from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
+from diffusers.utils import load_image
+import numpy as np
+from PIL import Image as PILImage
+from lora import build_lora_xl
+
+
+
+# Setting defaults
+_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" }
+
+# TODO: add logging
+class StableDiffusionXL(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+ import torch
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ #input_image_url = self.current_session.input_data['input_image_url'].data
+ #input_image_url = ' '.join(input_image_url)
+ input_image = self.current_session.input_data['input_image'].data
+ input_prompt = self.current_session.input_data['input_prompt'].data
+ input_prompt = ' '.join(input_prompt)
+ negative_prompt = self.current_session.input_data['negative_prompt'].data
+ negative_prompt = ' '.join(negative_prompt)
+ # print("Input Image: " + input_image_url)
+ print("Input prompt: " + input_prompt)
+ print("Negative prompt: " + negative_prompt)
+
+ try:
+
+ model = self.options['model']
+ lora = self.options['lora']
+ #init_image = load_image(input_image_url).convert("RGB")
+ init_image = PILImage.fromarray(input_image)
+
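+                # fit the init image into a 1024x1024 box while preserving its aspect ratio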
+ mwidth = 1024
+ mheight = 1024
+ w = mwidth
+ h = mheight
+ if init_image.width > init_image.height:
+ scale = float(init_image.height / init_image.width)
+ w = mwidth
+ h = int(mheight * scale)
+ elif init_image.width < init_image.height:
+ scale = float(init_image.width / init_image.height)
+ w = int(mwidth * scale)
+ h = mheight
+ else:
+ w = mwidth
+ h = mheight
+
+ init_image = init_image.resize((w, h))
+
+ if lora != "" and lora != "None":
+ print("Loading lora...")
+
+ lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" )
+
+ from diffusers import AutoPipelineForImage2Image
+ import torch
+
+
+
+ #init_image = init_image.resize((int(w/2), int(h/2)))
+
+ pipe = AutoPipelineForImage2Image.from_pretrained(
+ "stabilityai/stable-diffusion-xl-base-1.0",
+ torch_dtype=torch.float16).to("cuda")
+
+ if existing_lora:
+                    # next() avoids an IndexError when the lora id is not in meta_uri
+                    lora_uri = next((x for x in self.trainer.meta_uri if x.uri_id == lora), None)
+                    if lora_uri is None:
+                        return "Lora not found"
+ lora_path = get_file(
+ fname=str(lora_uri.uri_id) + ".safetensors",
+ origin=lora_uri.uri_url,
+ file_hash=lora_uri.uri_hash,
+ cache_dir=os.getenv("CACHE_DIR"),
+ tmp_dir=os.getenv("TMP_DIR"),
+ )
+ pipe.load_lora_weights(str(lora_path))
+ print("Loaded Lora: " + str(lora_path))
+
+ seed = 20000
+ generator = torch.manual_seed(seed)
+
+ #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
+
+ image = pipe(
+ prompt=input_prompt,
+ negative_prompt=negative_prompt,
+ image=init_image,
+ generator=generator,
+ num_inference_steps=int(self.options['n_steps']),
+ image_guidance_scale=float(self.options['guidance_scale']),
+ strength=float(str(self.options['strength']))).images[0]
+
+
+ elif model == "stabilityai/stable-diffusion-xl-refiner-1.0":
+
+ pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ model, torch_dtype=torch.float16, variant="fp16",
+ use_safetensors=True
+ )
+
+ n_steps = int(self.options['n_steps'])
+ transformation_strength = float(self.options['strength'])
+ cfg_scale = float(self.options['guidance_scale'])
+
+ pipe = pipe.to(self.device)
+ image = pipe(input_prompt, image=init_image,
+ negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0]
+
+ elif model == "timbrooks/instruct-pix2pix":
+ pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16,
+ safety_checker=None)
+
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+
+ pipe.to(self.device)
+ n_steps = int(self.options['n_steps'])
+ cfg_scale = float(self.options['guidance_scale'])
+ image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0]
+
+
+ if torch.cuda.is_available():
+ del pipe
+ gc.collect()
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+
+
+ numpy_array = np.array(image)
+ return numpy_array
+
+
+ except Exception as e:
+ print(e)
+ sys.stdout.flush()
+ return "Error"
+
+
+ def to_output(self, data: dict):
+ self.current_session.output_data_templates['output_image'].data = data
+ return self.current_session.output_data_templates
+
+
+
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
new file mode 100644
index 0000000..b6f4167
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py
new file mode 100644
index 0000000..3f446eb
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py
@@ -0,0 +1,242 @@
+"""StableDiffusionXL Module
+"""
+import gc
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from nova_utils.interfaces.server_module import Processor
+from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, logging
+from compel import Compel, ReturnedEmbeddingsType
+from nova_utils.utils.cache_utils import get_file
+import numpy as np
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"  # enable CPU fallback for ops missing on MPS; must be set before torch is imported
+
+import torch
+from PIL import Image
+from lora import build_lora_xl
+logging.disable_progress_bar()
+logging.enable_explicit_format()
+#logging.set_verbosity_info()
+
+
+# Setting defaults
+_default_options = {"model": "stabilityai/stable-diffusion-xl-base-1.0", "ratio": "1-1", "width": "", "height":"", "high_noise_frac" : "0.8", "n_steps" : "35", "lora" : "" }
+
+# TODO: add logging
+class StableDiffusionXL(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+        self.device = ("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_built() else "cpu"))
+ self.variant = "fp16"
+ self.torch_d_type = torch.float16
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ input_prompt = self.current_session.input_data['input_prompt'].data
+ input_prompt = ' '.join(input_prompt)
+ negative_prompt = self.current_session.input_data['negative_prompt'].data
+ negative_prompt = ' '.join(negative_prompt)
+ new_width = 0
+ new_height = 0
+ print("Input prompt: " + input_prompt)
+ print("Negative prompt: " + negative_prompt)
+
+ try:
+ if self.options['width'] != "" and self.options['height'] != "":
+ new_width = int(self.options['width'])
+ new_height = int(self.options['height'])
+ ratiow, ratioh = self.calculate_aspect(new_width, new_height)
+ print("Ratio:" + str(ratiow) + ":" + str(ratioh))
+
+ else:
+ ratiow = str(self.options['ratio']).split('-')[0]
+                ratioh = str(self.options['ratio']).split('-')[1]
+
+ model = self.options["model"]
+ lora = self.options["lora"]
+ mwidth = 1024
+ mheight = 1024
+
+ height = mheight
+ width = mwidth
+
+ ratiown = int(ratiow)
+ ratiohn= int(ratioh)
+
+ if ratiown > ratiohn:
+ height = int((ratiohn/ratiown) * float(width))
+ elif ratiown < ratiohn:
+ width = int((ratiown/ratiohn) * float(height))
+ elif ratiown == ratiohn:
+ width = height
+
+
+ print("Processing Output width: " + str(width) + " Output height: " + str(height))
+
+
+
+
+ if model == "stabilityai/stable-diffusion-xl-base-1.0":
+ base = StableDiffusionXLPipeline.from_pretrained(model, torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
+ print("Loaded model: " + model)
+
+ else:
+
+                # next() avoids an IndexError when the model id is not in meta_uri
+                model_uri = next((x for x in self.trainer.meta_uri if x.uri_id == model), None)
+                if model_uri is None:
+                    return "Model not found"
+
+ model_path = get_file(
+ fname=str(model_uri.uri_id) + ".safetensors",
+ origin=model_uri.uri_url,
+ file_hash=model_uri.uri_hash,
+ cache_dir=os.getenv("CACHE_DIR"),
+ tmp_dir=os.getenv("TMP_DIR"),
+ )
+
+ print(str(model_path))
+
+
+ base = StableDiffusionXLPipeline.from_single_file(str(model_path), torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
+ print("Loaded model: " + model)
+
+ if lora != "" and lora != "None":
+ print("Loading lora...")
+ lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "")
+
+ if existing_lora:
+                    lora_uri = next((x for x in self.trainer.meta_uri if x.uri_id == lora), None)
+                    if lora_uri is None:
+                        return "Lora not found"
+ lora_path = get_file(
+ fname=str(lora_uri.uri_id) + ".safetensors",
+ origin=lora_uri.uri_url,
+ file_hash=lora_uri.uri_hash,
+ cache_dir=os.getenv("CACHE_DIR"),
+ tmp_dir=os.getenv("TMP_DIR"),
+ )
+
+ base.load_lora_weights(str(lora_path))
+ print("Loaded Lora: " + str(lora_path))
+
+ refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-refiner-1.0",
+ text_encoder_2=base.text_encoder_2,
+ vae=base.vae,
+ torch_dtype=self.torch_d_type,
+ use_safetensors=True,
+ variant=self.variant,
+ )
+
+
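+            # Compel turns the (optionally weighted) prompt text into embeddings for both SDXL text encoders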
+ compel_base = Compel(
+ tokenizer=[base.tokenizer, base.tokenizer_2],
+ text_encoder=[base.text_encoder, base.text_encoder_2],
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+ requires_pooled=[False, True],
+ )
+
+ compel_refiner = Compel(
+ tokenizer=[refiner.tokenizer_2],
+ text_encoder=[refiner.text_encoder_2],
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+ requires_pooled=[True])
+
+ conditioning, pooled = compel_base(input_prompt)
+ negative_conditioning, negative_pooled = compel_base(negative_prompt)
+
+ conditioning_refiner, pooled_refiner = compel_refiner(input_prompt)
+ negative_conditioning_refiner, negative_pooled_refiner = compel_refiner(
+ negative_prompt)
+
+
+ n_steps = int(self.options['n_steps'])
+ high_noise_frac = float(self.options['high_noise_frac'])
+
+
+ #base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
+
+
+
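+            # run the base model up to high_noise_frac and output latents, which the refiner then finishes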
+ img = base(
+ prompt_embeds=conditioning,
+ pooled_prompt_embeds=pooled,
+ negative_prompt_embeds=negative_conditioning,
+ negative_pooled_prompt_embeds=negative_pooled,
+ width=width,
+ height=height,
+ num_inference_steps=n_steps,
+ denoising_end=high_noise_frac,
+ output_type="latent",
+ ).images
+
+ if torch.cuda.is_available():
+ del base
+ gc.collect()
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+
+ refiner.to(self.device)
+ # refiner.enable_model_cpu_offload()
+ image = refiner(
+ prompt_embeds=conditioning_refiner,
+ pooled_prompt_embeds=pooled_refiner,
+ negative_prompt_embeds=negative_conditioning_refiner,
+ negative_pooled_prompt_embeds=negative_pooled_refiner,
+ num_inference_steps=n_steps,
+ denoising_start=high_noise_frac,
+ num_images_per_prompt=1,
+ image=img,
+ ).images[0]
+
+ if torch.cuda.is_available():
+ del refiner
+ gc.collect()
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+
+            if (new_height != 0 or new_width != 0) and (new_width != mwidth or new_height != mheight):
+ print("Resizing to width: " + str(new_width) + " height: " + str(new_height))
+ image = image.resize((new_width, new_height), Image.LANCZOS)
+
+ numpy_array = np.array(image)
+ return numpy_array
+
+
+ except Exception as e:
+ print(e)
+ sys.stdout.flush()
+ return "Error"
+
+ def calculate_aspect(self, width: int, height: int):
+ def gcd(a, b):
+ """The GCD (greatest common divisor) is the highest number that evenly divides both width and height."""
+ return a if b == 0 else gcd(b, a % b)
+
+ r = gcd(width, height)
+ x = int(width / r)
+ y = int(height / r)
+
+ return x, y
+
+
+
+ def to_output(self, data: dict):
+ self.current_session.output_data_templates['output_image'].data = data
+ return self.current_session.output_data_templates
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer
new file mode 100644
index 0000000..0e86e7e
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py
new file mode 100644
index 0000000..bba6553
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py
@@ -0,0 +1,12 @@
+""" Stable Diffusion XL
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '0'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/__init__.py b/nostr_dvm/backends/nova_server/modules/whisperx/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/readme.md b/nostr_dvm/backends/nova_server/modules/whisperx/readme.md
new file mode 100644
index 0000000..ffe67a3
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/whisperx/readme.md
@@ -0,0 +1,52 @@
+# WhisperX
+
+This module provides fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and
+speaker diarization.
+
+* https://github.com/m-bain/whisperX
+
+## Options
+
+- `model`: string, identifier of the model to choose, sorted ascending in required (V)RAM:
+ - `tiny`, `tiny.en`
+ - `base`, `base.en`
+ - `small`, `small.en`
+ - `medium`, `medium.en`
+ - `large-v1`
+ - `large-v2`
+
+- `alignment_mode`: string, alignment method to use
+ - `raw` Segments as identified by Whisper
+ - `segment` Improved segmentation using separate alignment model. Roughly equivalent to sentence alignment.
+ - `word` Improved segmentation using separate alignment model. Equivalent to word alignment.
+
+- `language`: language code for transcription and alignment models. Supported languages:
+ - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh`
+ - `None`: auto-detect language from first 30 seconds of audio
+
+- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption
+
+## Examples
+
+### Request
+
+```python
+import requests
+import json
+
+payload = {
+ "jobID" : "whisper_transcript",
+ "data": json.dumps([
+ {"src":"file:stream:audio", "type":"input", "id":"audio", "uri":"path/to/my/file.wav"},
+ {"src":"file:annotation:free", "type":"output", "id":"transcript", "uri":"path/to/my/transcript.annotation"}
+ ]),
+ "trainerFilePath": "modules\\whisperx\\whisperx_transcript.trainer",
+}
+
+
+url = 'http://127.0.0.1:8080/process'
+headers = {'Content-type': 'application/x-www-form-urlencoded'}
+x = requests.post(url, headers=headers, data=payload)
+print(x.text)
+
+```
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt b/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt
new file mode 100644
index 0000000..cd86386
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt
@@ -0,0 +1,7 @@
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.0+cu118
+torchvision>=0.15.1+cu118
+torchaudio>=2.0.0+cu118
+pyannote-audio @ git+https://github.com/shelm/pyannote-audio.git@d7b4de3
+whisperx @ git+https://github.com/m-bain/whisperx.git@49e0130
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/version.py b/nostr_dvm/backends/nova_server/modules/whisperx/version.py
new file mode 100644
index 0000000..aa37301
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/whisperx/version.py
@@ -0,0 +1,12 @@
+""" WhisperX
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '1'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py
new file mode 100644
index 0000000..f24e63e
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py
@@ -0,0 +1,124 @@
+"""WhisperX Module
+"""
+from nova_utils.interfaces.server_module import Processor
+import sys
+
+# Setting defaults
+_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'}
+
+# supported language codes, cf. whisperx/alignment.py
+# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None}
+# {'vi', 'uk', 'pl', 'ur', 'ru', 'ko', 'en', 'zh', 'es', 'it', 'el', 'te', 'da', 'he', 'fa', 'pt', 'de',
+# 'fr', 'tr', 'nl', 'cs', 'hu', 'fi', 'ar', 'ja', None}
+
+class WhisperX(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.session_manager = None
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ assert len(self.input) == 1 and len(self.output) == 1
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_manager) -> dict:
+ import whisperx
+ import torch
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.session_manager = self.get_session_manager(ds_manager)
+ input_audio = self.session_manager.input_data['audio']
+
+ # sliding window will be applied by WhisperX
+ audio = whisperx.load_audio(input_audio.meta_data.file_path)
+
+ # transcribe with original whisper
+ try:
+ model = whisperx.load_model(self.options["model"], self.device, compute_type=self.options['compute_type'],
+ language=self.options['language'])
+ except ValueError:
+ print(f'Your hardware does not support {self.options["compute_type"]} - fallback to float32')
+ sys.stdout.flush()
+ model = whisperx.load_model(self.options["model"], self.device, compute_type='float32',
+ language=self.options['language'])
+
+ result = model.transcribe(audio, batch_size=int(self.options["batch_size"]))
+
+ # delete model if low on GPU resources
+        import gc; del model; gc.collect(); torch.cuda.empty_cache()
+
+ if not self.options["alignment_mode"] == "raw":
+ # load alignment model and metadata
+ model_a, metadata = whisperx.load_align_model(
+ language_code=result["language"], device=self.device
+ )
+
+ # align whisper output
+ result_aligned = whisperx.align(
+ result["segments"], model_a, metadata, audio, self.device
+ )
+ result = result_aligned
+
+ # delete model if low on GPU resources
+            import gc; del model_a; gc.collect(); torch.cuda.empty_cache()
+
+ return result
+
+ def to_output(self, data: dict):
+ def _fix_missing_timestamps(data):
+ """
+ https://github.com/m-bain/whisperX/issues/253
+ Some characters might miss timestamps and recognition scores. This function adds estimated time stamps assuming a fixed time per character of 65ms.
+ Confidence for each added timestamp will be 0.
+ Args:
+ data (dictionary): output dictionary as returned by process_data
+ """
+ last_end = 0
+ for s in data["segments"]:
+ for w in s["words"]:
+ if "end" in w.keys():
+ last_end = w["end"]
+ else:
+ #TODO: rethink lower bound for confidence; place word centred instead of left aligned
+ w["start"] = last_end
+ last_end += 0.065
+ w["end"] = last_end
+ #w["score"] = 0.000
+ w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4])
+
+ def _hmean(scores):
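+            # despite the name, this computes the geometric mean of the scores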
+ if len(scores) > 0:
+ prod = scores[0]
+ for s in scores[1:]:
+ prod *= s
+ prod = prod**(1/len(scores))
+ else:
+ prod = 0
+ return prod
+
+ if (
+ self.options["alignment_mode"] == "word"
+ or self.options["alignment_mode"] == "segment"
+ ):
+ _fix_missing_timestamps(data)
+
+ if self.options["alignment_mode"] == "word":
+ anno_data = [
+ (w["start"], w["end"], w["word"], w["score"])
+ for w in data["word_segments"]
+ ]
+ else:
+ anno_data = [
+ #(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
+ (w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?)
+ ]
+
+ # convert to milliseconds
+ anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data]
+ out = self.session_manager.output_data_templates[self.output.io_id]
+ out.data = anno_data
+ return self.session_manager.output_data_templates
diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer
new file mode 100644
index 0000000..44dae41
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/nostr_dvm/backends/nova_server/run_windows.cmd b/nostr_dvm/backends/nova_server/run_windows.cmd
new file mode 100644
index 0000000..f274dbc
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/run_windows.cmd
@@ -0,0 +1,2 @@
+call venv/Scripts/activate
+nova-server
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/setup_windows.cmd b/nostr_dvm/backends/nova_server/setup_windows.cmd
new file mode 100644
index 0000000..04f49db
--- /dev/null
+++ b/nostr_dvm/backends/nova_server/setup_windows.cmd
@@ -0,0 +1,3 @@
+python -m venv venv
+call venv/Scripts/activate
+pip install hcai-nova-server
\ No newline at end of file
diff --git a/nostr_dvm/backends/nova_server/utils.py b/nostr_dvm/backends/nova_server/utils.py
index 2796cc8..b94ea09 100644
--- a/nostr_dvm/backends/nova_server/utils.py
+++ b/nostr_dvm/backends/nova_server/utils.py
@@ -11,7 +11,7 @@ from nostr_dvm.utils.output_utils import upload_media_to_hoster
"""
This file contains basic calling functions for ML tasks that are outsourced to nova server. It is an Open-Source backend
-that enables running models locally based on preefined modules, by accepting a request form.
+that enables running models locally based on predefined modules, by accepting a request.
Modules are deployed in separate virtual environments so dependencies won't conflict.
"""
diff --git a/nostr_dvm/dvm.py b/nostr_dvm/dvm.py
index 44febf7..8a5110a 100644
--- a/nostr_dvm/dvm.py
+++ b/nostr_dvm/dvm.py
@@ -1,9 +1,8 @@
-import importlib
import json
import os
import subprocess
from datetime import timedelta
-from pathlib import Path
+from sys import platform
from nostr_sdk import PublicKey, Keys, Client, Tag, Event, EventBuilder, Filter, HandleNotification, Timestamp, \
init_logger, LogLevel, Options, nip04_encrypt
@@ -475,8 +474,11 @@ class DVM:
request_form = dvm.create_request_from_nostr_event(job_event, self.client, self.dvm_config)
if dvm_config.USE_OWN_VENV:
- python_bin = (r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0]
- + "/bin/python")
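+            # venv layouts differ per platform: bin/ on POSIX, Scripts/ on Windows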
+ python_location = "/bin/python"
+ if platform == "win32":
+ python_location = "/Scripts/python"
+            python_bin = (r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0]
+ + python_location)
retcode = subprocess.call([python_bin, dvm_config.SCRIPT,
'--request', json.dumps(request_form),
'--identifier', dvm_config.IDENTIFIER,
diff --git a/nostr_dvm/interfaces/dvmtaskinterface.py b/nostr_dvm/interfaces/dvmtaskinterface.py
index 3035ef4..b4f720c 100644
--- a/nostr_dvm/interfaces/dvmtaskinterface.py
+++ b/nostr_dvm/interfaces/dvmtaskinterface.py
@@ -3,6 +3,7 @@ import os
import subprocess
from subprocess import run
import sys
+from sys import platform
from threading import Thread
from venv import create
from nostr_sdk import Keys
@@ -59,15 +60,18 @@ class DVMTaskInterface:
def install_dependencies(self, dvm_config):
if dvm_config.SCRIPT != "":
if self.dvm_config.USE_OWN_VENV:
-
dir = r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0]
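+            # pick the pip inside the venv; Windows venvs use Scripts/ instead of bin/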
+ pip_location = 'bin/pip'
+ if platform == "win32":
+ pip_location = dir + '/Scripts/pip'
+
if not os.path.isdir(dir):
- print(dir)
+ print("Creating Venv: " + dir)
create(dir, with_pip=True, upgrade_deps=True)
self.dependencies.append(("nostr-dvm", "nostr-dvm"))
for (module, package) in self.dependencies:
print("Installing Venv Module: " + module)
- run(["bin/pip", "install", "--force-reinstall", package], cwd=dir)
+ run([pip_location, "install", "--upgrade", package], cwd=dir)
else:
for module, package in self.dependencies:
if module != "nostr-dvm":
diff --git a/nostr_dvm/tasks/README.md b/nostr_dvm/tasks/README.md
index b8f794f..b2c1aea 100644
--- a/nostr_dvm/tasks/README.md
+++ b/nostr_dvm/tasks/README.md
@@ -6,12 +6,27 @@ Reusable backend functions can be defined in backends (e.g. API calls)
Current List of Tasks:
-| Module | Kind | Description | Backend |
-|-------------------------|------|------------------------------------------------|-------------|
-| TextExtractionPDF | 5000 | Extracts Text from a PDF file | local |
-| SpeechToTextGoogle | 5000 | Extracts Speech from Media files via Google | googleAPI |
-| TranslationGoogle | 5002 | Translates Inputs to another language | googleAPI |
-| TranslationLibre | 5002 | Translates Inputs to another language | libreAPI |
-| ImageGenerationDALLE | 5100 | Generates an Image with Dall-E | openAI |
-| MediaConverter | 5200 | Converts a link of a media file and uploads it | openAI |
-| DiscoverInactiveFollows | 5301 | Find inactive Nostr users | local |
\ No newline at end of file
+| Module | Kind | Description | Backend |
+|------------------------------|--------------|------------------------------------------------------------|------------------|
+| TextExtractionPDF | 5000 | Extracts Text from a PDF file | local |
+| SpeechToTextGoogle | 5000 | Extracts Speech from Media files via Google Services | googleAPI |
+| SpeechToTextWhisperX | 5000 | Extracts Speech from Media files via local WhisperX | nserver |
+| ImageInterrogator | 5000 | Extracts Prompts from Images | nserver |
+| TranslationGoogle | 5002 | Translates Inputs to another language | googleAPI |
+| TranslationLibre | 5002 | Translates Inputs to another language | libreAPI |
+| TextGenerationLLMLite | 5050 | Chat with LLM backends like Ollama, ChatGPT etc | local/api/openai |
+| ImageGenerationSDXL | 5100 | Generates an Image from Prompt with Stable Diffusion XL | nserver |
+| ImageGenerationSDXLIMG2IMG | 5100 | Generates an Image from an Image with Stable Diffusion XL | nserver |
+| ImageGenerationReplicateSDXL | 5100 | Generates an Image from Prompt with Stable Diffusion XL | replicate |
+| ImageGenerationMLX | 5100 | Generates an Image with Stable Diffusion 2.1 on M1/2/3 Mac | mlx |
+| ImageGenerationDALLE | 5100 | Generates an Image with OpenAI's Dall-E | openAI |
+| ImageUpscale | 5100 | Upscales an Image | nserver |
+| MediaConverter | 5200 | Converts a link of a media file and uploads it | openAI |
+| VideoGenerationReplicateSVD  | 5202 (unofficial) | Generates a Video from an Image                             | replicate        |
+| TextToSpeech                 | 5250 (unofficial) | Generate Audio from a prompt                                | local            |
+| TrendingNotesNostrBand       | 5300              | Show trending notes on nostr.band                           | nostr.band api   |
+| DiscoverInactiveFollows      | 5301              | Find inactive Nostr users                                   | local            |
+| AdvancedSearch               | 5302 (unofficial) | Search Content on nostr.band                                | local            |
+
+Kinds marked (unofficial) are suggestions that have not been merged into the official kind list yet and might change in the future.
+Backends might require an API key in the .env file or an external server/framework that the DVM communicates with.
\ No newline at end of file
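
For orientation, the kind-to-module relationship in the table could be written down as a
lookup (purely illustrative; kinds and module names are taken from the table above):

    KIND_TO_MODULES = {
        5000: ["TextExtractionPDF", "SpeechToTextGoogle", "SpeechToTextWhisperX", "ImageInterrogator"],
        5002: ["TranslationGoogle", "TranslationLibre"],
        5050: ["TextGenerationLLMLite"],
        5100: ["ImageGenerationSDXL", "ImageGenerationSDXLIMG2IMG", "ImageGenerationReplicateSDXL",
               "ImageGenerationMLX", "ImageGenerationDALLE", "ImageUpscale"],
        5200: ["MediaConverter"],
        5202: ["VideoGenerationReplicateSVD"],  # unofficial
        5250: ["TextToSpeech"],                 # unofficial
        5300: ["TrendingNotesNostrBand"],
        5301: ["DiscoverInactiveFollows"],
        5302: ["AdvancedSearch"],               # unofficial
    }
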
diff --git a/nostr_dvm/tasks/advanced_search.py b/nostr_dvm/tasks/advanced_search.py
index 217ea18..2b11a08 100644
--- a/nostr_dvm/tasks/advanced_search.py
+++ b/nostr_dvm/tasks/advanced_search.py
@@ -47,8 +47,8 @@ class AdvancedSearch(DVMTaskInterface):
# default values
user = ""
- since_days = 800 #days ago
- until_days = 0 #days ago
+ since_days = 800 # days ago
+ until_days = 0 # days ago
search = ""
max_results = 20
@@ -98,11 +98,14 @@ class AdvancedSearch(DVMTaskInterface):
search_until = Timestamp.from_secs(dif)
if options["user"] == "":
- notes_filter = Filter().kind(1).search(options["search"]).since(search_since).until(search_until).limit(options["max_results"])
+ notes_filter = Filter().kind(1).search(options["search"]).since(search_since).until(search_until).limit(
+ options["max_results"])
elif options["search"] == "":
- notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).since(search_since).until(search_until).limit(options["max_results"])
+ notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).since(search_since).until(
+ search_until).limit(options["max_results"])
else:
- notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).search(options["search"]).since(search_since).until(search_until).limit(options["max_results"])
+ notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).search(options["search"]).since(
+ search_since).until(search_until).limit(options["max_results"])
events = cli.get_events_of([notes_filter], timedelta(seconds=5))
@@ -116,8 +119,6 @@ class AdvancedSearch(DVMTaskInterface):
return json.dumps(result_list)
-
-
def post_process(self, result, event):
"""Overwrite the interface function to return a social client readable format, if requested"""
for tag in event.tags():
@@ -170,9 +171,9 @@ def build_example(name, identifier, admin_config):
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
nip89config.CONTENT = json.dumps(nip89info)
-
+
return AdvancedSearch(name=name, dvm_config=dvm_config, nip89config=nip89config,
- admin_config=admin_config)
+ admin_config=admin_config)
def process_venv():
@@ -182,5 +183,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
+
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
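
The since/until handling above turns "days ago" offsets into nostr_sdk Timestamps; a
minimal sketch of that window computation (assuming Timestamp.now() and as_secs() from
nostr_sdk, with the module's defaults):

    from nostr_sdk import Timestamp

    def search_window(since_days=800, until_days=0):
        now = Timestamp.now().as_secs()
        search_since = Timestamp.from_secs(now - since_days * 24 * 60 * 60)
        search_until = Timestamp.from_secs(now - until_days * 24 * 60 * 60)
        return search_since, search_until
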
diff --git a/nostr_dvm/tasks/convert_media.py b/nostr_dvm/tasks/convert_media.py
index fa8655c..970bbc9 100644
--- a/nostr_dvm/tasks/convert_media.py
+++ b/nostr_dvm/tasks/convert_media.py
@@ -8,7 +8,6 @@ from nostr_dvm.utils.nip89_utils import NIP89Config
from nostr_dvm.utils.mediasource_utils import organize_input_media_data
from nostr_dvm.utils.output_utils import upload_media_to_hoster
-
"""
This File contains a Module convert media locally
@@ -98,10 +97,10 @@ def build_example(name, identifier, admin_config):
nip89config = NIP89Config()
-
return MediaConverter(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
+
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -111,4 +110,4 @@ def process_venv():
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/discovery_inactive_follows.py b/nostr_dvm/tasks/discovery_inactive_follows.py
index 961380e..5dd2e47 100644
--- a/nostr_dvm/tasks/discovery_inactive_follows.py
+++ b/nostr_dvm/tasks/discovery_inactive_follows.py
@@ -163,6 +163,7 @@ class DiscoverInactiveFollows(DVMTaskInterface):
# if not text/plain, don't post-process
return result
+
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
@@ -196,6 +197,7 @@ def build_example(name, identifier, admin_config):
return DiscoverInactiveFollows(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
+
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -203,5 +205,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
+
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/imagegeneration_openai_dalle.py b/nostr_dvm/tasks/imagegeneration_openai_dalle.py
index 86ebdcb..fc03938 100644
--- a/nostr_dvm/tasks/imagegeneration_openai_dalle.py
+++ b/nostr_dvm/tasks/imagegeneration_openai_dalle.py
@@ -107,7 +107,6 @@ class ImageGenerationDALLE(DVMTaskInterface):
n=int(options['number']),
)
-
image_url = response.data[0].url
# rehost the result instead of relying on the openai link
response = requests.get(image_url)
@@ -162,8 +161,8 @@ def process_venv():
result = dvm.process(json.loads(args.request))
time.sleep(10)
-
DVMTaskInterface.write_output(result, args.output)
+
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py b/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py
index 5c3cdec..eb72398 100644
--- a/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py
+++ b/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py
@@ -6,7 +6,6 @@ from PIL import Image
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -153,5 +152,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
+
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/imagegeneration_mlx.py b/nostr_dvm/tasks/imagegeneration_sd21_mlx.py
similarity index 97%
rename from nostr_dvm/tasks/imagegeneration_mlx.py
rename to nostr_dvm/tasks/imagegeneration_sd21_mlx.py
index 8743bb9..11de72c 100644
--- a/nostr_dvm/tasks/imagegeneration_mlx.py
+++ b/nostr_dvm/tasks/imagegeneration_sd21_mlx.py
@@ -12,7 +12,7 @@ from nostr_dvm.utils.output_utils import upload_media_to_hoster
from nostr_dvm.utils.zap_utils import get_price_per_sat
"""
-This File contains a Module to generate an Image on replicate and receive results back.
+This File contains a Module to generate an Image on Macs with M1/M2/M3 chips and receive results back.
Accepted Inputs: Prompt (text)
Outputs: An url to an Image
@@ -95,7 +95,7 @@ class ImageGenerationMLX(DVMTaskInterface):
def process(self, request_form):
try:
import mlx.core as mx
- from backends.mlx.stable_diffusion import StableDiffusion
+ from nostr_dvm.backends.mlx.modules.stable_diffusion import StableDiffusion
options = DVMTaskInterface.set_options(request_form)
sd = StableDiffusion()
diff --git a/nostr_dvm/tasks/imagegeneration_sdxl.py b/nostr_dvm/tasks/imagegeneration_sdxl.py
new file mode 100644
index 0000000..532ab09
--- /dev/null
+++ b/nostr_dvm/tasks/imagegeneration_sdxl.py
@@ -0,0 +1,206 @@
+import json
+from multiprocessing.pool import ThreadPool
+
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from nostr_dvm.utils.definitions import EventDefinitions
+
+"""
+This File contains a Module to generate an Image from text input on NOVA-Server and receive results back.
+
+Accepted Inputs: Prompt (text)
+Outputs: An url to an Image
+Params: -model # models: juggernaut, dynavision, colossusProject, newreality, unstable
+ -lora # loras (weights on top of models) voxel,
+"""
+
+
+class ImageGenerationSDXL(DVMTaskInterface):
+ KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
+ TASK: str = "text-to-image"
+ FIX_COST: float = 70
+
+ def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+ admin_config: AdminConfig = None, options=None):
+ super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+ def is_input_supported(self, tags):
+ for tag in tags:
+ if tag.as_vec()[0] == 'i':
+ input_value = tag.as_vec()[1]
+ input_type = tag.as_vec()[2]
+ if input_type != "text":
+ return False
+
+ elif tag.as_vec()[0] == 'output':
+ output = tag.as_vec()[1]
+                if (output == "" or
+                        not (output == "image/png" or output == "image/jpg"
+                             or output == "image/png;format=url" or output == "image/jpg;format=url")):
+ print("Output format not supported, skipping..")
+ return False
+
+ return True
+
+ def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+ request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+ request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl.trainer'
+
+ prompt = ""
+ negative_prompt = ""
+ if self.options.get("default_model") and self.options.get("default_model") != "":
+ model = self.options['default_model']
+ else:
+ model = "stabilityai/stable-diffusion-xl-base-1.0"
+
+ ratio_width = "1"
+ ratio_height = "1"
+ width = ""
+ height = ""
+ if self.options.get("default_lora") and self.options.get("default_lora") != "":
+ lora = self.options['default_lora']
+ else:
+ lora = ""
+ lora_weight = ""
+ strength = ""
+ guidance_scale = ""
+ for tag in event.tags():
+ if tag.as_vec()[0] == 'i':
+ input_type = tag.as_vec()[2]
+ if input_type == "text":
+ prompt = tag.as_vec()[1]
+
+ elif tag.as_vec()[0] == 'param':
+ print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
+ if tag.as_vec()[1] == "negative_prompt":
+ negative_prompt = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "lora":
+ lora = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "lora_weight":
+ lora_weight = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "strength":
+ strength = float(tag.as_vec()[2])
+ elif tag.as_vec()[1] == "guidance_scale":
+ guidance_scale = float(tag.as_vec()[2])
+ elif tag.as_vec()[1] == "ratio":
+ if len(tag.as_vec()) > 3:
+ ratio_width = (tag.as_vec()[2])
+ ratio_height = (tag.as_vec()[3])
+ elif len(tag.as_vec()) == 3:
+ split = tag.as_vec()[2].split(":")
+ ratio_width = split[0]
+ ratio_height = split[1]
+ # if size is set it will overwrite ratio.
+ elif tag.as_vec()[1] == "size":
+ if len(tag.as_vec()) > 3:
+ width = (tag.as_vec()[2])
+ height = (tag.as_vec()[3])
+ elif len(tag.as_vec()) == 3:
+ split = tag.as_vec()[2].split("x")
+ if len(split) > 1:
+ width = split[0]
+ height = split[1]
+ elif tag.as_vec()[1] == "model":
+ model = tag.as_vec()[2]
+
+ io_input = {
+ "id": "input_prompt",
+ "type": "input",
+ "src": "request:text",
+ "data": prompt
+ }
+ io_negative = {
+ "id": "negative_prompt",
+ "type": "input",
+ "src": "request:text",
+ "data": negative_prompt
+ }
+ io_output = {
+ "id": "output_image",
+ "type": "output",
+ "src": "request:image"
+ }
+
+ request_form['data'] = json.dumps([io_input, io_negative, io_output])
+
+ options = {
+ "model": model,
+ "ratio": ratio_width + '-' + ratio_height,
+ "width": width,
+ "height": height,
+ "strength": strength,
+ "guidance_scale": guidance_scale,
+ "lora": lora,
+ "lora_weight": lora_weight
+ }
+ request_form['options'] = json.dumps(options)
+
+ return request_form
+
+ def process(self, request_form):
+ try:
+            # Call the process route of NOVA-Server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
+ if bool(json.loads(response)['success']):
+ print("Job " + request_form['jobID'] + " sent to server")
+
+ pool = ThreadPool(processes=1)
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
+ result = thread.get()
+ return result
+
+ except Exception as e:
+ raise Exception(e)
+
+
+# We build an example here that we can call by either calling this file directly from the main directory,
+# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config, server_address,
+                  default_model="stabilityai/stable-diffusion-xl-base-1.0", default_lora=""):
+ dvm_config = build_default_config(identifier)
+ dvm_config.USE_OWN_VENV = False
+ admin_config.LUD16 = dvm_config.LN_ADDRESS
+ # A module might have options it can be initialized with, here we set a default model, and the server
+ # address it should use. These parameters can be freely defined in the task component
+ options = {'default_model': default_model, 'default_lora': default_lora, 'server': server_address}
+
+ nip89info = {
+ "name": name,
+ "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
+ "about": "I draw images based on a prompt with a Model called unstable diffusion",
+ "encryptionSupported": True,
+ "cashuAccepted": True,
+ "nip90Params": {
+ "negative_prompt": {
+ "required": False,
+ "values": []
+ },
+ "ratio": {
+ "required": False,
+ "values": ["1:1", "4:3", "16:9", "3:4", "9:16", "10:16"]
+ }
+ }
+ }
+ nip89config = NIP89Config()
+ nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
+ nip89config.CONTENT = json.dumps(nip89info)
+
+ return ImageGenerationSDXL(name=name, dvm_config=dvm_config, nip89config=nip89config,
+ admin_config=admin_config, options=options)
+
+
+def process_venv():
+ args = DVMTaskInterface.process_args()
+ dvm_config = build_default_config(args.identifier)
+ dvm = ImageGenerationSDXL(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
+ result = dvm.process(json.loads(args.request))
+ DVMTaskInterface.write_output(result, args.output)
+
+
+if __name__ == '__main__':
+ process_venv()
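
Hypothetical playground wiring for this new module, following the pattern main.py uses
for the other examples (admin_config and bot_config come from the playground; the
NOVA-Server address is an assumption):

    from nostr_dvm.tasks import imagegeneration_sdxl

    sdxl = imagegeneration_sdxl.build_example("Unstable Diffusion", "unstable_diffusion", admin_config,
                                              "http://127.0.0.1:8080")
    bot_config.SUPPORTED_DVMS.append(sdxl)
    sdxl.run()
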
diff --git a/nostr_dvm/tasks/imagegeneration_sdxlimg2img.py b/nostr_dvm/tasks/imagegeneration_sdxlimg2img.py
new file mode 100644
index 0000000..1fdf30c
--- /dev/null
+++ b/nostr_dvm/tasks/imagegeneration_sdxlimg2img.py
@@ -0,0 +1,235 @@
+import json
+from multiprocessing.pool import ThreadPool
+
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from nostr_dvm.utils.definitions import EventDefinitions
+
+"""
+This File contains a Module to transform Image (and Text) input on NOVA-Server and receive results back.
+
+Accepted Inputs: Prompt (text)
+Outputs: An url to an Image
+Params: -model # models: juggernaut, dynavision, colossusProject, newreality, unstable
+ -lora # loras (weights on top of models) voxel,
+"""
+
+
+class ImageGenerationSDXLIMG2IMG(DVMTaskInterface):
+ KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
+ TASK: str = "image-to-image"
+ FIX_COST: float = 70
+
+ def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+ admin_config: AdminConfig = None, options=None):
+ super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+ def is_input_supported(self, tags):
+ hasurl = False
+ hasprompt = False
+ for tag in tags:
+ if tag.as_vec()[0] == 'i':
+ input_value = tag.as_vec()[1]
+ input_type = tag.as_vec()[2]
+ if input_type == "url":
+ hasurl = True
+ elif input_type == "text":
+                    hasprompt = True  # prompt is optional when a lora is set
+
+ elif tag.as_vec()[0] == 'output':
+ output = tag.as_vec()[1]
+                if (output == "" or
+                        not (output == "image/png" or output == "image/jpg"
+                             or output == "image/png;format=url" or output == "image/jpg;format=url")):
+ print("Output format not supported, skipping..")
+ return False
+
+ if not hasurl:
+ return False
+
+ return True
+
+ def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+ request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+ request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl-img2img.trainer'
+
+ prompt = ""
+ negative_prompt = ""
+ url = ""
+ if self.options.get("default_model"):
+ model = self.options['default_model']
+ else:
+ model = "stabilityai/stable-diffusion-xl-refiner-1.0"
+
+ ratio_width = "1"
+ ratio_height = "1"
+ width = ""
+ height = ""
+
+ if self.options.get("default_lora") and self.options.get("default_lora") != "":
+ lora = self.options['default_lora']
+ else:
+ lora = ""
+
+ lora_weight = ""
+ if self.options.get("strength"):
+ strength = float(self.options['strength'])
+ else:
+ strength = 0.8
+ if self.options.get("guidance_scale"):
+ guidance_scale = float(self.options['guidance_scale'])
+ else:
+ guidance_scale = 11.0
+ for tag in event.tags():
+ if tag.as_vec()[0] == 'i':
+ input_type = tag.as_vec()[2]
+ if input_type == "text":
+ prompt = tag.as_vec()[1]
+ elif input_type == "url":
+ url = tag.as_vec()[1]
+
+ elif tag.as_vec()[0] == 'param':
+ print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
+ if tag.as_vec()[1] == "negative_prompt":
+ negative_prompt = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "lora":
+ lora = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "lora_weight":
+ lora_weight = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "strength":
+ strength = float(tag.as_vec()[2])
+ elif tag.as_vec()[1] == "guidance_scale":
+ guidance_scale = float(tag.as_vec()[2])
+ elif tag.as_vec()[1] == "ratio":
+ if len(tag.as_vec()) > 3:
+ ratio_width = (tag.as_vec()[2])
+ ratio_height = (tag.as_vec()[3])
+ elif len(tag.as_vec()) == 3:
+ split = tag.as_vec()[2].split(":")
+ ratio_width = split[0]
+ ratio_height = split[1]
+ # if size is set it will overwrite ratio.
+ elif tag.as_vec()[1] == "size":
+ if len(tag.as_vec()) > 3:
+ width = (tag.as_vec()[2])
+ height = (tag.as_vec()[3])
+ elif len(tag.as_vec()) == 3:
+ split = tag.as_vec()[2].split("x")
+ if len(split) > 1:
+ width = split[0]
+ height = split[1]
+ elif tag.as_vec()[1] == "model":
+ model = tag.as_vec()[2]
+
+ io_input_image = {
+ "id": "input_image",
+ "type": "input",
+ "src": "url:Image",
+ "uri": url
+ }
+ io_input = {
+ "id": "input_prompt",
+ "type": "input",
+ "src": "request:text",
+ "data": prompt
+ }
+ io_negative = {
+ "id": "negative_prompt",
+ "type": "input",
+ "src": "request:text",
+ "data": negative_prompt
+ }
+ io_output = {
+ "id": "output_image",
+ "type": "output",
+ "src": "request:image"
+ }
+
+ request_form['data'] = json.dumps([io_input_image, io_input, io_negative, io_output])
+
+ options = {
+ "model": model,
+ "ratio": ratio_width + '-' + ratio_height,
+ "width": width,
+ "height": height,
+ "strength": strength,
+ "guidance_scale": guidance_scale,
+ "lora": lora,
+ "lora_weight": lora_weight,
+ "n_steps": 30
+ }
+ request_form['options'] = json.dumps(options)
+
+ return request_form
+
+ def process(self, request_form):
+ try:
+ # Call the process route of NOVA-Server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
+ if bool(json.loads(response)['success']):
+ print("Job " + request_form['jobID'] + " sent to server")
+
+ pool = ThreadPool(processes=1)
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
+ result = thread.get()
+ return result
+
+ except Exception as e:
+ raise Exception(e)
+
+
+# We build an example here that we can call by either calling this file directly from the main directory,
+# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config, server_address, default_lora="", strength=0.6):
+ dvm_config = build_default_config(identifier)
+ dvm_config.USE_OWN_VENV = False
+ admin_config.LUD16 = dvm_config.LN_ADDRESS
+
+ nip89info = {
+ "name": name,
+ "image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
+ "about": "I convert an image to another image, kinda random for now. ",
+ "encryptionSupported": True,
+ "cashuAccepted": True,
+ "nip90Params": {
+ "negative_prompt": {
+ "required": False,
+ "values": []
+ },
+ "lora": {
+ "required": False,
+ "values": ["inkpunk", "timburton", "voxel"]
+ },
+ "strength": {
+ "required": False,
+ "values": []
+ }
+ }
+ }
+
+ # A module might have options it can be initialized with, here we set a default model, lora and the server
+ options = {'default_lora': default_lora, 'strength': strength, 'server': server_address}
+
+ nip89config = NIP89Config()
+ nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
+ nip89config.CONTENT = json.dumps(nip89info)
+
+ return ImageGenerationSDXLIMG2IMG(name=name, dvm_config=dvm_config, nip89config=nip89config,
+ admin_config=admin_config, options=options)
+
+
+def process_venv():
+ args = DVMTaskInterface.process_args()
+ dvm_config = build_default_config(args.identifier)
+ dvm = ImageGenerationSDXLIMG2IMG(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
+ result = dvm.process(json.loads(args.request))
+ DVMTaskInterface.write_output(result, args.output)
+
+
+if __name__ == '__main__':
+ process_venv()
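
As an illustration of the tag parsing above, a request for this module might carry tags
like the following (values are examples only; note that "size" overrides "ratio"):

    tags = [
        ["i", "https://example.com/input.png", "url"],  # input image
        ["i", "a voxel city at night", "text"],         # optional prompt
        ["param", "lora", "voxel"],
        ["param", "strength", "0.6"],
        ["param", "ratio", "16:9"],
        ["param", "size", "1024x576"],                  # overwrites ratio if set
    ]
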
diff --git a/nostr_dvm/tasks/imageinterrogator.py b/nostr_dvm/tasks/imageinterrogator.py
new file mode 100644
index 0000000..1bc9390
--- /dev/null
+++ b/nostr_dvm/tasks/imageinterrogator.py
@@ -0,0 +1,149 @@
+import json
+from multiprocessing.pool import ThreadPool
+
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from nostr_dvm.utils.definitions import EventDefinitions
+
+"""
+This File contains a Module to extract a prompt from an image at a given URL.
+
+Accepted Inputs: link to image (url)
+Outputs: A textual description of the image
+
+"""
+
+
+class ImageInterrogator(DVMTaskInterface):
+ KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
+ TASK: str = "image-to-text"
+ FIX_COST: float = 80
+
+ def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+ admin_config: AdminConfig = None, options=None):
+ super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+ def is_input_supported(self, tags):
+ hasurl = False
+ for tag in tags:
+ if tag.as_vec()[0] == 'i':
+ input_value = tag.as_vec()[1]
+ input_type = tag.as_vec()[2]
+ if input_type == "url":
+ hasurl = True
+
+ if not hasurl:
+ return False
+
+ return True
+
+ def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+ request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+ request_form["trainerFilePath"] = r'modules\image_interrogator\image_interrogator.trainer'
+ url = ""
+ method = "prompt"
+ mode = "best"
+
+ for tag in event.tags():
+ if tag.as_vec()[0] == 'i':
+ input_type = tag.as_vec()[2]
+ if input_type == "url":
+ url = tag.as_vec()[1]
+ elif tag.as_vec()[0] == 'param':
+ print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
+ if tag.as_vec()[1] == "method":
+ method = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "mode":
+ mode = tag.as_vec()[2]
+
+ io_input_image = {
+ "id": "input_image",
+ "type": "input",
+ "src": "url:Image",
+ "uri": url
+ }
+
+ io_output = {
+ "id": "output",
+ "type": "output",
+ "src": "request:text"
+ }
+
+ request_form['data'] = json.dumps([io_input_image, io_output])
+
+ options = {
+ "kind": method,
+ "mode": mode
+
+ }
+ request_form['options'] = json.dumps(options)
+
+ return request_form
+
+ def process(self, request_form):
+ try:
+ # Call the process route of NOVA-Server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
+ if bool(json.loads(response)['success']):
+ print("Job " + request_form['jobID'] + " sent to server")
+
+ pool = ThreadPool(processes=1)
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
+ result = thread.get()
+ return result
+
+ except Exception as e:
+ raise Exception(e)
+
+
+# We build an example here that we can call by either calling this file directly from the main directory,
+# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config, server_address):
+ dvm_config = build_default_config(identifier)
+ dvm_config.USE_OWN_VENV = False
+ admin_config.LUD16 = dvm_config.LN_ADDRESS
+
+ nip89info = {
+ "name": name,
+ "image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
+ "about": "I analyse Images an return a prompt or a prompt analysis",
+ "encryptionSupported": True,
+ "cashuAccepted": True,
+ "nip90Params": {
+ "method": {
+ "required": False,
+ "values": ["prompt", "analysis"]
+ },
+ "mode": {
+ "required": False,
+ "values": ["best", "classic", "fast", "negative"]
+ }
+ }
+ }
+
+ # A module might have options it can be initialized with, here we set a default model, lora and the server
+ options = {'server': server_address}
+
+ nip89config = NIP89Config()
+ nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
+ nip89config.CONTENT = json.dumps(nip89info)
+
+ return ImageInterrogator(name=name, dvm_config=dvm_config, nip89config=nip89config,
+ admin_config=admin_config, options=options)
+
+
+def process_venv():
+ args = DVMTaskInterface.process_args()
+ dvm_config = build_default_config(args.identifier)
+ dvm = ImageInterrogator(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
+ result = dvm.process(json.loads(args.request))
+ DVMTaskInterface.write_output(result, args.output)
+
+
+if __name__ == '__main__':
+ process_venv()
diff --git a/nostr_dvm/tasks/imageupscale.py b/nostr_dvm/tasks/imageupscale.py
new file mode 100644
index 0000000..0133fdc
--- /dev/null
+++ b/nostr_dvm/tasks/imageupscale.py
@@ -0,0 +1,141 @@
+import json
+from multiprocessing.pool import ThreadPool
+
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from nostr_dvm.utils.definitions import EventDefinitions
+
+"""
+This File contains a Module to upscale an image from a URL by a factor of 2-4
+
+Accepted Inputs: link to image (url)
+Outputs: An url to an Image
+Params: -upscale 2,3,4
+"""
+
+
+class ImageUpscale(DVMTaskInterface):
+ KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
+ TASK: str = "image-to-image"
+ FIX_COST: float = 20
+
+ def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+ admin_config: AdminConfig = None, options=None):
+ super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+ def is_input_supported(self, tags):
+ hasurl = False
+ for tag in tags:
+ if tag.as_vec()[0] == 'i':
+ input_value = tag.as_vec()[1]
+ input_type = tag.as_vec()[2]
+ if input_type == "url":
+ hasurl = True
+
+ if not hasurl:
+ return False
+
+ return True
+
+ def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+ request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+ request_form["trainerFilePath"] = r'modules\image_upscale\image_upscale_realesrgan.trainer'
+ url = ""
+ out_scale = 4
+
+ for tag in event.tags():
+ if tag.as_vec()[0] == 'i':
+ input_type = tag.as_vec()[2]
+ if input_type == "url":
+ url = tag.as_vec()[1]
+
+ elif tag.as_vec()[0] == 'param':
+ print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
+ if tag.as_vec()[1] == "upscale":
+ out_scale = tag.as_vec()[2]
+
+ io_input_image = {
+ "id": "input_image",
+ "type": "input",
+ "src": "url:Image",
+ "uri": url
+ }
+
+ io_output = {
+ "id": "output_image",
+ "type": "output",
+ "src": "request:image"
+ }
+
+ request_form['data'] = json.dumps([io_input_image, io_output])
+
+ options = {
+ "outscale": out_scale,
+
+ }
+ request_form['options'] = json.dumps(options)
+
+ return request_form
+
+ def process(self, request_form):
+ try:
+ # Call the process route of NOVA-Server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
+ if bool(json.loads(response)['success']):
+ print("Job " + request_form['jobID'] + " sent to server")
+
+ pool = ThreadPool(processes=1)
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
+ result = thread.get()
+ return result
+
+ except Exception as e:
+ raise Exception(e)
+
+
+# We build an example here that we can call by either calling this file directly from the main directory,
+# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config, server_address):
+ dvm_config = build_default_config(identifier)
+ dvm_config.USE_OWN_VENV = False
+ admin_config.LUD16 = dvm_config.LN_ADDRESS
+
+ # A module might have options it can be initialized with, here we set a default model, lora and the server
+ options = {'server': server_address}
+
+ nip89info = {
+ "name": name,
+ "image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
+ "about": "I upscale an image using realESRGan up to factor 4 (default is factor 4)",
+ "encryptionSupported": True,
+ "cashuAccepted": True,
+ "nip90Params": {
+ "upscale": {
+ "required": False,
+ "values": ["2", "3", "4"]
+ }
+ }
+ }
+ nip89config = NIP89Config()
+ nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
+ nip89config.CONTENT = json.dumps(nip89info)
+
+ return ImageUpscale(name=name, dvm_config=dvm_config, nip89config=nip89config,
+ admin_config=admin_config, options=options)
+
+
+def process_venv():
+ args = DVMTaskInterface.process_args()
+ dvm_config = build_default_config(args.identifier)
+ dvm = ImageUpscale(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
+ result = dvm.process(json.loads(args.request))
+ DVMTaskInterface.write_output(result, args.output)
+
+
+if __name__ == '__main__':
+ process_venv()
diff --git a/nostr_dvm/tasks/textextraction_google.py b/nostr_dvm/tasks/textextraction_google.py
index f5c6b69..23053df 100644
--- a/nostr_dvm/tasks/textextraction_google.py
+++ b/nostr_dvm/tasks/textextraction_google.py
@@ -1,20 +1,16 @@
import json
import os
import time
-from pathlib import Path
-
-import dotenv
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.mediasource_utils import organize_input_media_data
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions
"""
-This File contains a Module to transform a media file input on Google Cloud
+This File contains a Module to extract text from a media file input on Google Cloud
Accepted Inputs: Url to media file (url)
Outputs: Transcribed text
@@ -30,7 +26,6 @@ class SpeechToTextGoogle(DVMTaskInterface):
dependencies = [("nostr-dvm", "nostr-dvm"),
("speech_recognition", "SpeechRecognition==3.10.0")]
-
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
dvm_config.SCRIPT = os.path.abspath(__file__)
@@ -129,6 +124,7 @@ class SpeechToTextGoogle(DVMTaskInterface):
return result
+
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
@@ -158,6 +154,8 @@ def build_example(name, identifier, admin_config):
return SpeechToTextGoogle(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
+
+
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -167,4 +165,4 @@ def process_venv():
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/textextraction_pdf.py b/nostr_dvm/tasks/textextraction_pdf.py
index d578299..b993097 100644
--- a/nostr_dvm/tasks/textextraction_pdf.py
+++ b/nostr_dvm/tasks/textextraction_pdf.py
@@ -1,13 +1,9 @@
import json
import os
import re
-from pathlib import Path
-
-import dotenv
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -29,14 +25,11 @@ class TextExtractionPDF(DVMTaskInterface):
dependencies = [("nostr-dvm", "nostr-dvm"),
("pypdf", "pypdf==3.17.1")]
-
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
dvm_config.SCRIPT = os.path.abspath(__file__)
super().__init__(name, dvm_config, nip89config, admin_config, options)
-
-
def is_input_supported(self, tags):
for tag in tags:
if tag.as_vec()[0] == 'i':
@@ -118,6 +111,7 @@ def build_example(name, identifier, admin_config):
return TextExtractionPDF(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
+
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -127,4 +121,4 @@ def process_venv():
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/textextraction_whisperx.py b/nostr_dvm/tasks/textextraction_whisperx.py
new file mode 100644
index 0000000..13e998a
--- /dev/null
+++ b/nostr_dvm/tasks/textextraction_whisperx.py
@@ -0,0 +1,193 @@
+import json
+import os
+import time
+from multiprocessing.pool import ThreadPool
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server, send_file_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.mediasource_utils import organize_input_media_data
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from nostr_dvm.utils.definitions import EventDefinitions
+
+"""
+This File contains a Module to transcribe a media file input on NOVA-Server and receive results back.
+
+Accepted Inputs: Url to media file (url)
+Outputs: Transcribed text
+
+"""
+
+
+class SpeechToTextWhisperX(DVMTaskInterface):
+ KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
+ TASK: str = "speech-to-text"
+ FIX_COST: float = 10
+ PER_UNIT_COST: float = 0.1
+
+ def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+ admin_config: AdminConfig = None, options=None):
+ super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+ def is_input_supported(self, tags):
+ for tag in tags:
+ if tag.as_vec()[0] == 'i':
+ input_value = tag.as_vec()[1]
+ input_type = tag.as_vec()[2]
+ if input_type != "url":
+ return False
+
+ elif tag.as_vec()[0] == 'output':
+ output = tag.as_vec()[1]
+ if output == "" or not (output == "text/plain"):
+ print("Output format not supported, skipping..")
+ return False
+
+ return True
+
+ def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+ request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", ""),
+ "trainerFilePath": r'modules\whisperx\whisperx_transcript.trainer'}
+
+ if self.options.get("default_model"):
+ model = self.options['default_model']
+ else:
+ model = "base"
+ if self.options.get("alignment"):
+ alignment = self.options['alignment']
+ else:
+ alignment = "raw"
+
+ url = ""
+ input_type = "url"
+ start_time = 0
+ end_time = 0
+ media_format = "audio/mp3"
+
+ for tag in event.tags():
+ if tag.as_vec()[0] == 'i':
+ input_type = tag.as_vec()[2]
+ if input_type == "url":
+ url = tag.as_vec()[1]
+
+ elif tag.as_vec()[0] == 'param':
+ print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
+ if tag.as_vec()[1] == "alignment":
+ alignment = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "model":
+ model = tag.as_vec()[2]
+ elif tag.as_vec()[1] == "range":
+ try:
+ t = time.strptime(tag.as_vec()[2], "%H:%M:%S")
+ seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
+ start_time = float(seconds)
+ except:
+ try:
+ t = time.strptime(tag.as_vec()[2], "%M:%S")
+ seconds = t.tm_min * 60 + t.tm_sec
+ start_time = float(seconds)
+ except:
+ start_time = tag.as_vec()[2]
+ try:
+ t = time.strptime(tag.as_vec()[3], "%H:%M:%S")
+ seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
+ end_time = float(seconds)
+ except:
+ try:
+ t = time.strptime(tag.as_vec()[3], "%M:%S")
+ seconds = t.tm_min * 60 + t.tm_sec
+ end_time = float(seconds)
+ except:
+ end_time = float(tag.as_vec()[3])
+
+ filepath = organize_input_media_data(url, input_type, start_time, end_time, dvm_config, client, True,
+ media_format)
+ path_on_server = send_file_to_server(os.path.realpath(filepath), self.options['server'])
+
+ io_input = {
+ "id": "audio",
+ "type": "input",
+ "src": "file:stream",
+ "uri": path_on_server
+ }
+
+ io_output = {
+ "id": "transcript",
+ "type": "output",
+ "src": "request:annotation:free"
+ }
+
+ request_form['data'] = json.dumps([io_input, io_output])
+
+ options = {
+ "model": model,
+ "alignment_mode": alignment,
+ }
+ request_form['options'] = json.dumps(options)
+ return request_form
+
+ def process(self, request_form):
+ try:
+ # Call the process route of NOVA-Server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
+ if bool(json.loads(response)['success']):
+ print("Job " + request_form['jobID'] + " sent to server")
+
+ pool = ThreadPool(processes=1)
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
+ result = thread.get()
+ return result
+
+ except Exception as e:
+ raise Exception(e)
+
+
+# We build an example here that we can call by either calling this file directly from the main directory,
+# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config, server_address):
+ dvm_config = build_default_config(identifier)
+ dvm_config.USE_OWN_VENV = False
+ admin_config.LUD16 = dvm_config.LN_ADDRESS
+
+ # A module might have options it can be initialized with, here we set a default model, and the server
+ # address it should use. These parameters can be freely defined in the task component
+ options = {'default_model': "base", 'server': server_address}
+
+ nip89info = {
+ "name": name,
+ "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
+ "about": "I extract text from media files with WhisperX",
+ "encryptionSupported": True,
+ "cashuAccepted": True,
+ "nip90Params": {
+ "model": {
+ "required": False,
+ "values": ["base", "tiny", "small", "medium", "large-v1", "large-v2", "tiny.en", "base.en", "small.en",
+ "medium.en"]
+ },
+ "alignment": {
+ "required": False,
+ "values": ["raw", "segment", "word"]
+ }
+ }
+ }
+ nip89config = NIP89Config()
+ nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
+ nip89config.CONTENT = json.dumps(nip89info)
+
+ return SpeechToTextWhisperX(name=name, dvm_config=dvm_config, nip89config=nip89config,
+ admin_config=admin_config, options=options)
+
+
+def process_venv():
+ args = DVMTaskInterface.process_args()
+ dvm_config = build_default_config(args.identifier)
+ dvm = SpeechToTextWhisperX(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
+ result = dvm.process(json.loads(args.request))
+ DVMTaskInterface.write_output(result, args.output)
+
+
+if __name__ == '__main__':
+ process_venv()
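
A worked example of the "range" parsing above: "HH:MM:SS" is tried first, then "MM:SS",
and only then is the raw value kept:

    import time

    t = time.strptime("01:30", "%M:%S")
    seconds = t.tm_min * 60 + t.tm_sec
    print(seconds)  # 90 -> transcription starts 90 seconds into the media file
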
diff --git a/nostr_dvm/tasks/textgeneration_llmlite.py b/nostr_dvm/tasks/textgeneration_llmlite.py
index ab2bd0e..e4d43ce 100644
--- a/nostr_dvm/tasks/textgeneration_llmlite.py
+++ b/nostr_dvm/tasks/textgeneration_llmlite.py
@@ -1,13 +1,8 @@
import json
import os
-from pathlib import Path
-
-import dotenv
-
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
diff --git a/nostr_dvm/tasks/texttospeech.py b/nostr_dvm/tasks/texttospeech.py
index eca7523..359c9e9 100644
--- a/nostr_dvm/tasks/texttospeech.py
+++ b/nostr_dvm/tasks/texttospeech.py
@@ -1,5 +1,6 @@
import json
import os
+
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
from pathlib import Path
import urllib.request
@@ -79,7 +80,7 @@ class TextToSpeech(DVMTaskInterface):
from TTS.api import TTS
options = DVMTaskInterface.set_options(request_form)
device = "cuda" if torch.cuda.is_available() else "cpu"
- #else "mps" if torch.backends.mps.is_available() \
+ # else "mps" if torch.backends.mps.is_available() \
print(TTS().list_models())
try:
@@ -102,7 +103,7 @@ def build_example(name, identifier, admin_config):
dvm_config = build_default_config(identifier)
admin_config.LUD16 = dvm_config.LN_ADDRESS
- #use an alternative local wav file you want to use for cloning
+ # use an alternative local wav file you want to use for cloning
options = {'input_file': ""}
nip89info = {
@@ -134,5 +135,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
+
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/translation_google.py b/nostr_dvm/tasks/translation_google.py
index b1f17d8..b4ed1e3 100644
--- a/nostr_dvm/tasks/translation_google.py
+++ b/nostr_dvm/tasks/translation_google.py
@@ -1,20 +1,14 @@
import json
import os
-from pathlib import Path
-
-import dotenv
-
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.nostr_utils import get_referenced_event_by_id, get_event_by_id
-
"""
-This File contains a Module to call Google Translate Services locally on the DVM Machine
+This File contains a Module to call Google Translate Services on the DVM Machine
Accepted Inputs: Text, Events, Jobs (Text Extraction, Summary, Translation)
Outputs: Text containing the TranslationGoogle in the desired language.
@@ -111,6 +105,7 @@ class TranslationGoogle(DVMTaskInterface):
return translated_text
+
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
@@ -128,12 +123,18 @@ def build_example(name, identifier, admin_config):
"nip90Params": {
"language": {
"required": False,
- "values": ["en", "az", "be", "bg", "bn", "bs", "ca", "ceb", "co", "cs", "cy", "da", "de", "el", "eo", "es",
- "et", "eu", "fa", "fi", "fr", "fy", "ga", "gd", "gl", "gu", "ha", "haw", "hi", "hmn", "hr", "ht",
- "hu", "hy", "id", "ig", "is", "it", "he", "ja", "jv", "ka", "kk", "km", "kn", "ko", "ku", "ky",
- "la", "lb", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne", "nl",
- "no", "ny", "or", "pa", "pl", "ps", "pt", "ro", "ru", "sd", "si", "sk", "sl", "sm", "sn", "so",
- "sq", "sr", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "tl", "tr", "ug", "uk", "ur", "uz",
+ "values": ["en", "az", "be", "bg", "bn", "bs", "ca", "ceb", "co", "cs", "cy", "da", "de", "el", "eo",
+ "es",
+ "et", "eu", "fa", "fi", "fr", "fy", "ga", "gd", "gl", "gu", "ha", "haw", "hi", "hmn", "hr",
+ "ht",
+ "hu", "hy", "id", "ig", "is", "it", "he", "ja", "jv", "ka", "kk", "km", "kn", "ko", "ku",
+ "ky",
+ "la", "lb", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne",
+ "nl",
+ "no", "ny", "or", "pa", "pl", "ps", "pt", "ro", "ru", "sd", "si", "sk", "sl", "sm", "sn",
+ "so",
+ "sq", "sr", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "tl", "tr", "ug", "uk", "ur",
+ "uz",
"vi", "xh", "yi", "yo", "zh", "zu"]
}
}
@@ -145,7 +146,6 @@ def build_example(name, identifier, admin_config):
return TranslationGoogle(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)
-
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -155,4 +155,4 @@ def process_venv():
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/translation_libretranslate.py b/nostr_dvm/tasks/translation_libretranslate.py
index c3d00c7..cc6f6d2 100644
--- a/nostr_dvm/tasks/translation_libretranslate.py
+++ b/nostr_dvm/tasks/translation_libretranslate.py
@@ -1,13 +1,9 @@
import json
import os
-from pathlib import Path
-
-import dotenv
import requests
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -156,5 +152,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
+
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/trending_notes_nostrband.py b/nostr_dvm/tasks/trending_notes_nostrband.py
index 20c5cd5..85c2e73 100644
--- a/nostr_dvm/tasks/trending_notes_nostrband.py
+++ b/nostr_dvm/tasks/trending_notes_nostrband.py
@@ -1,13 +1,9 @@
import json
import os
-from pathlib import Path
-
-import dotenv
from nostr_sdk import Tag
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -118,6 +114,7 @@ def build_example(name, identifier, admin_config):
return TrendingNotesNostrBand(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
+
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -125,5 +122,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
+
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/tasks/videogeneration_replicate_svd.py b/nostr_dvm/tasks/videogeneration_replicate_svd.py
index f283c1c..fa4bed6 100644
--- a/nostr_dvm/tasks/videogeneration_replicate_svd.py
+++ b/nostr_dvm/tasks/videogeneration_replicate_svd.py
@@ -1,17 +1,12 @@
import json
import os
-import subprocess
from io import BytesIO
-from pathlib import Path
-
-import dotenv
import requests
import urllib.request
from PIL import Image
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -39,9 +34,6 @@ class VideoGenerationReplicateSVD(DVMTaskInterface):
dvm_config.SCRIPT = os.path.abspath(__file__)
super().__init__(name, dvm_config, nip89config, admin_config, options)
-
-
-
def is_input_supported(self, tags):
for tag in tags:
if tag.as_vec()[0] == 'i':
@@ -140,6 +132,7 @@ def build_example(name, identifier, admin_config):
return VideoGenerationReplicateSVD(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
+
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -149,4 +142,4 @@ def process_venv():
if __name__ == '__main__':
- process_venv()
\ No newline at end of file
+ process_venv()
diff --git a/nostr_dvm/utils/definitions.py b/nostr_dvm/utils/definitions.py
index 99d1df9..3397307 100644
--- a/nostr_dvm/utils/definitions.py
+++ b/nostr_dvm/utils/definitions.py
@@ -2,6 +2,8 @@ import os
from dataclasses import dataclass
from nostr_sdk import Event
+
+
class EventDefinitions:
KIND_DM = 4
KIND_ZAP = 9735
@@ -14,8 +16,6 @@ class EventDefinitions:
KIND_NIP90_RESULT_SUMMARIZE_TEXT = KIND_NIP90_SUMMARIZE_TEXT + 1000
KIND_NIP90_TRANSLATE_TEXT = 5002
KIND_NIP90_RESULT_TRANSLATE_TEXT = KIND_NIP90_TRANSLATE_TEXT + 1000
- KIND_NIP90_TEXT_TO_SPEECH = 5005
- KIND_NIP90_RESULT_TEXT_TO_SPEECH = KIND_NIP90_TEXT_TO_SPEECH + 1000
KIND_NIP90_GENERATE_TEXT = 5050
KIND_NIP90_RESULT_GENERATE_TEXT = KIND_NIP90_GENERATE_TEXT + 1000
KIND_NIP90_GENERATE_IMAGE = 5100
@@ -23,6 +23,8 @@ class EventDefinitions:
KIND_NIP90_CONVERT_VIDEO = 5200
KIND_NIP90_RESULT_CONVERT_VIDEO = KIND_NIP90_CONVERT_VIDEO + 1000
KIND_NIP90_GENERATE_VIDEO = 5202
+ KIND_NIP90_TEXT_TO_SPEECH = 5250
+ KIND_NIP90_RESULT_TEXT_TO_SPEECH = KIND_NIP90_TEXT_TO_SPEECH + 1000
KIND_NIP90_RESULT_GENERATE_VIDEO = KIND_NIP90_GENERATE_VIDEO + 1000
KIND_NIP90_CONTENT_DISCOVERY = 5300
KIND_NIP90_RESULT_CONTENT_DISCOVERY = KIND_NIP90_CONTENT_DISCOVERY + 1000
@@ -59,6 +61,7 @@ class JobToWatch:
payment_hash: str
expires: int
+
@dataclass
class RequiredJobToWatch:
event: Event
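
The renumbering above keeps the repo-wide convention that a result kind is its request
kind plus 1000; for the relocated text-to-speech kind:

    KIND_NIP90_TEXT_TO_SPEECH = 5250
    KIND_NIP90_RESULT_TEXT_TO_SPEECH = KIND_NIP90_TEXT_TO_SPEECH + 1000  # 6250 (was 5005/6005)
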
diff --git a/setup.py b/setup.py
index 486ecd0..9c1a6c5 100644
--- a/setup.py
+++ b/setup.py
@@ -13,10 +13,7 @@ setup(
author_email="believethehypeonnostr@proton.me",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
- packages=find_packages(include=['nostr_dvm', 'nostr_dvm.interfaces', 'nostr_dvm.tasks',
- 'nostr_dvm.utils', 'nostr_dvm.utils.scrapper',
- 'nostr_dvm.backends', 'nostr_dvm.backends.mlx',
- 'nostr_dvm.backends.mlx.stablediffusion']),
+    packages=find_packages(include=['nostr_dvm', 'nostr_dvm.*']),
install_requires=["nostr-sdk==0.0.5",
"bech32==1.2.0",
"pycryptodome==3.19.0",