diff --git a/main.py b/main.py index 5157144..7f410cd 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,13 @@ import os from pathlib import Path import dotenv +from sys import platform from nostr_dvm.bot import Bot from nostr_dvm.tasks import videogeneration_replicate_svd, imagegeneration_replicate_sdxl, textgeneration_llmlite, \ trending_notes_nostrband, discovery_inactive_follows, translation_google, textextraction_pdf, \ translation_libretranslate, textextraction_google, convert_media, imagegeneration_openai_dalle, texttospeech, \ - imagegeneration_mlx, advanced_search, textextraction_whisper_mlx + imagegeneration_sd21_mlx, advanced_search from nostr_dvm.utils.admin_utils import AdminConfig from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.definitions import EventDefinitions @@ -139,10 +140,10 @@ def playground(): bot_config.SUPPORTED_DVMS.append(tts) tts.run() - from sys import platform + if platform == "darwin": # Test with MLX for OSX M1/M2/M3 chips - mlx = imagegeneration_mlx.build_example("SD with MLX", "mlx_sd", admin_config) + mlx = imagegeneration_sd21_mlx.build_example("SD with MLX", "mlx_sd", admin_config) bot_config.SUPPORTED_DVMS.append(mlx) mlx.run() diff --git a/backends/__init__.py b/nostr_dvm/backends/mlx/__init__.py similarity index 100% rename from backends/__init__.py rename to nostr_dvm/backends/mlx/__init__.py diff --git a/backends/mlx/__init__.py b/nostr_dvm/backends/mlx/modules/__init__.py similarity index 100% rename from backends/mlx/__init__.py rename to nostr_dvm/backends/mlx/modules/__init__.py diff --git a/backends/mlx/stable_diffusion/__init__.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py similarity index 100% rename from backends/mlx/stable_diffusion/__init__.py rename to nostr_dvm/backends/mlx/modules/stable_diffusion/__init__.py diff --git a/backends/mlx/stable_diffusion/clip.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/clip.py similarity index 100% rename from backends/mlx/stable_diffusion/clip.py rename to nostr_dvm/backends/mlx/modules/stable_diffusion/clip.py diff --git a/backends/mlx/stable_diffusion/config.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/config.py similarity index 100% rename from backends/mlx/stable_diffusion/config.py rename to nostr_dvm/backends/mlx/modules/stable_diffusion/config.py diff --git a/backends/mlx/stable_diffusion/model_io.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py similarity index 100% rename from backends/mlx/stable_diffusion/model_io.py rename to nostr_dvm/backends/mlx/modules/stable_diffusion/model_io.py diff --git a/backends/mlx/stable_diffusion/sampler.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py similarity index 100% rename from backends/mlx/stable_diffusion/sampler.py rename to nostr_dvm/backends/mlx/modules/stable_diffusion/sampler.py diff --git a/backends/mlx/stable_diffusion/tokenizer.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/tokenizer.py similarity index 100% rename from backends/mlx/stable_diffusion/tokenizer.py rename to nostr_dvm/backends/mlx/modules/stable_diffusion/tokenizer.py diff --git a/backends/mlx/stable_diffusion/unet.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py similarity index 100% rename from backends/mlx/stable_diffusion/unet.py rename to nostr_dvm/backends/mlx/modules/stable_diffusion/unet.py diff --git a/backends/mlx/stable_diffusion/vae.py b/nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py similarity index 100% rename from backends/mlx/stable_diffusion/vae.py 
rename to nostr_dvm/backends/mlx/modules/stable_diffusion/vae.py diff --git a/nostr_dvm/backends/nova_server/modules/__init__.py b/nostr_dvm/backends/nova_server/modules/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py b/nostr_dvm/backends/nova_server/modules/image_interrogator/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py new file mode 100644 index 0000000..217f5f3 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.py @@ -0,0 +1,129 @@ +"""Clip Interrogator Module +""" +import gc +import sys +import os + +sys.path.insert(0, os.path.dirname(__file__)) + + +from nova_utils.interfaces.server_module import Processor + +# Setting defaults +_default_options = {"kind": "prompt", "mode": "fast" } + +# TODO: add log infos, +class ImageInterrogator(Processor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.options = _default_options | self.options + self.device = None + self.ds_iter = None + self.current_session = None + + + # IO shortcuts + self.input = [x for x in self.model_io if x.io_type == "input"] + self.output = [x for x in self.model_io if x.io_type == "output"] + self.input = self.input[0] + self.output = self.output[0] + + def process_data(self, ds_iter) -> dict: + + from PIL import Image as PILImage + import torch + + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.ds_iter = ds_iter + current_session_name = self.ds_iter.session_names[0] + self.current_session = self.ds_iter.sessions[current_session_name]['manager'] + #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512" + kind = self.options['kind']  # "prompt" or "analysis" + mode = self.options['mode'] + #url = self.current_session.input_data['input_image_url'].data[0] + #print(url) + input_image = self.current_session.input_data['input_image'].data + init_image = PILImage.fromarray(input_image) + mwidth = 256 + mheight = 256 + + + w = mwidth + h = mheight + if init_image.width > init_image.height: + scale = float(init_image.height / init_image.width) + w = mwidth + h = int(mheight * scale) + elif init_image.width < init_image.height: + scale = float(init_image.width / init_image.height) + w = int(mwidth * scale) + h = mheight + else: + w = mwidth + h = mheight + + init_image = init_image.resize((w, h)) + + from clip_interrogator import Config, Interrogator + + config = Config(clip_model_name="ViT-L-14/openai", device=self.device) + + + if kind == "analysis": + ci = Interrogator(config) + + + image_features = ci.image_to_features(init_image) + + top_mediums = ci.mediums.rank(image_features, 5) + top_artists = ci.artists.rank(image_features, 5) + top_movements = ci.movements.rank(image_features, 5) + top_trendings = ci.trendings.rank(image_features, 5) + top_flavors = ci.flavors.rank(image_features, 5) + + medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))} + artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))} + movement_ranks = {movement: sim for movement, sim in + zip(top_movements, ci.similarities(image_features, top_movements))} + trending_ranks = {trending: sim for trending, sim in + zip(top_trendings, ci.similarities(image_features,
top_trendings))} + flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))} + + result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks) + + print(result) + return result + else: + + ci = Interrogator(config) + ci.config.blip_num_beams = 64 + ci.config.chunk_size = 2024 + ci.config.clip_offload = True + ci.config.apply_low_vram_defaults() + #MODELS = ['ViT-L (best for Stable Diffusion 1.*)'] + ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024 + + image = init_image + if mode == 'best': + prompt = ci.interrogate(image) + elif mode == 'classic': + prompt = ci.interrogate_classic(image) + elif mode == 'fast': + prompt = ci.interrogate_fast(image) + elif mode == 'negative': + prompt = ci.interrogate_negative(image) + + #print(str(prompt)) + return prompt + + + # config = Config(clip_model_name=os.environ['TRANSFORMERS_CACHE'] + "ViT-L-14/openai", device="cuda") + # ci = Interrogator(config) + # "ViT-L-14/openai")) + # "ViT-g-14/laion2B-s34B-b88K")) + + + def to_output(self, data: dict): + import numpy as np + self.current_session.output_data_templates['output'].data = np.array([data]) + return self.current_session.output_data_templates \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer new file mode 100644 index 0000000..216205c --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/image_interrogator.trainer @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md b/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md new file mode 100644 index 0000000..ec092db --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/readme.md @@ -0,0 +1,11 @@ +# Clip Interrogator + +This module provides prompt generation based on images. + +* https://huggingface.co/spaces/pharmapsychotic/CLIP-Interrogator + +## Options + +- `kind`: string, identifier of the kind of processing + - `prompt`: Generates a prompt from image + - `analysis`: Generates a categorical analysis + +- `mode`: string, prompt generation mode + - `best`, `classic`, `fast`, `negative` diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt b/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt new file mode 100644 index 0000000..a9b489d --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/requirements.txt @@ -0,0 +1,5 @@ +hcai-nova-utils>=1.5.5 +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.1.1 +clip_interrogator +git+https://github.com/huggingface/diffusers.git diff --git a/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py b/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py new file mode 100644 index 0000000..adf3132 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_interrogator/version.py @@ -0,0 +1,12 @@ +""" Clip Interrogator +""" +# We follow Semantic Versioning (https://semver.org/) +_MAJOR_VERSION = '1' +_MINOR_VERSION = '0' +_PATCH_VERSION = '0' + +__version__ = '.'.join([ + _MAJOR_VERSION, + _MINOR_VERSION, + _PATCH_VERSION, +]) diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py
b/nostr_dvm/backends/nova_server/modules/image_upscale/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py new file mode 100644 index 0000000..32ec7c8 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.py @@ -0,0 +1,152 @@ +"""RealESRGan Module +""" + +import os +import glob +import sys +from nova_utils.interfaces.server_module import Processor +from basicsr.archs.rrdbnet_arch import RRDBNet +from basicsr.utils.download_util import load_file_from_url +import numpy as np + + + +from realesrgan import RealESRGANer +from realesrgan.archs.srvgg_arch import SRVGGNetCompact +import cv2 +from PIL import Image as PILImage + + +# Setting defaults +_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False } + +# TODO: add log infos, +class RealESRGan(Processor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.options = _default_options | self.options + self.device = None + self.ds_iter = None + self.current_session = None + self.model_path = None #Maybe need this later for manual path + + + # IO shortcuts + self.input = [x for x in self.model_io if x.io_type == "input"] + self.output = [x for x in self.model_io if x.io_type == "output"] + self.input = self.input[0] + self.output = self.output[0] + + def process_data(self, ds_iter) -> dict: + self.ds_iter = ds_iter + current_session_name = self.ds_iter.session_names[0] + self.current_session = self.ds_iter.sessions[current_session_name]['manager'] + input_image = self.current_session.input_data['input_image'].data + + + try: + model, netscale, file_url = self.manageModel(str(self.options['model'])) + + if self.model_path is not None: + model_path = self.model_path + else: + model_path = os.path.join('weights', self.options['model'] + '.pth') + if not os.path.isfile(model_path): + ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + for url in file_url: + # model_path will be updated + model_path = load_file_from_url( + url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None) + + # use dni to control the denoise strength + dni_weight = None + if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1: + wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3') + model_path = [model_path, wdn_model_path] + dni_weight = [float(self.options['denoise_strength']), 1 - float(self.options['denoise_strength'])] + + half = True + if self.options["compute_type"] == "fp32": + half=False + + + upsampler = RealESRGANer( + scale=netscale, + model_path=model_path, + dni_weight=dni_weight, + model=model, + tile= int(self.options['tile']), + tile_pad=int(self.options['tile_pad']), + pre_pad=int(self.options['pre_pad']), + half=half, + gpu_id=None) #Can be set if multiple gpus are available + + if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement + from gfpgan import GFPGANer + face_enhancer = GFPGANer( + model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth', + upscale=int(self.options['outscale']), + arch='clean', + channel_multiplier=2, + bg_upsampler=upsampler) + + + pilimage = PILImage.fromarray(input_image) + img = 
cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR) + try: + if bool(self.options['face_enhance']): + _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True) + else: + output, _ = upsampler.enhance(img, outscale=int(self.options['outscale'])) + except RuntimeError as error: + print('Error', error) + print('If you encounter CUDA out of memory, try to set --tile with a smaller number.') + + output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB) + + return output + + + + + except Exception as e: + print(e) + sys.stdout.flush() + return "Error" + + + def to_output(self, data: dict): + self.current_session.output_data_templates['output_image'].data = data + return self.current_session.output_data_templates + + + def manageModel(self, model_name): + if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth'] + elif model_name == 'RealESRNet_x4plus': # x4 RRDBNet model + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth'] + elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth'] + elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) + netscale = 2 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth'] + elif model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size) + model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu') + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth'] + elif model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size) + model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu') + netscale = 4 + file_url = [ + 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth', + 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth' + ] + + return model, netscale, file_url \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer new file mode 100644 index 0000000..b3bf12f --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_upscale/image_upscale_realesrgan.trainer @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py b/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py new file mode 100644 index 0000000..0a8cc43 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_upscale/inference_realesrgan.py @@ -0,0 +1,166 @@ +import argparse +import cv2 +import glob +import os +from basicsr.archs.rrdbnet_arch import RRDBNet +from basicsr.utils.download_util import 
load_file_from_url + +from realesrgan import RealESRGANer +from realesrgan.archs.srvgg_arch import SRVGGNetCompact + + +def main(): + """Inference demo for Real-ESRGAN. + """ + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder') + parser.add_argument( + '-n', + '--model_name', + type=str, + default='RealESRGAN_x4plus', + help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | ' + 'realesr-animevideov3 | realesr-general-x4v3')) + parser.add_argument('-o', '--output', type=str, default='results', help='Output folder') + parser.add_argument( + '-dn', + '--denoise_strength', + type=float, + default=0.5, + help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. ' + 'Only used for the realesr-general-x4v3 model')) + parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image') + parser.add_argument( + '--model_path', type=str, default=None, help='[Option] Model path. Usually, you do not need to specify it') + parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image') + parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing') + parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding') + parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border') + parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face') + parser.add_argument( + '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).') + parser.add_argument( + '--alpha_upsampler', + type=str, + default='realesrgan', + help='The upsampler for the alpha channels. Options: realesrgan | bicubic') + parser.add_argument( + '--ext', + type=str, + default='auto', + help='Image extension. 
Options: auto | jpg | png, auto means using the same extension as inputs') + parser.add_argument( + '-g', '--gpu-id', type=int, default=None, help='gpu device to use (default=None) can be 0,1,2 for multi-gpu') + + args = parser.parse_args() + + # determine models according to model names + args.model_name = args.model_name.split('.')[0] + if args.model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth'] + elif args.model_name == 'RealESRNet_x4plus': # x4 RRDBNet model + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4) + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth'] + elif args.model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth'] + elif args.model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model + model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2) + netscale = 2 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth'] + elif args.model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size) + model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu') + netscale = 4 + file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth'] + elif args.model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size) + model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu') + netscale = 4 + file_url = [ + 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth', + 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth' + ] + + # determine model paths + if args.model_path is not None: + model_path = args.model_path + else: + model_path = os.path.join('weights', args.model_name + '.pth') + if not os.path.isfile(model_path): + ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) + for url in file_url: + # model_path will be updated + model_path = load_file_from_url( + url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None) + + # use dni to control the denoise strength + dni_weight = None + if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1: + wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3') + model_path = [model_path, wdn_model_path] + dni_weight = [args.denoise_strength, 1 - args.denoise_strength] + + # restorer + upsampler = RealESRGANer( + scale=netscale, + model_path=model_path, + dni_weight=dni_weight, + model=model, + tile=args.tile, + tile_pad=args.tile_pad, + pre_pad=args.pre_pad, + half=not args.fp32, + gpu_id=args.gpu_id) + + if args.face_enhance: # Use GFPGAN for face enhancement + from gfpgan import GFPGANer + face_enhancer = GFPGANer( + model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth', + upscale=args.outscale, + arch='clean', + channel_multiplier=2, + bg_upsampler=upsampler) + 
os.makedirs(args.output, exist_ok=True) + + if os.path.isfile(args.input): + paths = [args.input] + else: + paths = sorted(glob.glob(os.path.join(args.input, '*'))) + + for idx, path in enumerate(paths): + imgname, extension = os.path.splitext(os.path.basename(path)) + print('Testing', idx, imgname) + + img = cv2.imread(path, cv2.IMREAD_UNCHANGED) + if len(img.shape) == 3 and img.shape[2] == 4: + img_mode = 'RGBA' + else: + img_mode = None + + try: + if args.face_enhance: + _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True) + else: + output, _ = upsampler.enhance(img, outscale=args.outscale) + except RuntimeError as error: + print('Error', error) + print('If you encounter CUDA out of memory, try to set --tile with a smaller number.') + else: + if args.ext == 'auto': + extension = extension[1:] + else: + extension = args.ext + if img_mode == 'RGBA': # RGBA images should be saved in png format + extension = 'png' + if args.suffix == '': + save_path = os.path.join(args.output, f'{imgname}.{extension}') + else: + save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}') + cv2.imwrite(save_path, output) + + +if __name__ == '__main__': + main() diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt b/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt new file mode 100644 index 0000000..0cf3e2b --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_upscale/requirements.txt @@ -0,0 +1,13 @@ +realesrgan @git+https://github.com/xinntao/Real-ESRGAN.git +hcai-nova-utils>=1.5.5 +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.1.0 +torchvision +basicsr>=1.4.2 +facexlib>=0.2.5 +gfpgan>=1.3.5 +numpy +opencv-python +Pillow +tqdm +git+https://github.com/huggingface/diffusers.git \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/modules/image_upscale/version.py b/nostr_dvm/backends/nova_server/modules/image_upscale/version.py new file mode 100644 index 0000000..7963e09 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/image_upscale/version.py @@ -0,0 +1,12 @@ +""" RealESRGan +""" +# We follow Semantic Versioning (https://semver.org/) +_MAJOR_VERSION = '1' +_MINOR_VERSION = '0' +_PATCH_VERSION = '0' + +__version__ = '.'.join([ + _MAJOR_VERSION, + _MINOR_VERSION, + _PATCH_VERSION, +]) diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py new file mode 100644 index 0000000..919e1b1 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/lora.py @@ -0,0 +1,100 @@ +def build_lora_xl(lora, prompt, lora_weight): + existing_lora = False + if lora == "3drenderstyle": + if lora_weight == "": + lora_weight = "1" + prompt = "3d style, 3d render, " + prompt + " " + existing_lora = True + + if lora == "psychedelicnoir": + if lora_weight == "": + lora_weight = "1" + prompt = prompt + " >" + existing_lora = True + + if lora == "wojak": + if lora_weight == "": + lora_weight = "1" + prompt = ", " + prompt + ", wojak" + existing_lora = True + + if lora == "dreamarts": + if lora_weight == "": + lora_weight = "1" + prompt = ", " + prompt + existing_lora = True + + if lora == "voxel": + if lora_weight == "": + lora_weight = "1" + prompt = "voxel style, 
" + prompt + " " + existing_lora = True + + if lora == "kru3ger": + if lora_weight == "": + lora_weight = "1" + prompt = "kru3ger_style, " + prompt + "" + existing_lora = True + + if lora == "inkpunk": + if lora_weight == "": + lora_weight = "0.5" + prompt = "inkpunk style, " + prompt + " " + existing_lora = True + + if lora == "inkscenery": + if lora_weight == "": + lora_weight = "1" + prompt = " ink scenery, " + prompt + " " + existing_lora = True + + if lora == "inkpainting": + if lora_weight == "": + lora_weight = "0.7" + prompt = "painting style, " + prompt + " ," + existing_lora = True + + if lora == "timburton": + if lora_weight == "": + lora_weight = "1.27" + pencil_weight = "1.15" + prompt = prompt + " (hand drawn with pencil"+pencil_weight+"), (tim burton style:"+lora_weight+")" + existing_lora = True + + if lora == "pixelart": + if lora_weight == "": + lora_weight = "1" + prompt = prompt + " (flat shading:1.2), (minimalist:1.4), " + existing_lora = True + + if lora == "pepe": + if lora_weight == "": + lora_weight = "0.8" + prompt = prompt + " , pepe" + existing_lora = True + + if lora == "bettertext": + if lora_weight == "": + lora_weight = "1" + prompt = prompt + " ," + existing_lora = True + + if lora == "mspaint": + if lora_weight == "": + lora_weight = "1" + prompt = "MSPaint drawing " + prompt +">" + existing_lora = True + + if lora == "woodfigure": + if lora_weight == "": + lora_weight = "0.7" + prompt = prompt + ",woodfigurez,artistic style " + existing_lora = True + + if lora == "fireelement": + prompt = prompt + ",composed of fire elements, fire element" + existing_lora = True + + + + return lora, prompt, existing_lora \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md new file mode 100644 index 0000000..cccbe30 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/readme.md @@ -0,0 +1,35 @@ +# Stable Diffusion XL + +This modules provides image generation based on prompts + +* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0 + +## Options + +- `model`: string, identifier of the model to choose + - `stabilityai/stable-diffusion-xl-base-1.0`: Default Stable Diffusion XL model + + +- `ratio`: Ratio of the output image + - `1-1` ,`4-3`, `16-9`, `16-10`, `3-4`,`9-16`,`10-16` + +- `high_noise_frac`: Denoising factor + +- `n_steps`: how many iterations should be performed + +## Example payload + +```python +payload = { + 'trainerFilePath': 'modules\\stablediffusionxl\\stablediffusionxl.trainer', + 'server': '127.0.0.1', + 'data' = '[{"id":"input_prompt","type":"input","src":"user:text","prompt":"' + prompt +'","active":"True"},{"id":"negative_prompt","type":"input","src":"user:text","prompt":"' + negative_prompt +'","active":"True"},{"id":"output_image","type":"output","src":"file:image","uri":"' + outputfile+'","active":"True"}]' + 'optStr': 'model=stabilityai/stable-diffusion-xl-base-1.0;ratio=4-3' +} + +import requests + +url = 'http://127.0.0.1:53770/predict' +headers = {'Content-type': 'application/x-www-form-urlencoded'} +requests.post(url, headers=headers, data=payload) +``` diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt new file mode 100644 index 0000000..9b9e167 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/requirements.txt @@ -0,0 +1,9 @@ +hcai-nova-utils>=1.5.5 
+--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.1.0 +compel~=2.0.2 +git+https://github.com/huggingface/diffusers.git +transformers +accelerate +numpy +omegaconf diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py new file mode 100644 index 0000000..bae89e8 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.py @@ -0,0 +1,176 @@ +"""StableDiffusionXL Module +""" + +import gc +import sys +import os + +# Add local dir to path for relative imports +sys.path.insert(0, os.path.dirname(__file__)) + +from nova_utils.interfaces.server_module import Processor +from nova_utils.utils.cache_utils import get_file +from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler +from diffusers.utils import load_image +import numpy as np +from PIL import Image as PILImage +from lora import build_lora_xl + + + +# Setting defaults +_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" } + +# TODO: add log infos, +class StableDiffusionXL(Processor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.options = _default_options | self.options + self.device = None + self.ds_iter = None + self.current_session = None + + + # IO shortcuts + self.input = [x for x in self.model_io if x.io_type == "input"] + self.output = [x for x in self.model_io if x.io_type == "output"] + self.input = self.input[0] + self.output = self.output[0] + + def process_data(self, ds_iter) -> dict: + import torch + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.ds_iter = ds_iter + current_session_name = self.ds_iter.session_names[0] + self.current_session = self.ds_iter.sessions[current_session_name]['manager'] + #input_image_url = self.current_session.input_data['input_image_url'].data + #input_image_url = ' '.join(input_image_url) + input_image = self.current_session.input_data['input_image'].data + input_prompt = self.current_session.input_data['input_prompt'].data + input_prompt = ' '.join(input_prompt) + negative_prompt = self.current_session.input_data['negative_prompt'].data + negative_prompt = ' '.join(negative_prompt) + # print("Input Image: " + input_image_url) + print("Input prompt: " + input_prompt) + print("Negative prompt: " + negative_prompt) + + try: + + model = self.options['model'] + lora = self.options['lora'] + #init_image = load_image(input_image_url).convert("RGB") + init_image = PILImage.fromarray(input_image) + + mwidth = 1024 + mheight = 1024 + w = mwidth + h = mheight + if init_image.width > init_image.height: + scale = float(init_image.height / init_image.width) + w = mwidth + h = int(mheight * scale) + elif init_image.width < init_image.height: + scale = float(init_image.width / init_image.height) + w = int(mwidth * scale) + h = mheight + else: + w = mwidth + h = mheight + + init_image = init_image.resize((w, h)) + + if lora != "" and lora != "None": + print("Loading lora...") + + lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" ) + + from diffusers import AutoPipelineForImage2Image + import torch + + + + #init_image = init_image.resize((int(w/2), int(h/2))) + + pipe = AutoPipelineForImage2Image.from_pretrained( + 
"stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=torch.float16).to("cuda") + + if existing_lora: + lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0] + if str(lora_uri) == "": + return "Lora not found" + lora_path = get_file( + fname=str(lora_uri.uri_id) + ".safetensors", + origin=lora_uri.uri_url, + file_hash=lora_uri.uri_hash, + cache_dir=os.getenv("CACHE_DIR"), + tmp_dir=os.getenv("TMP_DIR"), + ) + pipe.load_lora_weights(str(lora_path)) + print("Loaded Lora: " + str(lora_path)) + + seed = 20000 + generator = torch.manual_seed(seed) + + #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512" + + image = pipe( + prompt=input_prompt, + negative_prompt=negative_prompt, + image=init_image, + generator=generator, + num_inference_steps=int(self.options['n_steps']), + image_guidance_scale=float(self.options['guidance_scale']), + strength=float(str(self.options['strength']))).images[0] + + + elif model == "stabilityai/stable-diffusion-xl-refiner-1.0": + + pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained( + model, torch_dtype=torch.float16, variant="fp16", + use_safetensors=True + ) + + n_steps = int(self.options['n_steps']) + transformation_strength = float(self.options['strength']) + cfg_scale = float(self.options['guidance_scale']) + + pipe = pipe.to(self.device) + image = pipe(input_prompt, image=init_image, + negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0] + + elif model == "timbrooks/instruct-pix2pix": + pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16, + safety_checker=None) + + pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) + + pipe.to(self.device) + n_steps = int(self.options['n_steps']) + cfg_scale = float(self.options['guidance_scale']) + image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0] + + + if torch.cuda.is_available(): + del pipe + gc.collect() + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + + numpy_array = np.array(image) + return numpy_array + + + except Exception as e: + print(e) + sys.stdout.flush() + return "Error" + + + def to_output(self, data: dict): + self.current_session.output_data_templates['output_image'].data = data + return self.current_session.output_data_templates + + + \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer new file mode 100644 index 0000000..b6f4167 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl-img2img.trainer @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py new file mode 100644 index 0000000..3f446eb --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.py @@ -0,0 +1,242 @@ +"""StableDiffusionXL Module +""" +import gc +import sys +import os + +sys.path.insert(0, os.path.dirname(__file__)) + +from ssl import Options +from nova_utils.interfaces.server_module import Processor +from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, logging +from compel import Compel, 
ReturnedEmbeddingsType +from nova_utils.utils.cache_utils import get_file +import numpy as np +os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" + +import torch +from PIL import Image +from lora import build_lora_xl +logging.disable_progress_bar() +logging.enable_explicit_format() +#logging.set_verbosity_info() + + +# Setting defaults +_default_options = {"model": "stabilityai/stable-diffusion-xl-base-1.0", "ratio": "1-1", "width": "", "height":"", "high_noise_frac" : "0.8", "n_steps" : "35", "lora" : "" } + +# TODO: add log infos, +class StableDiffusionXL(Processor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.options = _default_options | self.options + self.device = None + self.ds_iter = None + self.current_session = None + + + # IO shortcuts + self.input = [x for x in self.model_io if x.io_type == "input"] + self.output = [x for x in self.model_io if x.io_type == "output"] + self.input = self.input[0] + self.output = self.output[0] + + def process_data(self, ds_iter) -> dict: + self.device = ("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_built() else "cpu")) + self.variant = "fp16" + self.torch_d_type = torch.float16 + self.ds_iter = ds_iter + current_session_name = self.ds_iter.session_names[0] + self.current_session = self.ds_iter.sessions[current_session_name]['manager'] + input_prompt = self.current_session.input_data['input_prompt'].data + input_prompt = ' '.join(input_prompt) + negative_prompt = self.current_session.input_data['negative_prompt'].data + negative_prompt = ' '.join(negative_prompt) + new_width = 0 + new_height = 0 + print("Input prompt: " + input_prompt) + print("Negative prompt: " + negative_prompt) + + try: + if self.options['width'] != "" and self.options['height'] != "": + new_width = int(self.options['width']) + new_height = int(self.options['height']) + ratiow, ratioh = self.calculate_aspect(new_width, new_height) + print("Ratio:" + str(ratiow) + ":" + str(ratioh)) + + else: + ratiow = str(self.options['ratio']).split('-')[0] + ratioh = str(self.options['ratio']).split('-')[1] + + model = self.options["model"] + lora = self.options["lora"] + mwidth = 1024 + mheight = 1024 + + height = mheight + width = mwidth + + ratiown = int(ratiow) + ratiohn = int(ratioh) + + if ratiown > ratiohn: + height = int((ratiohn/ratiown) * float(width)) + elif ratiown < ratiohn: + width = int((ratiown/ratiohn) * float(height)) + elif ratiown == ratiohn: + width = height + + + print("Processing Output width: " + str(width) + " Output height: " + str(height)) + + + + + if model == "stabilityai/stable-diffusion-xl-base-1.0": + base = StableDiffusionXLPipeline.from_pretrained(model, torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device) + print("Loaded model: " + model) + + else: + + model_uri = [ x for x in self.trainer.meta_uri if x.uri_id == model][0] + if str(model_uri) == "": + return "Model not found" + + model_path = get_file( + fname=str(model_uri.uri_id) + ".safetensors", + origin=model_uri.uri_url, + file_hash=model_uri.uri_hash, + cache_dir=os.getenv("CACHE_DIR"), + tmp_dir=os.getenv("TMP_DIR"), + ) + + print(str(model_path)) + + + base = StableDiffusionXLPipeline.from_single_file(str(model_path), torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device) + print("Loaded model: " + model) + + if lora != "" and lora != "None": + print("Loading lora...") + lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "") + + if existing_lora: + lora_uri = [
x for x in self.trainer.meta_uri if x.uri_id == lora][0] + if str(lora_uri) == "": + return "Lora not found" + lora_path = get_file( + fname=str(lora_uri.uri_id) + ".safetensors", + origin=lora_uri.uri_url, + file_hash=lora_uri.uri_hash, + cache_dir=os.getenv("CACHE_DIR"), + tmp_dir=os.getenv("TMP_DIR"), + ) + + base.load_lora_weights(str(lora_path)) + print("Loaded Lora: " + str(lora_path)) + + refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained( + "stabilityai/stable-diffusion-xl-refiner-1.0", + text_encoder_2=base.text_encoder_2, + vae=base.vae, + torch_dtype=self.torch_d_type, + use_safetensors=True, + variant=self.variant, + ) + + + compel_base = Compel( + tokenizer=[base.tokenizer, base.tokenizer_2], + text_encoder=[base.text_encoder, base.text_encoder_2], + returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, + requires_pooled=[False, True], + ) + + compel_refiner = Compel( + tokenizer=[refiner.tokenizer_2], + text_encoder=[refiner.text_encoder_2], + returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, + requires_pooled=[True]) + + conditioning, pooled = compel_base(input_prompt) + negative_conditioning, negative_pooled = compel_base(negative_prompt) + + conditioning_refiner, pooled_refiner = compel_refiner(input_prompt) + negative_conditioning_refiner, negative_pooled_refiner = compel_refiner( + negative_prompt) + + + n_steps = int(self.options['n_steps']) + high_noise_frac = float(self.options['high_noise_frac']) + + + #base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True) + + + + img = base( + prompt_embeds=conditioning, + pooled_prompt_embeds=pooled, + negative_prompt_embeds=negative_conditioning, + negative_pooled_prompt_embeds=negative_pooled, + width=width, + height=height, + num_inference_steps=n_steps, + denoising_end=high_noise_frac, + output_type="latent", + ).images + + if torch.cuda.is_available(): + del base + gc.collect() + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + refiner.to(self.device) + # refiner.enable_model_cpu_offload() + image = refiner( + prompt_embeds=conditioning_refiner, + pooled_prompt_embeds=pooled_refiner, + negative_prompt_embeds=negative_conditioning_refiner, + negative_pooled_prompt_embeds=negative_pooled_refiner, + num_inference_steps=n_steps, + denoising_start=high_noise_frac, + num_images_per_prompt=1, + image=img, + ).images[0] + + if torch.cuda.is_available(): + del refiner + gc.collect() + torch.cuda.empty_cache() + torch.cuda.ipc_collect() + + if (new_height != 0 or new_width != 0) and (new_width != mwidth or new_height != mheight): + print("Resizing to width: " + str(new_width) + " height: " + str(new_height)) + image = image.resize((new_width, new_height), Image.LANCZOS) + + numpy_array = np.array(image) + return numpy_array + + + except Exception as e: + print(e) + sys.stdout.flush() + return "Error" + + def calculate_aspect(self, width: int, height: int): + def gcd(a, b): + """The GCD (greatest common divisor) is the highest number that evenly divides both width and height.""" + return a if b == 0 else gcd(b, a % b) + + r = gcd(width, height) + x = int(width / r) + y = int(height / r) + + return x, y + + + + def to_output(self, data: dict): + self.current_session.output_data_templates['output_image'].data = data + return self.current_session.output_data_templates \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer
b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer new file mode 100644 index 0000000..0e86e7e --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/stablediffusionxl.trainer @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py new file mode 100644 index 0000000..bba6553 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/stablediffusionxl/version.py @@ -0,0 +1,12 @@ +""" Stable Diffusion XL +""" +# We follow Semantic Versioning (https://semver.org/) +_MAJOR_VERSION = '1' +_MINOR_VERSION = '0' +_PATCH_VERSION = '0' + +__version__ = '.'.join([ + _MAJOR_VERSION, + _MINOR_VERSION, + _PATCH_VERSION, +]) diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/__init__.py b/nostr_dvm/backends/nova_server/modules/whisperx/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/readme.md b/nostr_dvm/backends/nova_server/modules/whisperx/readme.md new file mode 100644 index 0000000..ffe67a3 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/whisperx/readme.md @@ -0,0 +1,52 @@ +# WhisperX + +This module provides fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and +speaker diarization. + +* https://github.com/m-bain/whisperX + +## Options + +- `model`: string, identifier of the model to choose, sorted ascending in required (V)RAM: + - `tiny`, `tiny.en` + - `base`, `base.en` + - `small`, `small.en` + - `medium`, `medium.en` + - `large-v1` + - `large-v2` + +- `alignment_mode`: string, alignment method to use + - `raw` Segments as identified by Whisper + - `segment` Improved segmentation using separate alignment model. Roughly equivalent to sentence alignment. + - `word` Improved segmentation using separate alignment model. Equivalent to word alignment. + +- `language`: language code for transcription and alignment models.
Supported languages: + - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh` + - `None`: auto-detect language from first 30 seconds of audio + +- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption + +## Examples + +### Request + +```python +import requests +import json + +payload = { + "jobID" : "whisper_transcript", + "data": json.dumps([ + {"src":"file:stream:audio", "type":"input", "id":"audio", "uri":"path/to/my/file.wav"}, + {"src":"file:annotation:free", "type":"output", "id":"transcript", "uri":"path/to/my/transcript.annotation"} + ]), + "trainerFilePath": "modules\\whisperx\\whisperx_transcript.trainer", +} + + +url = 'http://127.0.0.1:8080/process' +headers = {'Content-type': 'application/x-www-form-urlencoded'} +x = requests.post(url, headers=headers, data=payload) +print(x.text) + +``` diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt b/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt new file mode 100644 index 0000000..cd86386 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/whisperx/requirements.txt @@ -0,0 +1,7 @@ +hcai-nova-utils>=1.5.5 +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.1.0+cu118 +torchvision>= 0.15.1+cu118 +torchaudio >= 2.0.0+cu118 +pyannote-audio @ git+https://github.com/shelm/pyannote-audio.git@d7b4de3 +whisperx @ git+https://github.com/m-bain/whisperx.git@49e0130 diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/version.py b/nostr_dvm/backends/nova_server/modules/whisperx/version.py new file mode 100644 index 0000000..aa37301 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/whisperx/version.py @@ -0,0 +1,12 @@ +""" WhisperX +""" +# We follow Semantic Versioning (https://semver.org/) +_MAJOR_VERSION = '1' +_MINOR_VERSION = '0' +_PATCH_VERSION = '1' + +__version__ = '.'.join([ + _MAJOR_VERSION, + _MINOR_VERSION, + _PATCH_VERSION, +]) diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py new file mode 100644 index 0000000..f24e63e --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.py @@ -0,0 +1,124 @@ +"""WhisperX Module +""" +from nova_utils.interfaces.server_module import Processor +import sys + +# Setting defaults +_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'} + +# supported language codes, cf. 
whisperx/alignment.py +# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None} +# {'vi', 'uk', 'pl', 'ur', 'ru', 'ko', 'en', 'zh', 'es', 'it', 'el', 'te', 'da', 'he', 'fa', 'pt', 'de', +# 'fr', 'tr', 'nl', 'cs', 'hu', 'fi', 'ar', 'ja', None} + +class WhisperX(Processor): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.options = _default_options | self.options + self.device = None + self.ds_iter = None + self.session_manager = None + + # IO shortcuts + self.input = [x for x in self.model_io if x.io_type == "input"] + self.output = [x for x in self.model_io if x.io_type == "output"] + assert len(self.input) == 1 and len(self.output) == 1 + self.input = self.input[0] + self.output = self.output[0] + + def process_data(self, ds_manager) -> dict: + import whisperx + import torch + self.device = "cuda" if torch.cuda.is_available() else "cpu" + self.session_manager = self.get_session_manager(ds_manager) + input_audio = self.session_manager.input_data['audio'] + + # sliding window will be applied by WhisperX + audio = whisperx.load_audio(input_audio.meta_data.file_path) + + # transcribe with original whisper + try: + model = whisperx.load_model(self.options["model"], self.device, compute_type=self.options['compute_type'], + language=self.options['language']) + except ValueError: + print(f'Your hardware does not support {self.options["compute_type"]} - fallback to float32') + sys.stdout.flush() + model = whisperx.load_model(self.options["model"], self.device, compute_type='float32', + language=self.options['language']) + + result = model.transcribe(audio, batch_size=int(self.options["batch_size"])) + + # delete model if low on GPU resources + import gc; gc.collect(); torch.cuda.empty_cache(); del model + + if not self.options["alignment_mode"] == "raw": + # load alignment model and metadata + model_a, metadata = whisperx.load_align_model( + language_code=result["language"], device=self.device + ) + + # align whisper output + result_aligned = whisperx.align( + result["segments"], model_a, metadata, audio, self.device + ) + result = result_aligned + + # delete model if low on GPU resources + import gc; gc.collect(); torch.cuda.empty_cache(); del model_a + + return result + + def to_output(self, data: dict): + def _fix_missing_timestamps(data): + """ + https://github.com/m-bain/whisperX/issues/253 + Some characters might miss timestamps and recognition scores. This function adds estimated time stamps assuming a fixed time per character of 65ms. + Confidence for each added timestamp will be 0. 
+ Args: + data (dictionary): output dictionary as returned by process_data + """ + last_end = 0 + for s in data["segments"]: + for w in s["words"]: + if "end" in w.keys(): + last_end = w["end"] + else: + #TODO: rethink lower bound for confidence; place word centred instead of left aligned + w["start"] = last_end + last_end += 0.065 + w["end"] = last_end + #w["score"] = 0.000 + w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4]) + + def _hmean(scores): + if len(scores) > 0: + prod = scores[0] + for s in scores[1:]: + prod *= s + prod = prod**(1/len(scores)) + else: + prod = 0 + return prod + + if ( + self.options["alignment_mode"] == "word" + or self.options["alignment_mode"] == "segment" + ): + _fix_missing_timestamps(data) + + if self.options["alignment_mode"] == "word": + anno_data = [ + (w["start"], w["end"], w["word"], w["score"]) + for w in data["word_segments"] + ] + else: + anno_data = [ + #(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"] + (w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?) + ] + + # convert to milliseconds + anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data] + out = self.session_manager.output_data_templates[self.output.io_id] + out.data = anno_data + return self.session_manager.output_data_templates diff --git a/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer new file mode 100644 index 0000000..44dae41 --- /dev/null +++ b/nostr_dvm/backends/nova_server/modules/whisperx/whisperx_transcript.trainer @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/nostr_dvm/backends/nova_server/run_windows.cmd b/nostr_dvm/backends/nova_server/run_windows.cmd new file mode 100644 index 0000000..f274dbc --- /dev/null +++ b/nostr_dvm/backends/nova_server/run_windows.cmd @@ -0,0 +1,2 @@ +call venv/Scripts/activate +nova-server \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/setup_windows.cmd b/nostr_dvm/backends/nova_server/setup_windows.cmd new file mode 100644 index 0000000..04f49db --- /dev/null +++ b/nostr_dvm/backends/nova_server/setup_windows.cmd @@ -0,0 +1,3 @@ +python -m venv venv +call venv/Scripts/activate +pip install hcai-nova-server \ No newline at end of file diff --git a/nostr_dvm/backends/nova_server/utils.py b/nostr_dvm/backends/nova_server/utils.py index 2796cc8..b94ea09 100644 --- a/nostr_dvm/backends/nova_server/utils.py +++ b/nostr_dvm/backends/nova_server/utils.py @@ -11,7 +11,7 @@ from nostr_dvm.utils.output_utils import upload_media_to_hoster """ This file contains basic calling functions for ML tasks that are outsourced to nova server. It is an Open-Source backend -that enables running models locally based on preefined modules, by accepting a request form. +that enables running models locally based on predefined modules, by accepting a request. Modules are deployed in separate virtual environments so dependencies won't conflict.
""" diff --git a/nostr_dvm/dvm.py b/nostr_dvm/dvm.py index 44febf7..8a5110a 100644 --- a/nostr_dvm/dvm.py +++ b/nostr_dvm/dvm.py @@ -1,9 +1,8 @@ -import importlib import json import os import subprocess from datetime import timedelta -from pathlib import Path +from sys import platform from nostr_sdk import PublicKey, Keys, Client, Tag, Event, EventBuilder, Filter, HandleNotification, Timestamp, \ init_logger, LogLevel, Options, nip04_encrypt @@ -475,8 +474,11 @@ class DVM: request_form = dvm.create_request_from_nostr_event(job_event, self.client, self.dvm_config) if dvm_config.USE_OWN_VENV: - python_bin = (r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0] - + "/bin/python") + python_location = "/bin/python" + if platform == "win32": + python_location = "/Scripts/python" + python_bin = ( r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0] + + python_location) retcode = subprocess.call([python_bin, dvm_config.SCRIPT, '--request', json.dumps(request_form), '--identifier', dvm_config.IDENTIFIER, diff --git a/nostr_dvm/interfaces/dvmtaskinterface.py b/nostr_dvm/interfaces/dvmtaskinterface.py index 3035ef4..b4f720c 100644 --- a/nostr_dvm/interfaces/dvmtaskinterface.py +++ b/nostr_dvm/interfaces/dvmtaskinterface.py @@ -3,6 +3,7 @@ import os import subprocess from subprocess import run import sys +from sys import platform from threading import Thread from venv import create from nostr_sdk import Keys @@ -59,15 +60,18 @@ class DVMTaskInterface: def install_dependencies(self, dvm_config): if dvm_config.SCRIPT != "": if self.dvm_config.USE_OWN_VENV: - dir = r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0] + pip_location = 'bin/pip' + if platform == "win32": + pip_location = dir + '/Scripts/pip' + if not os.path.isdir(dir): - print(dir) + print("Creating Venv: " + dir) create(dir, with_pip=True, upgrade_deps=True) self.dependencies.append(("nostr-dvm", "nostr-dvm")) for (module, package) in self.dependencies: print("Installing Venv Module: " + module) - run(["bin/pip", "install", "--force-reinstall", package], cwd=dir) + run([pip_location, "install", "--upgrade", package], cwd=dir) else: for module, package in self.dependencies: if module != "nostr-dvm": diff --git a/nostr_dvm/tasks/README.md b/nostr_dvm/tasks/README.md index b8f794f..b2c1aea 100644 --- a/nostr_dvm/tasks/README.md +++ b/nostr_dvm/tasks/README.md @@ -6,12 +6,27 @@ Reusable backend functions can be defined in backends (e.g. 
diff --git a/nostr_dvm/tasks/README.md b/nostr_dvm/tasks/README.md
index b8f794f..b2c1aea 100644
--- a/nostr_dvm/tasks/README.md
+++ b/nostr_dvm/tasks/README.md
@@ -6,12 +6,27 @@
 Reusable backend functions can be defined in backends (e.g. API calls)
 
 Current List of Tasks:
-| Module                  | Kind | Description                                    | Backend     |
-|-------------------------|------|------------------------------------------------|-------------|
-| TextExtractionPDF       | 5000 | Extracts Text from a PDF file                  | local       |
-| SpeechToTextGoogle      | 5000 | Extracts Speech from Media files via Google    | googleAPI   |
-| TranslationGoogle       | 5002 | Translates Inputs to another language          | googleAPI   |
-| TranslationLibre        | 5002 | Translates Inputs to another language          | libreAPI    |
-| ImageGenerationDALLE    | 5100 | Generates an Image with Dall-E                 | openAI      |
-| MediaConverter          | 5200 | Converts a link of a media file and uploads it | openAI      |
-| DiscoverInactiveFollows | 5301 | Find inactive Nostr users                      | local       |
\ No newline at end of file
+| Module                       | Kind              | Description                                                | Backend          |
+|------------------------------|-------------------|------------------------------------------------------------|------------------|
+| TextExtractionPDF            | 5000              | Extracts Text from a PDF file                              | local            |
+| SpeechToTextGoogle           | 5000              | Extracts Speech from Media files via Google Services       | googleAPI        |
+| SpeechToTextWhisperX         | 5000              | Extracts Speech from Media files via local WhisperX        | nserver          |
+| ImageInterrogator            | 5000              | Extracts Prompts from Images                               | nserver          |
+| TranslationGoogle            | 5002              | Translates Inputs to another language                      | googleAPI        |
+| TranslationLibre             | 5002              | Translates Inputs to another language                      | libreAPI         |
+| TextGenerationLLMLite        | 5050              | Chat with LLM backends like Ollama, ChatGPT etc.           | local/api/openai |
+| ImageGenerationSDXL          | 5100              | Generates an Image from Prompt with Stable Diffusion XL    | nserver          |
+| ImageGenerationSDXLIMG2IMG   | 5100              | Generates an Image from an Image with Stable Diffusion XL  | nserver          |
+| ImageGenerationReplicateSDXL | 5100              | Generates an Image from Prompt with Stable Diffusion XL    | replicate        |
+| ImageGenerationMLX           | 5100              | Generates an Image with Stable Diffusion 2.1 on M1/2/3 Mac | mlx              |
+| ImageGenerationDALLE         | 5100              | Generates an Image with OpenAI's Dall-E                    | openAI           |
+| ImageUpscale                 | 5100              | Upscales an Image                                          | nserver          |
+| MediaConverter               | 5200              | Converts a link of a media file and uploads it             | openAI           |
+| VideoGenerationReplicateSVD  | 5202 (unofficial) | Generates a Video from an Image                            | replicate        |
+| TextToSpeech                 | 5250 (unofficial) | Generate Audio from a prompt                               | local            |
+| TrendingNotesNostrBand       | 5300              | Show trending notes on nostr.band                          | nostr.band api   |
+| DiscoverInactiveFollows      | 5301              | Find inactive Nostr users                                  | local            |
+| AdvancedSearch               | 5302 (unofficial) | Search Content on nostr.band                               | local            |
+
+Kinds marked (unofficial) are suggestions that have not been merged into the NIP-90 spec yet and might change in the future.
+Backends might require adding an API key to the .env file or running an external server/framework that the DVM communicates with.
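For orientation, an nserver-backed task from this table is built and started like any other playground example; a minimal sketch (the server address is a placeholder for a running nova-server instance, not a project default):

# Sketch: building and running one of the tasks listed above.
from nostr_dvm.tasks import imagegeneration_sdxl
from nostr_dvm.utils.admin_utils import AdminConfig

admin_config = AdminConfig()
sdxl = imagegeneration_sdxl.build_example("Unstable Diffusion", "unstable_diffusion",
                                          admin_config, "http://127.0.0.1:37317")
sdxl.run()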
\ No newline at end of file diff --git a/nostr_dvm/tasks/advanced_search.py b/nostr_dvm/tasks/advanced_search.py index 217ea18..2b11a08 100644 --- a/nostr_dvm/tasks/advanced_search.py +++ b/nostr_dvm/tasks/advanced_search.py @@ -47,8 +47,8 @@ class AdvancedSearch(DVMTaskInterface): # default values user = "" - since_days = 800 #days ago - until_days = 0 #days ago + since_days = 800 # days ago + until_days = 0 # days ago search = "" max_results = 20 @@ -98,11 +98,14 @@ class AdvancedSearch(DVMTaskInterface): search_until = Timestamp.from_secs(dif) if options["user"] == "": - notes_filter = Filter().kind(1).search(options["search"]).since(search_since).until(search_until).limit(options["max_results"]) + notes_filter = Filter().kind(1).search(options["search"]).since(search_since).until(search_until).limit( + options["max_results"]) elif options["search"] == "": - notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).since(search_since).until(search_until).limit(options["max_results"]) + notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).since(search_since).until( + search_until).limit(options["max_results"]) else: - notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).search(options["search"]).since(search_since).until(search_until).limit(options["max_results"]) + notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).search(options["search"]).since( + search_since).until(search_until).limit(options["max_results"]) events = cli.get_events_of([notes_filter], timedelta(seconds=5)) @@ -116,8 +119,6 @@ class AdvancedSearch(DVMTaskInterface): return json.dumps(result_list) - - def post_process(self, result, event): """Overwrite the interface function to return a social client readable format, if requested""" for tag in event.tags(): @@ -170,9 +171,9 @@ def build_example(name, identifier, admin_config): nip89config = NIP89Config() nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"]) nip89config.CONTENT = json.dumps(nip89info) - + return AdvancedSearch(name=name, dvm_config=dvm_config, nip89config=nip89config, - admin_config=admin_config) + admin_config=admin_config) def process_venv(): @@ -182,5 +183,6 @@ def process_venv(): result = dvm.process(json.loads(args.request)) DVMTaskInterface.write_output(result, args.output) + if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/convert_media.py b/nostr_dvm/tasks/convert_media.py index fa8655c..970bbc9 100644 --- a/nostr_dvm/tasks/convert_media.py +++ b/nostr_dvm/tasks/convert_media.py @@ -8,7 +8,6 @@ from nostr_dvm.utils.nip89_utils import NIP89Config from nostr_dvm.utils.mediasource_utils import organize_input_media_data from nostr_dvm.utils.output_utils import upload_media_to_hoster - """ This File contains a Module convert media locally @@ -98,10 +97,10 @@ def build_example(name, identifier, admin_config): nip89config = NIP89Config() - return MediaConverter(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config) + def process_venv(): args = DVMTaskInterface.process_args() dvm_config = build_default_config(args.identifier) @@ -111,4 +110,4 @@ def process_venv(): if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/discovery_inactive_follows.py b/nostr_dvm/tasks/discovery_inactive_follows.py index 961380e..5dd2e47 100644 --- 
a/nostr_dvm/tasks/discovery_inactive_follows.py +++ b/nostr_dvm/tasks/discovery_inactive_follows.py @@ -163,6 +163,7 @@ class DiscoverInactiveFollows(DVMTaskInterface): # if not text/plain, don't post-process return result + # We build an example here that we can call by either calling this file directly from the main directory, # or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the # playground or elsewhere @@ -196,6 +197,7 @@ def build_example(name, identifier, admin_config): return DiscoverInactiveFollows(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config) + def process_venv(): args = DVMTaskInterface.process_args() dvm_config = build_default_config(args.identifier) @@ -203,5 +205,6 @@ def process_venv(): result = dvm.process(json.loads(args.request)) DVMTaskInterface.write_output(result, args.output) + if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/imagegeneration_openai_dalle.py b/nostr_dvm/tasks/imagegeneration_openai_dalle.py index 86ebdcb..fc03938 100644 --- a/nostr_dvm/tasks/imagegeneration_openai_dalle.py +++ b/nostr_dvm/tasks/imagegeneration_openai_dalle.py @@ -107,7 +107,6 @@ class ImageGenerationDALLE(DVMTaskInterface): n=int(options['number']), ) - image_url = response.data[0].url # rehost the result instead of relying on the openai link response = requests.get(image_url) @@ -162,8 +161,8 @@ def process_venv(): result = dvm.process(json.loads(args.request)) time.sleep(10) - DVMTaskInterface.write_output(result, args.output) + if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py b/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py index 5c3cdec..eb72398 100644 --- a/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py +++ b/nostr_dvm/tasks/imagegeneration_replicate_sdxl.py @@ -6,7 +6,6 @@ from PIL import Image from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface from nostr_dvm.utils.admin_utils import AdminConfig -from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag @@ -153,5 +152,6 @@ def process_venv(): result = dvm.process(json.loads(args.request)) DVMTaskInterface.write_output(result, args.output) + if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/imagegeneration_mlx.py b/nostr_dvm/tasks/imagegeneration_sd21_mlx.py similarity index 97% rename from nostr_dvm/tasks/imagegeneration_mlx.py rename to nostr_dvm/tasks/imagegeneration_sd21_mlx.py index 8743bb9..11de72c 100644 --- a/nostr_dvm/tasks/imagegeneration_mlx.py +++ b/nostr_dvm/tasks/imagegeneration_sd21_mlx.py @@ -12,7 +12,7 @@ from nostr_dvm.utils.output_utils import upload_media_to_hoster from nostr_dvm.utils.zap_utils import get_price_per_sat """ -This File contains a Module to generate an Image on replicate and receive results back. +This File contains a Module to generate an Image on Macs with M1/M2/M3 chips and receive results back. 
 Accepted Inputs: Prompt (text)
 Outputs: An url to an Image
@@ -95,7 +95,7 @@ class ImageGenerationMLX(DVMTaskInterface):
     def process(self, request_form):
         try:
             import mlx.core as mx
-            from backends.mlx.stable_diffusion import StableDiffusion
+            from nostr_dvm.backends.mlx.modules.stable_diffusion import StableDiffusion
             options = DVMTaskInterface.set_options(request_form)
             sd = StableDiffusion()
diff --git a/nostr_dvm/tasks/imagegeneration_sdxl.py b/nostr_dvm/tasks/imagegeneration_sdxl.py
new file mode 100644
index 0000000..532ab09
--- /dev/null
+++ b/nostr_dvm/tasks/imagegeneration_sdxl.py
@@ -0,0 +1,206 @@
+import json
+from multiprocessing.pool import ThreadPool
+
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from nostr_dvm.utils.definitions import EventDefinitions
+
+"""
+This File contains a module to generate an Image from Text input on n-server and receive results back.
+
+Accepted Inputs: Prompt (text)
+Outputs: An url to an Image
+Params: -model  # models: juggernaut, dynavision, colossusProject, newreality, unstable
+        -lora   # loras (weights on top of models) voxel,
+"""
+
+
+class ImageGenerationSDXL(DVMTaskInterface):
+    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
+    TASK: str = "text-to-image"
+    FIX_COST: float = 70
+
+    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+                 admin_config: AdminConfig = None, options=None):
+        super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+    def is_input_supported(self, tags):
+        for tag in tags:
+            if tag.as_vec()[0] == 'i':
+                input_value = tag.as_vec()[1]
+                input_type = tag.as_vec()[2]
+                if input_type != "text":
+                    return False
+
+            elif tag.as_vec()[0] == 'output':
+                output = tag.as_vec()[1]
+                if (output == "" or
+                        not (output == "image/png" or output == "image/jpg"
+                             or output == "image/png;format=url" or output == "image/jpg;format=url")):
+                    print("Output format not supported, skipping..")
+                    return False
+
+        return True
+
+    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+        request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl.trainer'
+
+        prompt = ""
+        negative_prompt = ""
+        if self.options.get("default_model") and self.options.get("default_model") != "":
+            model = self.options['default_model']
+        else:
+            model = "stabilityai/stable-diffusion-xl-base-1.0"
+
+        ratio_width = "1"
+        ratio_height = "1"
+        width = ""
+        height = ""
+        if self.options.get("default_lora") and self.options.get("default_lora") != "":
+            lora = self.options['default_lora']
+        else:
+            lora = ""
+        lora_weight = ""
+        strength = ""
+        guidance_scale = ""
+        for tag in event.tags():
+            if tag.as_vec()[0] == 'i':
+                input_type = tag.as_vec()[2]
+                if input_type == "text":
+                    prompt = tag.as_vec()[1]
+
+            elif tag.as_vec()[0] == 'param':
+                print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
+                if tag.as_vec()[1] == "negative_prompt":
+                    negative_prompt = tag.as_vec()[2]
+                elif tag.as_vec()[1] == "lora":
+                    lora = tag.as_vec()[2]
+                elif tag.as_vec()[1] == "lora_weight":
+                    lora_weight = tag.as_vec()[2]
+                elif tag.as_vec()[1] == "strength":
+                    strength = float(tag.as_vec()[2])
+                elif tag.as_vec()[1] ==
"guidance_scale": + guidance_scale = float(tag.as_vec()[2]) + elif tag.as_vec()[1] == "ratio": + if len(tag.as_vec()) > 3: + ratio_width = (tag.as_vec()[2]) + ratio_height = (tag.as_vec()[3]) + elif len(tag.as_vec()) == 3: + split = tag.as_vec()[2].split(":") + ratio_width = split[0] + ratio_height = split[1] + # if size is set it will overwrite ratio. + elif tag.as_vec()[1] == "size": + if len(tag.as_vec()) > 3: + width = (tag.as_vec()[2]) + height = (tag.as_vec()[3]) + elif len(tag.as_vec()) == 3: + split = tag.as_vec()[2].split("x") + if len(split) > 1: + width = split[0] + height = split[1] + elif tag.as_vec()[1] == "model": + model = tag.as_vec()[2] + + io_input = { + "id": "input_prompt", + "type": "input", + "src": "request:text", + "data": prompt + } + io_negative = { + "id": "negative_prompt", + "type": "input", + "src": "request:text", + "data": negative_prompt + } + io_output = { + "id": "output_image", + "type": "output", + "src": "request:image" + } + + request_form['data'] = json.dumps([io_input, io_negative, io_output]) + + options = { + "model": model, + "ratio": ratio_width + '-' + ratio_height, + "width": width, + "height": height, + "strength": strength, + "guidance_scale": guidance_scale, + "lora": lora, + "lora_weight": lora_weight + } + request_form['options'] = json.dumps(options) + + return request_form + + def process(self, request_form): + try: + # Call the process route of n-server with our request form. + response = send_request_to_server(request_form, self.options['server']) + if bool(json.loads(response)['success']): + print("Job " + request_form['jobID'] + " sent to server") + + pool = ThreadPool(processes=1) + thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server'])) + print("Wait for results of server...") + result = thread.get() + return result + + except Exception as e: + raise Exception(e) + + +# We build an example here that we can call by either calling this file directly from the main directory, +# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the +# playground or elsewhere +def build_example(name, identifier, admin_config, server_address, default_model="stabilityai/stable-diffusion-xl" + "-base-1.0", default_lora=""): + dvm_config = build_default_config(identifier) + dvm_config.USE_OWN_VENV = False + admin_config.LUD16 = dvm_config.LN_ADDRESS + # A module might have options it can be initialized with, here we set a default model, and the server + # address it should use. 
These parameters can be freely defined in the task component + options = {'default_model': default_model, 'default_lora': default_lora, 'server': server_address} + + nip89info = { + "name": name, + "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg", + "about": "I draw images based on a prompt with a Model called unstable diffusion", + "encryptionSupported": True, + "cashuAccepted": True, + "nip90Params": { + "negative_prompt": { + "required": False, + "values": [] + }, + "ratio": { + "required": False, + "values": ["1:1", "4:3", "16:9", "3:4", "9:16", "10:16"] + } + } + } + nip89config = NIP89Config() + nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"]) + nip89config.CONTENT = json.dumps(nip89info) + + return ImageGenerationSDXL(name=name, dvm_config=dvm_config, nip89config=nip89config, + admin_config=admin_config, options=options) + + +def process_venv(): + args = DVMTaskInterface.process_args() + dvm_config = build_default_config(args.identifier) + dvm = ImageGenerationSDXL(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None) + result = dvm.process(json.loads(args.request)) + DVMTaskInterface.write_output(result, args.output) + + +if __name__ == '__main__': + process_venv() diff --git a/nostr_dvm/tasks/imagegeneration_sdxlimg2img.py b/nostr_dvm/tasks/imagegeneration_sdxlimg2img.py new file mode 100644 index 0000000..1fdf30c --- /dev/null +++ b/nostr_dvm/tasks/imagegeneration_sdxlimg2img.py @@ -0,0 +1,235 @@ +import json +from multiprocessing.pool import ThreadPool + +from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server +from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface +from nostr_dvm.utils.admin_utils import AdminConfig +from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config +from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag +from nostr_dvm.utils.definitions import EventDefinitions + +""" +This File contains a Module to transform Image (and Text) input on N-server and receive results back. 
+
+Accepted Inputs: Prompt (text)
+Outputs: An url to an Image
+Params: -model  # models: juggernaut, dynavision, colossusProject, newreality, unstable
+        -lora   # loras (weights on top of models) voxel,
+"""
+
+
+class ImageGenerationSDXLIMG2IMG(DVMTaskInterface):
+    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
+    TASK: str = "image-to-image"
+    FIX_COST: float = 70
+
+    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+                 admin_config: AdminConfig = None, options=None):
+        super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+    def is_input_supported(self, tags):
+        hasurl = False
+        hasprompt = False
+        for tag in tags:
+            if tag.as_vec()[0] == 'i':
+                input_value = tag.as_vec()[1]
+                input_type = tag.as_vec()[2]
+                if input_type == "url":
+                    hasurl = True
+                elif input_type == "text":
+                    hasprompt = True  # a prompt is optional when a lora is set
+
+            elif tag.as_vec()[0] == 'output':
+                output = tag.as_vec()[1]
+                if (output == "" or
+                        not (output == "image/png" or output == "image/jpg"
+                             or output == "image/png;format=url" or output == "image/jpg;format=url")):
+                    print("Output format not supported, skipping..")
+                    return False
+
+        if not hasurl:
+            return False
+
+        return True
+
+    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+        request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl-img2img.trainer'
+
+        prompt = ""
+        negative_prompt = ""
+        url = ""
+        if self.options.get("default_model"):
+            model = self.options['default_model']
+        else:
+            model = "stabilityai/stable-diffusion-xl-refiner-1.0"
+
+        ratio_width = "1"
+        ratio_height = "1"
+        width = ""
+        height = ""
+
+        if self.options.get("default_lora") and self.options.get("default_lora") != "":
+            lora = self.options['default_lora']
+        else:
+            lora = ""
+
+        lora_weight = ""
+        if self.options.get("strength"):
+            strength = float(self.options['strength'])
+        else:
+            strength = 0.8
+        if self.options.get("guidance_scale"):
+            guidance_scale = float(self.options['guidance_scale'])
+        else:
+            guidance_scale = 11.0
+        for tag in event.tags():
+            if tag.as_vec()[0] == 'i':
+                input_type = tag.as_vec()[2]
+                if input_type == "text":
+                    prompt = tag.as_vec()[1]
+                elif input_type == "url":
+                    url = tag.as_vec()[1]
+
+            elif tag.as_vec()[0] == 'param':
+                print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
+                if tag.as_vec()[1] == "negative_prompt":
+                    negative_prompt = tag.as_vec()[2]
+                elif tag.as_vec()[1] == "lora":
+                    lora = tag.as_vec()[2]
+                elif tag.as_vec()[1] == "lora_weight":
+                    lora_weight = tag.as_vec()[2]
+                elif tag.as_vec()[1] == "strength":
+                    strength = float(tag.as_vec()[2])
+                elif tag.as_vec()[1] == "guidance_scale":
+                    guidance_scale = float(tag.as_vec()[2])
+                elif tag.as_vec()[1] == "ratio":
+                    if len(tag.as_vec()) > 3:
+                        ratio_width = (tag.as_vec()[2])
+                        ratio_height = (tag.as_vec()[3])
+                    elif len(tag.as_vec()) == 3:
+                        split = tag.as_vec()[2].split(":")
+                        ratio_width = split[0]
+                        ratio_height = split[1]
+                # if size is set it will overwrite ratio.
+ elif tag.as_vec()[1] == "size": + if len(tag.as_vec()) > 3: + width = (tag.as_vec()[2]) + height = (tag.as_vec()[3]) + elif len(tag.as_vec()) == 3: + split = tag.as_vec()[2].split("x") + if len(split) > 1: + width = split[0] + height = split[1] + elif tag.as_vec()[1] == "model": + model = tag.as_vec()[2] + + io_input_image = { + "id": "input_image", + "type": "input", + "src": "url:Image", + "uri": url + } + io_input = { + "id": "input_prompt", + "type": "input", + "src": "request:text", + "data": prompt + } + io_negative = { + "id": "negative_prompt", + "type": "input", + "src": "request:text", + "data": negative_prompt + } + io_output = { + "id": "output_image", + "type": "output", + "src": "request:image" + } + + request_form['data'] = json.dumps([io_input_image, io_input, io_negative, io_output]) + + options = { + "model": model, + "ratio": ratio_width + '-' + ratio_height, + "width": width, + "height": height, + "strength": strength, + "guidance_scale": guidance_scale, + "lora": lora, + "lora_weight": lora_weight, + "n_steps": 30 + } + request_form['options'] = json.dumps(options) + + return request_form + + def process(self, request_form): + try: + # Call the process route of NOVA-Server with our request form. + response = send_request_to_server(request_form, self.options['server']) + if bool(json.loads(response)['success']): + print("Job " + request_form['jobID'] + " sent to server") + + pool = ThreadPool(processes=1) + thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server'])) + print("Wait for results of server...") + result = thread.get() + return result + + except Exception as e: + raise Exception(e) + + +# We build an example here that we can call by either calling this file directly from the main directory, +# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the +# playground or elsewhere +def build_example(name, identifier, admin_config, server_address, default_lora="", strength=0.6): + dvm_config = build_default_config(identifier) + dvm_config.USE_OWN_VENV = False + admin_config.LUD16 = dvm_config.LN_ADDRESS + + nip89info = { + "name": name, + "image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg", + "about": "I convert an image to another image, kinda random for now. 
", + "encryptionSupported": True, + "cashuAccepted": True, + "nip90Params": { + "negative_prompt": { + "required": False, + "values": [] + }, + "lora": { + "required": False, + "values": ["inkpunk", "timburton", "voxel"] + }, + "strength": { + "required": False, + "values": [] + } + } + } + + # A module might have options it can be initialized with, here we set a default model, lora and the server + options = {'default_lora': default_lora, 'strength': strength, 'server': server_address} + + nip89config = NIP89Config() + nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"]) + nip89config.CONTENT = json.dumps(nip89info) + + return ImageGenerationSDXLIMG2IMG(name=name, dvm_config=dvm_config, nip89config=nip89config, + admin_config=admin_config, options=options) + + +def process_venv(): + args = DVMTaskInterface.process_args() + dvm_config = build_default_config(args.identifier) + dvm = ImageGenerationSDXLIMG2IMG(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None) + result = dvm.process(json.loads(args.request)) + DVMTaskInterface.write_output(result, args.output) + + +if __name__ == '__main__': + process_venv() diff --git a/nostr_dvm/tasks/imageinterrogator.py b/nostr_dvm/tasks/imageinterrogator.py new file mode 100644 index 0000000..1bc9390 --- /dev/null +++ b/nostr_dvm/tasks/imageinterrogator.py @@ -0,0 +1,149 @@ +import json +from multiprocessing.pool import ThreadPool + +from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server +from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface +from nostr_dvm.utils.admin_utils import AdminConfig +from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config +from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag +from nostr_dvm.utils.definitions import EventDefinitions + +""" +This File contains a Module to extract a prompt from an image from an url. 
+ +Accepted Inputs: link to image (url) +Outputs: An textual description of the image + +""" + + +class ImageInterrogator(DVMTaskInterface): + KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT + TASK: str = "image-to-text" + FIX_COST: float = 80 + + def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config, + admin_config: AdminConfig = None, options=None): + super().__init__(name, dvm_config, nip89config, admin_config, options) + + def is_input_supported(self, tags): + hasurl = False + for tag in tags: + if tag.as_vec()[0] == 'i': + input_value = tag.as_vec()[1] + input_type = tag.as_vec()[2] + if input_type == "url": + hasurl = True + + if not hasurl: + return False + + return True + + def create_request_from_nostr_event(self, event, client=None, dvm_config=None): + request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")} + request_form["trainerFilePath"] = r'modules\image_interrogator\image_interrogator.trainer' + url = "" + method = "prompt" + mode = "best" + + for tag in event.tags(): + if tag.as_vec()[0] == 'i': + input_type = tag.as_vec()[2] + if input_type == "url": + url = tag.as_vec()[1] + elif tag.as_vec()[0] == 'param': + print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2]) + if tag.as_vec()[1] == "method": + method = tag.as_vec()[2] + elif tag.as_vec()[1] == "mode": + mode = tag.as_vec()[2] + + io_input_image = { + "id": "input_image", + "type": "input", + "src": "url:Image", + "uri": url + } + + io_output = { + "id": "output", + "type": "output", + "src": "request:text" + } + + request_form['data'] = json.dumps([io_input_image, io_output]) + + options = { + "kind": method, + "mode": mode + + } + request_form['options'] = json.dumps(options) + + return request_form + + def process(self, request_form): + try: + # Call the process route of NOVA-Server with our request form. + response = send_request_to_server(request_form, self.options['server']) + if bool(json.loads(response)['success']): + print("Job " + request_form['jobID'] + " sent to server") + + pool = ThreadPool(processes=1) + thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server'])) + print("Wait for results of server...") + result = thread.get() + return result + + except Exception as e: + raise Exception(e) + + +# We build an example here that we can call by either calling this file directly from the main directory, +# or by adding it to our playground. 
You can call the example and adjust it to your needs or redefine it in the
+# playground or elsewhere
+def build_example(name, identifier, admin_config, server_address):
+    dvm_config = build_default_config(identifier)
+    dvm_config.USE_OWN_VENV = False
+    admin_config.LUD16 = dvm_config.LN_ADDRESS
+
+    nip89info = {
+        "name": name,
+        "image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
+        "about": "I analyse Images and return a prompt or a prompt analysis",
+        "encryptionSupported": True,
+        "cashuAccepted": True,
+        "nip90Params": {
+            "method": {
+                "required": False,
+                "values": ["prompt", "analysis"]
+            },
+            "mode": {
+                "required": False,
+                "values": ["best", "classic", "fast", "negative"]
+            }
+        }
+    }
+
+    # A module might have options it can be initialized with, here we set a default model, lora and the server
+    options = {'server': server_address}
+
+    nip89config = NIP89Config()
+    nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
+    nip89config.CONTENT = json.dumps(nip89info)
+
+    return ImageInterrogator(name=name, dvm_config=dvm_config, nip89config=nip89config,
+                             admin_config=admin_config, options=options)
+
+
+def process_venv():
+    args = DVMTaskInterface.process_args()
+    dvm_config = build_default_config(args.identifier)
+    dvm = ImageInterrogator(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
+    result = dvm.process(json.loads(args.request))
+    DVMTaskInterface.write_output(result, args.output)
+
+
+if __name__ == '__main__':
+    process_venv()
diff --git a/nostr_dvm/tasks/imageupscale.py b/nostr_dvm/tasks/imageupscale.py
new file mode 100644
index 0000000..0133fdc
--- /dev/null
+++ b/nostr_dvm/tasks/imageupscale.py
@@ -0,0 +1,141 @@
+import json
+from multiprocessing.pool import ThreadPool
+
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from nostr_dvm.utils.definitions import EventDefinitions
+
+"""
+This File contains a Module to upscale an image from an url by factor 2-4
+
+Accepted Inputs: link to image (url)
+Outputs: An url to an Image
+Params: -upscale 2,3,4
+"""
+
+
+class ImageUpscale(DVMTaskInterface):
+    KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
+    TASK: str = "image-to-image"
+    FIX_COST: float = 20
+
+    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
+                 admin_config: AdminConfig = None, options=None):
+        super().__init__(name, dvm_config, nip89config, admin_config, options)
+
+    def is_input_supported(self, tags):
+        hasurl = False
+        for tag in tags:
+            if tag.as_vec()[0] == 'i':
+                input_value = tag.as_vec()[1]
+                input_type = tag.as_vec()[2]
+                if input_type == "url":
+                    hasurl = True
+
+        if not hasurl:
+            return False
+
+        return True
+
+    def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
+        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
+        request_form["trainerFilePath"] = r'modules\image_upscale\image_upscale_realesrgan.trainer'
+        url = ""
+        out_scale = 4
+
+        for tag in event.tags():
+            if tag.as_vec()[0] == 'i':
+                input_type = tag.as_vec()[2]
+                if input_type == "url":
+                    url = tag.as_vec()[1]
+
+            elif tag.as_vec()[0] == 'param':
+                print("Param: " + 
tag.as_vec()[1] + ": " + tag.as_vec()[2]) + if tag.as_vec()[1] == "upscale": + out_scale = tag.as_vec()[2] + + io_input_image = { + "id": "input_image", + "type": "input", + "src": "url:Image", + "uri": url + } + + io_output = { + "id": "output_image", + "type": "output", + "src": "request:image" + } + + request_form['data'] = json.dumps([io_input_image, io_output]) + + options = { + "outscale": out_scale, + + } + request_form['options'] = json.dumps(options) + + return request_form + + def process(self, request_form): + try: + # Call the process route of NOVA-Server with our request form. + response = send_request_to_server(request_form, self.options['server']) + if bool(json.loads(response)['success']): + print("Job " + request_form['jobID'] + " sent to server") + + pool = ThreadPool(processes=1) + thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server'])) + print("Wait for results of server...") + result = thread.get() + return result + + except Exception as e: + raise Exception(e) + + +# We build an example here that we can call by either calling this file directly from the main directory, +# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the +# playground or elsewhere +def build_example(name, identifier, admin_config, server_address): + dvm_config = build_default_config(identifier) + dvm_config.USE_OWN_VENV = False + admin_config.LUD16 = dvm_config.LN_ADDRESS + + # A module might have options it can be initialized with, here we set a default model, lora and the server + options = {'server': server_address} + + nip89info = { + "name": name, + "image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg", + "about": "I upscale an image using realESRGan up to factor 4 (default is factor 4)", + "encryptionSupported": True, + "cashuAccepted": True, + "nip90Params": { + "upscale": { + "required": False, + "values": ["2", "3", "4"] + } + } + } + nip89config = NIP89Config() + nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"]) + nip89config.CONTENT = json.dumps(nip89info) + + return ImageUpscale(name=name, dvm_config=dvm_config, nip89config=nip89config, + admin_config=admin_config, options=options) + + +def process_venv(): + args = DVMTaskInterface.process_args() + dvm_config = build_default_config(args.identifier) + dvm = ImageUpscale(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None) + result = dvm.process(json.loads(args.request)) + DVMTaskInterface.write_output(result, args.output) + + +if __name__ == '__main__': + process_venv() diff --git a/nostr_dvm/tasks/textextraction_google.py b/nostr_dvm/tasks/textextraction_google.py index f5c6b69..23053df 100644 --- a/nostr_dvm/tasks/textextraction_google.py +++ b/nostr_dvm/tasks/textextraction_google.py @@ -1,20 +1,16 @@ import json import os import time -from pathlib import Path - -import dotenv from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface from nostr_dvm.utils.admin_utils import AdminConfig -from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.mediasource_utils import organize_input_media_data from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag from nostr_dvm.utils.definitions import EventDefinitions """ -This File contains a Module to transform a media file input on Google Cloud +This File contains a 
Module to extract text from a media file input on Google Cloud
 
 Accepted Inputs: Url to media file (url)
 Outputs: Transcribed text
@@ -30,7 +26,6 @@ class SpeechToTextGoogle(DVMTaskInterface):
     dependencies = [("nostr-dvm", "nostr-dvm"),
                     ("speech_recognition", "SpeechRecognition==3.10.0")]
 
-
     def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config, admin_config: AdminConfig = None,
                  options=None):
         dvm_config.SCRIPT = os.path.abspath(__file__)
@@ -129,6 +124,7 @@ class SpeechToTextGoogle(DVMTaskInterface):
 
         return result
 
+
 # We build an example here that we can call by either calling this file directly from the main directory,
 # or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
 # playground or elsewhere
@@ -158,6 +154,8 @@ def build_example(name, identifier, admin_config):
     return SpeechToTextGoogle(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config,
                               options=options)
 
+
 def process_venv():
     args = DVMTaskInterface.process_args()
     dvm_config = build_default_config(args.identifier)
@@ -167,4 +165,4 @@
 
 
 if __name__ == '__main__':
-    process_venv()
\ No newline at end of file
+    process_venv()
diff --git a/nostr_dvm/tasks/textextraction_pdf.py b/nostr_dvm/tasks/textextraction_pdf.py
index d578299..b993097 100644
--- a/nostr_dvm/tasks/textextraction_pdf.py
+++ b/nostr_dvm/tasks/textextraction_pdf.py
@@ -1,13 +1,9 @@
 import json
 import os
 import re
-from pathlib import Path
-
-import dotenv
 
 from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
 from nostr_dvm.utils.admin_utils import AdminConfig
-from nostr_dvm.utils.backend_utils import keep_alive
 from nostr_dvm.utils.definitions import EventDefinitions
 from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
 from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -29,14 +25,11 @@ class TextExtractionPDF(DVMTaskInterface):
     dependencies = [("nostr-dvm", "nostr-dvm"),
                     ("pypdf", "pypdf==3.17.1")]
 
-
     def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config, admin_config: AdminConfig = None,
                  options=None):
         dvm_config.SCRIPT = os.path.abspath(__file__)
         super().__init__(name, dvm_config, nip89config, admin_config, options)
 
-
-
     def is_input_supported(self, tags):
         for tag in tags:
             if tag.as_vec()[0] == 'i':
@@ -118,6 +111,7 @@ def build_example(name, identifier, admin_config):
     return TextExtractionPDF(name=name, dvm_config=dvm_config, nip89config=nip89config,
                              admin_config=admin_config)
 
+
 def process_venv():
     args = DVMTaskInterface.process_args()
     dvm_config = build_default_config(args.identifier)
@@ -127,4 +121,4 @@
 
 
 if __name__ == '__main__':
-    process_venv()
\ No newline at end of file
+    process_venv()
diff --git a/nostr_dvm/tasks/textextraction_whisperx.py b/nostr_dvm/tasks/textextraction_whisperx.py
new file mode 100644
index 0000000..13e998a
--- /dev/null
+++ b/nostr_dvm/tasks/textextraction_whisperx.py
@@ -0,0 +1,193 @@
+import json
+import os
+import time
+from multiprocessing.pool import ThreadPool
+from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server, send_file_to_server
+from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
+from nostr_dvm.utils.admin_utils import AdminConfig
+from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
+from nostr_dvm.utils.mediasource_utils import organize_input_media_data
+from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
+from
nostr_dvm.utils.definitions import EventDefinitions + +""" +This File contains a Module to transform A media file input on n-server and receive results back. + +Accepted Inputs: Url to media file (url) +Outputs: Transcribed text + +""" + + +class SpeechToTextWhisperX(DVMTaskInterface): + KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT + TASK: str = "speech-to-text" + FIX_COST: float = 10 + PER_UNIT_COST: float = 0.1 + + def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config, + admin_config: AdminConfig = None, options=None): + super().__init__(name, dvm_config, nip89config, admin_config, options) + + def is_input_supported(self, tags): + for tag in tags: + if tag.as_vec()[0] == 'i': + input_value = tag.as_vec()[1] + input_type = tag.as_vec()[2] + if input_type != "url": + return False + + elif tag.as_vec()[0] == 'output': + output = tag.as_vec()[1] + if output == "" or not (output == "text/plain"): + print("Output format not supported, skipping..") + return False + + return True + + def create_request_from_nostr_event(self, event, client=None, dvm_config=None): + request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", ""), + "trainerFilePath": r'modules\whisperx\whisperx_transcript.trainer'} + + if self.options.get("default_model"): + model = self.options['default_model'] + else: + model = "base" + if self.options.get("alignment"): + alignment = self.options['alignment'] + else: + alignment = "raw" + + url = "" + input_type = "url" + start_time = 0 + end_time = 0 + media_format = "audio/mp3" + + for tag in event.tags(): + if tag.as_vec()[0] == 'i': + input_type = tag.as_vec()[2] + if input_type == "url": + url = tag.as_vec()[1] + + elif tag.as_vec()[0] == 'param': + print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2]) + if tag.as_vec()[1] == "alignment": + alignment = tag.as_vec()[2] + elif tag.as_vec()[1] == "model": + model = tag.as_vec()[2] + elif tag.as_vec()[1] == "range": + try: + t = time.strptime(tag.as_vec()[2], "%H:%M:%S") + seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec + start_time = float(seconds) + except: + try: + t = time.strptime(tag.as_vec()[2], "%M:%S") + seconds = t.tm_min * 60 + t.tm_sec + start_time = float(seconds) + except: + start_time = tag.as_vec()[2] + try: + t = time.strptime(tag.as_vec()[3], "%H:%M:%S") + seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec + end_time = float(seconds) + except: + try: + t = time.strptime(tag.as_vec()[3], "%M:%S") + seconds = t.tm_min * 60 + t.tm_sec + end_time = float(seconds) + except: + end_time = float(tag.as_vec()[3]) + + filepath = organize_input_media_data(url, input_type, start_time, end_time, dvm_config, client, True, + media_format) + path_on_server = send_file_to_server(os.path.realpath(filepath), self.options['server']) + + io_input = { + "id": "audio", + "type": "input", + "src": "file:stream", + "uri": path_on_server + } + + io_output = { + "id": "transcript", + "type": "output", + "src": "request:annotation:free" + } + + request_form['data'] = json.dumps([io_input, io_output]) + + options = { + "model": model, + "alignment_mode": alignment, + } + request_form['options'] = json.dumps(options) + return request_form + + def process(self, request_form): + try: + # Call the process route of NOVA-Server with our request form. 
+ response = send_request_to_server(request_form, self.options['server']) + if bool(json.loads(response)['success']): + print("Job " + request_form['jobID'] + " sent to server") + + pool = ThreadPool(processes=1) + thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server'])) + print("Wait for results of server...") + result = thread.get() + return result + + except Exception as e: + raise Exception(e) + + +# We build an example here that we can call by either calling this file directly from the main directory, +# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the +# playground or elsewhere +def build_example(name, identifier, admin_config, server_address): + dvm_config = build_default_config(identifier) + dvm_config.USE_OWN_VENV = False + admin_config.LUD16 = dvm_config.LN_ADDRESS + + # A module might have options it can be initialized with, here we set a default model, and the server + # address it should use. These parameters can be freely defined in the task component + options = {'default_model': "base", 'server': server_address} + + nip89info = { + "name": name, + "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg", + "about": "I extract text from media files with WhisperX", + "encryptionSupported": True, + "cashuAccepted": True, + "nip90Params": { + "model": { + "required": False, + "values": ["base", "tiny", "small", "medium", "large-v1", "large-v2", "tiny.en", "base.en", "small.en", + "medium.en"] + }, + "alignment": { + "required": False, + "values": ["raw", "segment", "word"] + } + } + } + nip89config = NIP89Config() + nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"]) + nip89config.CONTENT = json.dumps(nip89info) + + return SpeechToTextWhisperX(name=name, dvm_config=dvm_config, nip89config=nip89config, + admin_config=admin_config, options=options) + + +def process_venv(): + args = DVMTaskInterface.process_args() + dvm_config = build_default_config(args.identifier) + dvm = SpeechToTextWhisperX(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None) + result = dvm.process(json.loads(args.request)) + DVMTaskInterface.write_output(result, args.output) + + +if __name__ == '__main__': + process_venv() diff --git a/nostr_dvm/tasks/textgeneration_llmlite.py b/nostr_dvm/tasks/textgeneration_llmlite.py index ab2bd0e..e4d43ce 100644 --- a/nostr_dvm/tasks/textgeneration_llmlite.py +++ b/nostr_dvm/tasks/textgeneration_llmlite.py @@ -1,13 +1,8 @@ import json import os -from pathlib import Path - -import dotenv - from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface from nostr_dvm.utils.admin_utils import AdminConfig -from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag diff --git a/nostr_dvm/tasks/texttospeech.py b/nostr_dvm/tasks/texttospeech.py index eca7523..359c9e9 100644 --- a/nostr_dvm/tasks/texttospeech.py +++ b/nostr_dvm/tasks/texttospeech.py @@ -1,5 +1,6 @@ import json import os + os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" from pathlib import Path import urllib.request @@ -79,7 +80,7 @@ class TextToSpeech(DVMTaskInterface): from TTS.api import TTS options = DVMTaskInterface.set_options(request_form) device = "cuda" if torch.cuda.is_available() else "cpu" 
- #else "mps" if torch.backends.mps.is_available() \ + # else "mps" if torch.backends.mps.is_available() \ print(TTS().list_models()) try: @@ -102,7 +103,7 @@ def build_example(name, identifier, admin_config): dvm_config = build_default_config(identifier) admin_config.LUD16 = dvm_config.LN_ADDRESS - #use an alternative local wav file you want to use for cloning + # use an alternative local wav file you want to use for cloning options = {'input_file': ""} nip89info = { @@ -134,5 +135,6 @@ def process_venv(): result = dvm.process(json.loads(args.request)) DVMTaskInterface.write_output(result, args.output) + if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/translation_google.py b/nostr_dvm/tasks/translation_google.py index b1f17d8..b4ed1e3 100644 --- a/nostr_dvm/tasks/translation_google.py +++ b/nostr_dvm/tasks/translation_google.py @@ -1,20 +1,14 @@ import json import os -from pathlib import Path - -import dotenv - from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface from nostr_dvm.utils.admin_utils import AdminConfig -from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag from nostr_dvm.utils.nostr_utils import get_referenced_event_by_id, get_event_by_id - """ -This File contains a Module to call Google Translate Services locally on the DVM Machine +This File contains a Module to call Google Translate Services on the DVM Machine Accepted Inputs: Text, Events, Jobs (Text Extraction, Summary, Translation) Outputs: Text containing the TranslationGoogle in the desired language. @@ -111,6 +105,7 @@ class TranslationGoogle(DVMTaskInterface): return translated_text + # We build an example here that we can call by either calling this file directly from the main directory, # or by adding it to our playground. 
You can call the example and adjust it to your needs or redefine it in the # playground or elsewhere @@ -128,12 +123,18 @@ def build_example(name, identifier, admin_config): "nip90Params": { "language": { "required": False, - "values": ["en", "az", "be", "bg", "bn", "bs", "ca", "ceb", "co", "cs", "cy", "da", "de", "el", "eo", "es", - "et", "eu", "fa", "fi", "fr", "fy", "ga", "gd", "gl", "gu", "ha", "haw", "hi", "hmn", "hr", "ht", - "hu", "hy", "id", "ig", "is", "it", "he", "ja", "jv", "ka", "kk", "km", "kn", "ko", "ku", "ky", - "la", "lb", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne", "nl", - "no", "ny", "or", "pa", "pl", "ps", "pt", "ro", "ru", "sd", "si", "sk", "sl", "sm", "sn", "so", - "sq", "sr", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "tl", "tr", "ug", "uk", "ur", "uz", + "values": ["en", "az", "be", "bg", "bn", "bs", "ca", "ceb", "co", "cs", "cy", "da", "de", "el", "eo", + "es", + "et", "eu", "fa", "fi", "fr", "fy", "ga", "gd", "gl", "gu", "ha", "haw", "hi", "hmn", "hr", + "ht", + "hu", "hy", "id", "ig", "is", "it", "he", "ja", "jv", "ka", "kk", "km", "kn", "ko", "ku", + "ky", + "la", "lb", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne", + "nl", + "no", "ny", "or", "pa", "pl", "ps", "pt", "ro", "ru", "sd", "si", "sk", "sl", "sm", "sn", + "so", + "sq", "sr", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "tl", "tr", "ug", "uk", "ur", + "uz", "vi", "xh", "yi", "yo", "zh", "zu"] } } @@ -145,7 +146,6 @@ def build_example(name, identifier, admin_config): return TranslationGoogle(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config) - def process_venv(): args = DVMTaskInterface.process_args() dvm_config = build_default_config(args.identifier) @@ -155,4 +155,4 @@ def process_venv(): if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/translation_libretranslate.py b/nostr_dvm/tasks/translation_libretranslate.py index c3d00c7..cc6f6d2 100644 --- a/nostr_dvm/tasks/translation_libretranslate.py +++ b/nostr_dvm/tasks/translation_libretranslate.py @@ -1,13 +1,9 @@ import json import os -from pathlib import Path - -import dotenv import requests from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface from nostr_dvm.utils.admin_utils import AdminConfig -from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag @@ -156,5 +152,6 @@ def process_venv(): result = dvm.process(json.loads(args.request)) DVMTaskInterface.write_output(result, args.output) + if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/trending_notes_nostrband.py b/nostr_dvm/tasks/trending_notes_nostrband.py index 20c5cd5..85c2e73 100644 --- a/nostr_dvm/tasks/trending_notes_nostrband.py +++ b/nostr_dvm/tasks/trending_notes_nostrband.py @@ -1,13 +1,9 @@ import json import os -from pathlib import Path - -import dotenv from nostr_sdk import Tag from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface from nostr_dvm.utils.admin_utils import AdminConfig -from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip89_utils import NIP89Config, 
check_and_set_d_tag @@ -118,6 +114,7 @@ def build_example(name, identifier, admin_config): return TrendingNotesNostrBand(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config) + def process_venv(): args = DVMTaskInterface.process_args() dvm_config = build_default_config(args.identifier) @@ -125,5 +122,6 @@ def process_venv(): result = dvm.process(json.loads(args.request)) DVMTaskInterface.write_output(result, args.output) + if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/tasks/videogeneration_replicate_svd.py b/nostr_dvm/tasks/videogeneration_replicate_svd.py index f283c1c..fa4bed6 100644 --- a/nostr_dvm/tasks/videogeneration_replicate_svd.py +++ b/nostr_dvm/tasks/videogeneration_replicate_svd.py @@ -1,17 +1,12 @@ import json import os -import subprocess from io import BytesIO -from pathlib import Path - -import dotenv import requests import urllib.request from PIL import Image from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface from nostr_dvm.utils.admin_utils import AdminConfig -from nostr_dvm.utils.backend_utils import keep_alive from nostr_dvm.utils.definitions import EventDefinitions from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag @@ -39,9 +34,6 @@ class VideoGenerationReplicateSVD(DVMTaskInterface): dvm_config.SCRIPT = os.path.abspath(__file__) super().__init__(name, dvm_config, nip89config, admin_config, options) - - - def is_input_supported(self, tags): for tag in tags: if tag.as_vec()[0] == 'i': @@ -140,6 +132,7 @@ def build_example(name, identifier, admin_config): return VideoGenerationReplicateSVD(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config) + def process_venv(): args = DVMTaskInterface.process_args() dvm_config = build_default_config(args.identifier) @@ -149,4 +142,4 @@ def process_venv(): if __name__ == '__main__': - process_venv() \ No newline at end of file + process_venv() diff --git a/nostr_dvm/utils/definitions.py b/nostr_dvm/utils/definitions.py index 99d1df9..3397307 100644 --- a/nostr_dvm/utils/definitions.py +++ b/nostr_dvm/utils/definitions.py @@ -2,6 +2,8 @@ import os from dataclasses import dataclass from nostr_sdk import Event + + class EventDefinitions: KIND_DM = 4 KIND_ZAP = 9735 @@ -14,8 +16,6 @@ class EventDefinitions: KIND_NIP90_RESULT_SUMMARIZE_TEXT = KIND_NIP90_SUMMARIZE_TEXT + 1000 KIND_NIP90_TRANSLATE_TEXT = 5002 KIND_NIP90_RESULT_TRANSLATE_TEXT = KIND_NIP90_TRANSLATE_TEXT + 1000 - KIND_NIP90_TEXT_TO_SPEECH = 5005 - KIND_NIP90_RESULT_TEXT_TO_SPEECH = KIND_NIP90_TEXT_TO_SPEECH + 1000 KIND_NIP90_GENERATE_TEXT = 5050 KIND_NIP90_RESULT_GENERATE_TEXT = KIND_NIP90_GENERATE_TEXT + 1000 KIND_NIP90_GENERATE_IMAGE = 5100 @@ -23,6 +23,8 @@ class EventDefinitions: KIND_NIP90_CONVERT_VIDEO = 5200 KIND_NIP90_RESULT_CONVERT_VIDEO = KIND_NIP90_CONVERT_VIDEO + 1000 KIND_NIP90_GENERATE_VIDEO = 5202 + KIND_NIP90_TEXT_TO_SPEECH = 5250 + KIND_NIP90_RESULT_TEXT_TO_SPEECH = KIND_NIP90_TEXT_TO_SPEECH + 1000 KIND_NIP90_RESULT_GENERATE_VIDEO = KIND_NIP90_GENERATE_VIDEO + 1000 KIND_NIP90_CONTENT_DISCOVERY = 5300 KIND_NIP90_RESULT_CONTENT_DISCOVERY = KIND_NIP90_CONTENT_DISCOVERY + 1000 @@ -59,6 +61,7 @@ class JobToWatch: payment_hash: str expires: int + @dataclass class RequiredJobToWatch: event: Event diff --git a/setup.py b/setup.py index 486ecd0..9c1a6c5 100644 --- a/setup.py +++ b/setup.py @@ -13,10 +13,7 @@ setup( 
      author_email="believethehypeonnostr@proton.me",
      description=DESCRIPTION,
      long_description=LONG_DESCRIPTION,
-     packages=find_packages(include=['nostr_dvm', 'nostr_dvm.interfaces', 'nostr_dvm.tasks',
-                                     'nostr_dvm.utils', 'nostr_dvm.utils.scrapper',
-                                     'nostr_dvm.backends', 'nostr_dvm.backends.mlx',
-                                     'nostr_dvm.backends.mlx.stablediffusion']),
+     packages=find_packages(include=['nostr_dvm', 'nostr_dvm.*']),
      install_requires=["nostr-sdk==0.0.5",
                        "bech32==1.2.0",
                        "pycryptodome==3.19.0",