diff --git a/.env_example b/.env_example
index aaa3c46..ba7672f 100644
--- a/.env_example
+++ b/.env_example
@@ -10,6 +10,7 @@ OPENAI_API_KEY = "" # Enter your OpenAI API Key to use DVMs with OpenAI service
LIBRE_TRANSLATE_ENDPOINT = "" # Url to LibreTranslate Endpoint e.g. https://libretranslate.com
LIBRE_TRANSLATE_API_KEY = "" # API Key, if required (You can host your own instance where you don't need it)
REPLICATE_API_TOKEN = "" #API Key to run models on replicate.com
+N_SERVER = "" # Enter the address of a n-server instance, locally or on a machine in your network host:port
# We will automatically create dtags and private keys based on the identifier variable in main.
# If your DVM already has a dtag and private key you can replace it here before publishing the DTAG to not create a new one.
diff --git a/.gitignore b/.gitignore
index 4f1d54f..6e28058 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,5 @@ app_deploy.py
db/Cashu/wallet.sqlite3
.idea/misc.xml
.idea/misc.xml
+backends/nserver/cache/*
+backends/nserver/modules/image_upscale/weights/*
\ No newline at end of file
diff --git a/backends/nserver/.env_example b/backends/nserver/.env_example
new file mode 100644
index 0000000..59995bc
--- /dev/null
+++ b/backends/nserver/.env_example
@@ -0,0 +1,7 @@
+NOVA_SERVER_HOST = 0.0.0.0
+NOVA_SERVER_PORT = 37318
+NOVA_SERVER_CML_DIR = .\modules
+NOVA_SERVER_DATA_DIR = .\data
+NOVA_SERVER_CACHE_DIR = .\cache
+NOVA_SERVER_TMP_DIR = .\tmp
+NOVA_SERVER_LOG_DIR = .\log
\ No newline at end of file
diff --git a/backends/nserver/modules/image_interrogator/image_interrogator.py b/backends/nserver/modules/image_interrogator/image_interrogator.py
new file mode 100644
index 0000000..217f5f3
--- /dev/null
+++ b/backends/nserver/modules/image_interrogator/image_interrogator.py
@@ -0,0 +1,129 @@
+"""StableDiffusionXL Module
+"""
+import gc
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+
+from nova_utils.interfaces.server_module import Processor
+
+# Setting defaults
+_default_options = {"kind": "prompt", "mode": "fast" }
+
+# TODO: add log output
+class ImageInterrogator(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+
+ from PIL import Image as PILImage
+ import torch
+
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
+        kind = self.options['kind']  # "prompt" or "analysis"
+ mode = self.options['mode']
+ #url = self.current_session.input_data['input_image_url'].data[0]
+ #print(url)
+ input_image = self.current_session.input_data['input_image'].data
+ init_image = PILImage.fromarray(input_image)
+ mwidth = 256
+ mheight = 256
+
+
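+        # resize so the longer side becomes 256 px while preserving the aspect ratio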
+ w = mwidth
+ h = mheight
+ if init_image.width > init_image.height:
+ scale = float(init_image.height / init_image.width)
+ w = mwidth
+ h = int(mheight * scale)
+ elif init_image.width < init_image.height:
+ scale = float(init_image.width / init_image.height)
+ w = int(mwidth * scale)
+ h = mheight
+ else:
+ w = mwidth
+ h = mheight
+
+ init_image = init_image.resize((w, h))
+
+ from clip_interrogator import Config, Interrogator
+
+        config = Config(clip_model_name="ViT-L-14/openai", device=self.device)
+
+
+ if kind == "analysis":
+ ci = Interrogator(config)
+
+
+ image_features = ci.image_to_features(init_image)
+
+ top_mediums = ci.mediums.rank(image_features, 5)
+ top_artists = ci.artists.rank(image_features, 5)
+ top_movements = ci.movements.rank(image_features, 5)
+ top_trendings = ci.trendings.rank(image_features, 5)
+ top_flavors = ci.flavors.rank(image_features, 5)
+
+ medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
+ artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
+ movement_ranks = {movement: sim for movement, sim in
+ zip(top_movements, ci.similarities(image_features, top_movements))}
+ trending_ranks = {trending: sim for trending, sim in
+ zip(top_trendings, ci.similarities(image_features, top_trendings))}
+ flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
+
+ result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
+
+ print(result)
+ return result
+ else:
+
+ ci = Interrogator(config)
+ ci.config.blip_num_beams = 64
+ ci.config.chunk_size = 2024
+ ci.config.clip_offload = True
+ ci.config.apply_low_vram_defaults()
+ #MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
+ ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024
+
+ image = init_image
+ if mode == 'best':
+ prompt = ci.interrogate(image)
+ elif mode == 'classic':
+ prompt = ci.interrogate_classic(image)
+ elif mode == 'fast':
+ prompt = ci.interrogate_fast(image)
+            elif mode == 'negative':
+                prompt = ci.interrogate_negative(image)
+            else:
+                prompt = ci.interrogate_fast(image)  # fall back to fast mode for unknown values
+
+ #print(str(prompt))
+ return prompt
+
+
+ def to_output(self, data: dict):
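+        # the output template expects array data, so wrap the resulting string in a numpy array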
+ import numpy as np
+ self.current_session.output_data_templates['output'].data = np.array([data])
+ return self.current_session.output_data_templates
\ No newline at end of file
diff --git a/backends/nserver/modules/image_interrogator/image_interrogator.trainer b/backends/nserver/modules/image_interrogator/image_interrogator.trainer
new file mode 100644
index 0000000..216205c
--- /dev/null
+++ b/backends/nserver/modules/image_interrogator/image_interrogator.trainer
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/backends/nserver/modules/image_interrogator/readme.md b/backends/nserver/modules/image_interrogator/readme.md
new file mode 100644
index 0000000..ec092db
--- /dev/null
+++ b/backends/nserver/modules/image_interrogator/readme.md
@@ -0,0 +1,11 @@
+# CLIP Interrogator
+
+This module provides prompt generation from images.
+
+* https://huggingface.co/spaces/pharmapsychotic/CLIP-Interrogator
+
+## Options
+
+- `kind`: string, identifier of the kind of processing
+  - `prompt`: generates a prompt from the image
+  - `analysis`: generates a categorical analysis (mediums, artists, movements, trendings, flavors)
+
+- `mode`: string, interrogation mode used when `kind` is `prompt`
+  - `best`, `classic`, `fast` (default), `negative`
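+
+## Example payload
+
+A minimal request sketch, following the payload format of the other module readmes; host, port, and file paths are
+placeholders, and the `src` values are assumptions based on the module's image input and text output:
+
+```python
+import requests
+
+payload = {
+    'trainerFilePath': 'modules\\image_interrogator\\image_interrogator.trainer',
+    'server': '127.0.0.1',
+    'data': '[{"id":"input_image","type":"input","src":"file:image","uri":"path/to/my/image.jpg","active":"True"},'
+            '{"id":"output","type":"output","src":"user:text","active":"True"}]',
+    'optStr': 'kind=prompt;mode=fast'
+}
+
+url = 'http://127.0.0.1:53770/predict'
+headers = {'Content-type': 'application/x-www-form-urlencoded'}
+requests.post(url, headers=headers, data=payload)
+```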
diff --git a/backends/nserver/modules/image_interrogator/requirements.txt b/backends/nserver/modules/image_interrogator/requirements.txt
new file mode 100644
index 0000000..a9b489d
--- /dev/null
+++ b/backends/nserver/modules/image_interrogator/requirements.txt
@@ -0,0 +1,5 @@
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.1
+clip_interrogator
+git+https://github.com/huggingface/diffusers.git
diff --git a/backends/nserver/modules/image_interrogator/version.py b/backends/nserver/modules/image_interrogator/version.py
new file mode 100644
index 0000000..adf3132
--- /dev/null
+++ b/backends/nserver/modules/image_interrogator/version.py
@@ -0,0 +1,12 @@
+""" Clip Interrorgator
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '0'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/backends/nserver/modules/image_upscale/image_upscale_realesrgan.py b/backends/nserver/modules/image_upscale/image_upscale_realesrgan.py
new file mode 100644
index 0000000..32ec7c8
--- /dev/null
+++ b/backends/nserver/modules/image_upscale/image_upscale_realesrgan.py
@@ -0,0 +1,152 @@
+"""RealESRGan Module
+"""
+
+import os
+import glob
+import sys
+from nova_utils.interfaces.server_module import Processor
+from basicsr.archs.rrdbnet_arch import RRDBNet
+from basicsr.utils.download_util import load_file_from_url
+import numpy as np
+
+
+
+from realesrgan import RealESRGANer
+from realesrgan.archs.srvgg_arch import SRVGGNetCompact
+import cv2
+from PIL import Image as PILImage
+
+
+# Setting defaults
+_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False }
+
+# TODO: add log output
+class RealESRGan(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+        self.model_path = None  # can be set to load weights from a manual path instead of downloading them
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ input_image = self.current_session.input_data['input_image'].data
+
+
+ try:
+ model, netscale, file_url = self.manageModel(str(self.options['model']))
+
+ if self.model_path is not None:
+ model_path = self.model_path
+ else:
+ model_path = os.path.join('weights', self.options['model'] + '.pth')
+ if not os.path.isfile(model_path):
+ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+ for url in file_url:
+ # model_path will be updated
+ model_path = load_file_from_url(
+ url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
+
+ # use dni to control the denoise strength
+ dni_weight = None
+ if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1:
+ wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
+ model_path = [model_path, wdn_model_path]
+ dni_weight = [float(self.options['denoise_strength']), 1 - float(self.options['denoise_strength'])]
+
+ half = True
+ if self.options["compute_type"] == "fp32":
+ half=False
+
+
+ upsampler = RealESRGANer(
+ scale=netscale,
+ model_path=model_path,
+ dni_weight=dni_weight,
+ model=model,
+ tile= int(self.options['tile']),
+ tile_pad=int(self.options['tile_pad']),
+ pre_pad=int(self.options['pre_pad']),
+ half=half,
+ gpu_id=None) #Can be set if multiple gpus are available
+
+ if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement
+ from gfpgan import GFPGANer
+ face_enhancer = GFPGANer(
+ model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
+ upscale=int(self.options['outscale']),
+ arch='clean',
+ channel_multiplier=2,
+ bg_upsampler=upsampler)
+
+
+ pilimage = PILImage.fromarray(input_image)
+ img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR)
+            try:
+                if bool(self.options['face_enhance']):
+                    _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
+                else:
+                    output, _ = upsampler.enhance(img, outscale=int(self.options['outscale']))
+            except RuntimeError as error:
+                print('Error', error)
+                print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
+                # without a successful enhance there is no image to return
+                return "Error"
+
+            output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
+
+            return output
+
+
+
+
+ except Exception as e:
+ print(e)
+ sys.stdout.flush()
+ return "Error"
+
+
+ def to_output(self, data: dict):
+ self.current_session.output_data_templates['output_image'].data = data
+ return self.current_session.output_data_templates
+
+
+ def manageModel(self, model_name):
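+        """Return the network architecture, its native upscale factor and the weight download URL(s) for model_name."""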
+ if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
+ elif model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
+ elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
+ elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
+ netscale = 2
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
+ elif model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
+ elif model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = [
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
+ ]
+        else:
+            raise ValueError('Unknown model name: ' + model_name)
+
+        return model, netscale, file_url
\ No newline at end of file
diff --git a/backends/nserver/modules/image_upscale/image_upscale_realesrgan.trainer b/backends/nserver/modules/image_upscale/image_upscale_realesrgan.trainer
new file mode 100644
index 0000000..b3bf12f
--- /dev/null
+++ b/backends/nserver/modules/image_upscale/image_upscale_realesrgan.trainer
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/backends/nserver/modules/image_upscale/inference_realesrgan.py b/backends/nserver/modules/image_upscale/inference_realesrgan.py
new file mode 100644
index 0000000..0a8cc43
--- /dev/null
+++ b/backends/nserver/modules/image_upscale/inference_realesrgan.py
@@ -0,0 +1,166 @@
+import argparse
+import cv2
+import glob
+import os
+from basicsr.archs.rrdbnet_arch import RRDBNet
+from basicsr.utils.download_util import load_file_from_url
+
+from realesrgan import RealESRGANer
+from realesrgan.archs.srvgg_arch import SRVGGNetCompact
+
+
+def main():
+ """Inference demo for Real-ESRGAN.
+ """
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
+ parser.add_argument(
+ '-n',
+ '--model_name',
+ type=str,
+ default='RealESRGAN_x4plus',
+ help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | '
+ 'realesr-animevideov3 | realesr-general-x4v3'))
+ parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
+ parser.add_argument(
+ '-dn',
+ '--denoise_strength',
+ type=float,
+ default=0.5,
+ help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
+ 'Only used for the realesr-general-x4v3 model'))
+ parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
+ parser.add_argument(
+ '--model_path', type=str, default=None, help='[Option] Model path. Usually, you do not need to specify it')
+ parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
+ parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
+ parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
+ parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
+ parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
+ parser.add_argument(
+ '--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
+ parser.add_argument(
+ '--alpha_upsampler',
+ type=str,
+ default='realesrgan',
+ help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
+ parser.add_argument(
+ '--ext',
+ type=str,
+ default='auto',
+ help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
+ parser.add_argument(
+ '-g', '--gpu-id', type=int, default=None, help='gpu device to use (default=None) can be 0,1,2 for multi-gpu')
+
+ args = parser.parse_args()
+
+ # determine models according to model names
+ args.model_name = args.model_name.split('.')[0]
+ if args.model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
+ elif args.model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
+ elif args.model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
+ elif args.model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
+ netscale = 2
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
+ elif args.model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
+ elif args.model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
+ model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
+ netscale = 4
+ file_url = [
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
+ 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
+ ]
+
+ # determine model paths
+ if args.model_path is not None:
+ model_path = args.model_path
+ else:
+ model_path = os.path.join('weights', args.model_name + '.pth')
+ if not os.path.isfile(model_path):
+ ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+ for url in file_url:
+ # model_path will be updated
+ model_path = load_file_from_url(
+ url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
+
+ # use dni to control the denoise strength
+ dni_weight = None
+ if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
+ wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
+ model_path = [model_path, wdn_model_path]
+ dni_weight = [args.denoise_strength, 1 - args.denoise_strength]
+
+ # restorer
+ upsampler = RealESRGANer(
+ scale=netscale,
+ model_path=model_path,
+ dni_weight=dni_weight,
+ model=model,
+ tile=args.tile,
+ tile_pad=args.tile_pad,
+ pre_pad=args.pre_pad,
+ half=not args.fp32,
+ gpu_id=args.gpu_id)
+
+ if args.face_enhance: # Use GFPGAN for face enhancement
+ from gfpgan import GFPGANer
+ face_enhancer = GFPGANer(
+ model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
+ upscale=args.outscale,
+ arch='clean',
+ channel_multiplier=2,
+ bg_upsampler=upsampler)
+ os.makedirs(args.output, exist_ok=True)
+
+ if os.path.isfile(args.input):
+ paths = [args.input]
+ else:
+ paths = sorted(glob.glob(os.path.join(args.input, '*')))
+
+ for idx, path in enumerate(paths):
+ imgname, extension = os.path.splitext(os.path.basename(path))
+ print('Testing', idx, imgname)
+
+ img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+ if len(img.shape) == 3 and img.shape[2] == 4:
+ img_mode = 'RGBA'
+ else:
+ img_mode = None
+
+ try:
+ if args.face_enhance:
+ _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
+ else:
+ output, _ = upsampler.enhance(img, outscale=args.outscale)
+ except RuntimeError as error:
+ print('Error', error)
+ print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
+ else:
+ if args.ext == 'auto':
+ extension = extension[1:]
+ else:
+ extension = args.ext
+ if img_mode == 'RGBA': # RGBA images should be saved in png format
+ extension = 'png'
+ if args.suffix == '':
+ save_path = os.path.join(args.output, f'{imgname}.{extension}')
+ else:
+ save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
+ cv2.imwrite(save_path, output)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/backends/nserver/modules/image_upscale/requirements.txt b/backends/nserver/modules/image_upscale/requirements.txt
new file mode 100644
index 0000000..0cf3e2b
--- /dev/null
+++ b/backends/nserver/modules/image_upscale/requirements.txt
@@ -0,0 +1,13 @@
+realesrgan @ git+https://github.com/xinntao/Real-ESRGAN.git
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.0
+torchvision
+basicsr>=1.4.2
+facexlib>=0.2.5
+gfpgan>=1.3.5
+numpy
+opencv-python
+Pillow
+tqdm
+git+https://github.com/huggingface/diffusers.git
\ No newline at end of file
diff --git a/backends/nserver/modules/image_upscale/version.py b/backends/nserver/modules/image_upscale/version.py
new file mode 100644
index 0000000..7963e09
--- /dev/null
+++ b/backends/nserver/modules/image_upscale/version.py
@@ -0,0 +1,12 @@
+""" RealESRGan
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '0'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/backends/nserver/modules/stablediffusionxl/lora.py b/backends/nserver/modules/stablediffusionxl/lora.py
new file mode 100644
index 0000000..919e1b1
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/lora.py
@@ -0,0 +1,100 @@
+def build_lora_xl(lora, prompt, lora_weight):
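+    """Add the trigger tokens of a known lora to the prompt and pick a default weight if none is given.
+
+    Returns the lora name, the augmented prompt, and whether the lora identifier is known.
+    """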
+ existing_lora = False
+ if lora == "3drenderstyle":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = "3d style, 3d render, " + prompt + " "
+ existing_lora = True
+
+ if lora == "psychedelicnoir":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = prompt + " >"
+ existing_lora = True
+
+ if lora == "wojak":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = ", " + prompt + ", wojak"
+ existing_lora = True
+
+ if lora == "dreamarts":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = ", " + prompt
+ existing_lora = True
+
+ if lora == "voxel":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = "voxel style, " + prompt + " "
+ existing_lora = True
+
+ if lora == "kru3ger":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = "kru3ger_style, " + prompt + ""
+ existing_lora = True
+
+ if lora == "inkpunk":
+ if lora_weight == "":
+ lora_weight = "0.5"
+ prompt = "inkpunk style, " + prompt + " "
+ existing_lora = True
+
+ if lora == "inkscenery":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = " ink scenery, " + prompt + " "
+ existing_lora = True
+
+ if lora == "inkpainting":
+ if lora_weight == "":
+ lora_weight = "0.7"
+ prompt = "painting style, " + prompt + " ,"
+ existing_lora = True
+
+ if lora == "timburton":
+ if lora_weight == "":
+ lora_weight = "1.27"
+ pencil_weight = "1.15"
+ prompt = prompt + " (hand drawn with pencil"+pencil_weight+"), (tim burton style:"+lora_weight+")"
+ existing_lora = True
+
+ if lora == "pixelart":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = prompt + " (flat shading:1.2), (minimalist:1.4), "
+ existing_lora = True
+
+ if lora == "pepe":
+ if lora_weight == "":
+ lora_weight = "0.8"
+ prompt = prompt + " , pepe"
+ existing_lora = True
+
+ if lora == "bettertext":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = prompt + " ,"
+ existing_lora = True
+
+ if lora == "mspaint":
+ if lora_weight == "":
+ lora_weight = "1"
+ prompt = "MSPaint drawing " + prompt +">"
+ existing_lora = True
+
+ if lora == "woodfigure":
+ if lora_weight == "":
+ lora_weight = "0.7"
+ prompt = prompt + ",woodfigurez,artistic style "
+ existing_lora = True
+
+ if lora == "fireelement":
+ prompt = prompt + ",composed of fire elements, fire element"
+ existing_lora = True
+
+
+
+ return lora, prompt, existing_lora
\ No newline at end of file
diff --git a/backends/nserver/modules/stablediffusionxl/readme.md b/backends/nserver/modules/stablediffusionxl/readme.md
new file mode 100644
index 0000000..cccbe30
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/readme.md
@@ -0,0 +1,35 @@
+# Stable Diffusion XL
+
+This module provides image generation based on text prompts.
+
+* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
+
+## Options
+
+- `model`: string, identifier of the model to choose
+ - `stabilityai/stable-diffusion-xl-base-1.0`: Default Stable Diffusion XL model
+
+
+- `ratio`: aspect ratio of the output image
+  - `1-1`, `4-3`, `16-9`, `16-10`, `3-4`, `9-16`, `10-16`
+
+- `width`, `height`: explicit output size in pixels; when both are set they override `ratio`
+
+- `high_noise_frac`: fraction of the denoising steps run on the base model before the refiner takes over
+
+- `n_steps`: number of inference steps to perform
+
+- `lora`: optional lora identifier whose trigger tokens are added to the prompt (see `lora.py`)
+
+## Example payload
+
+```python
+payload = {
+ 'trainerFilePath': 'modules\\stablediffusionxl\\stablediffusionxl.trainer',
+ 'server': '127.0.0.1',
+ 'data' = '[{"id":"input_prompt","type":"input","src":"user:text","prompt":"' + prompt +'","active":"True"},{"id":"negative_prompt","type":"input","src":"user:text","prompt":"' + negative_prompt +'","active":"True"},{"id":"output_image","type":"output","src":"file:image","uri":"' + outputfile+'","active":"True"}]'
+ 'optStr': 'model=stabilityai/stable-diffusion-xl-base-1.0;ratio=4-3'
+}
+
+import requests
+
+url = 'http://127.0.0.1:53770/predict'
+headers = {'Content-type': 'application/x-www-form-urlencoded'}
+requests.post(url, headers=headers, data=payload)
+```
diff --git a/backends/nserver/modules/stablediffusionxl/requirements.txt b/backends/nserver/modules/stablediffusionxl/requirements.txt
new file mode 100644
index 0000000..9b9e167
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/requirements.txt
@@ -0,0 +1,9 @@
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.0
+compel~=2.0.2
+git+https://github.com/huggingface/diffusers.git
+transformers
+accelerate
+numpy
+omegaconf
diff --git a/backends/nserver/modules/stablediffusionxl/stablediffusionxl-img2img.py b/backends/nserver/modules/stablediffusionxl/stablediffusionxl-img2img.py
new file mode 100644
index 0000000..bae89e8
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/stablediffusionxl-img2img.py
@@ -0,0 +1,176 @@
+"""StableDiffusionXL Module
+"""
+
+import gc
+import sys
+import os
+
+# Add local dir to path for relative imports
+sys.path.insert(0, os.path.dirname(__file__))
+
+from nova_utils.interfaces.server_module import Processor
+from nova_utils.utils.cache_utils import get_file
+from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
+from diffusers.utils import load_image
+import numpy as np
+from PIL import Image as PILImage
+from lora import build_lora_xl
+
+
+
+# Setting defaults
+_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" }
+
+# TODO: add log output
+class StableDiffusionXL(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+ import torch
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ #input_image_url = self.current_session.input_data['input_image_url'].data
+ #input_image_url = ' '.join(input_image_url)
+ input_image = self.current_session.input_data['input_image'].data
+ input_prompt = self.current_session.input_data['input_prompt'].data
+ input_prompt = ' '.join(input_prompt)
+ negative_prompt = self.current_session.input_data['negative_prompt'].data
+ negative_prompt = ' '.join(negative_prompt)
+ # print("Input Image: " + input_image_url)
+ print("Input prompt: " + input_prompt)
+ print("Negative prompt: " + negative_prompt)
+
+ try:
+
+ model = self.options['model']
+ lora = self.options['lora']
+ #init_image = load_image(input_image_url).convert("RGB")
+ init_image = PILImage.fromarray(input_image)
+
+ mwidth = 1024
+ mheight = 1024
+ w = mwidth
+ h = mheight
+ if init_image.width > init_image.height:
+ scale = float(init_image.height / init_image.width)
+ w = mwidth
+ h = int(mheight * scale)
+ elif init_image.width < init_image.height:
+ scale = float(init_image.width / init_image.height)
+ w = int(mwidth * scale)
+ h = mheight
+ else:
+ w = mwidth
+ h = mheight
+
+ init_image = init_image.resize((w, h))
+
+ if lora != "" and lora != "None":
+ print("Loading lora...")
+
+ lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" )
+
+ from diffusers import AutoPipelineForImage2Image
+ import torch
+
+
+
+ #init_image = init_image.resize((int(w/2), int(h/2)))
+
+                pipe = AutoPipelineForImage2Image.from_pretrained(
+                    "stabilityai/stable-diffusion-xl-base-1.0",
+                    torch_dtype=torch.float16).to(self.device)
+
+ if existing_lora:
+ lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0]
+ if str(lora_uri) == "":
+ return "Lora not found"
+ lora_path = get_file(
+ fname=str(lora_uri.uri_id) + ".safetensors",
+ origin=lora_uri.uri_url,
+ file_hash=lora_uri.uri_hash,
+ cache_dir=os.getenv("CACHE_DIR"),
+ tmp_dir=os.getenv("TMP_DIR"),
+ )
+ pipe.load_lora_weights(str(lora_path))
+ print("Loaded Lora: " + str(lora_path))
+
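+                # fixed seed keeps results reproducible for identical requests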
+ seed = 20000
+ generator = torch.manual_seed(seed)
+
+ #os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
+
+ image = pipe(
+ prompt=input_prompt,
+ negative_prompt=negative_prompt,
+ image=init_image,
+ generator=generator,
+ num_inference_steps=int(self.options['n_steps']),
+ image_guidance_scale=float(self.options['guidance_scale']),
+                strength=float(self.options['strength'])).images[0]
+
+
+ elif model == "stabilityai/stable-diffusion-xl-refiner-1.0":
+
+ pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ model, torch_dtype=torch.float16, variant="fp16",
+ use_safetensors=True
+ )
+
+ n_steps = int(self.options['n_steps'])
+ transformation_strength = float(self.options['strength'])
+ cfg_scale = float(self.options['guidance_scale'])
+
+ pipe = pipe.to(self.device)
+ image = pipe(input_prompt, image=init_image,
+ negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0]
+
+ elif model == "timbrooks/instruct-pix2pix":
+ pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16,
+ safety_checker=None)
+
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+
+ pipe.to(self.device)
+ n_steps = int(self.options['n_steps'])
+ cfg_scale = float(self.options['guidance_scale'])
+ image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0]
+
+
+ if torch.cuda.is_available():
+ del pipe
+ gc.collect()
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+
+
+ numpy_array = np.array(image)
+ return numpy_array
+
+
+ except Exception as e:
+ print(e)
+ sys.stdout.flush()
+ return "Error"
+
+
+ def to_output(self, data: dict):
+ self.current_session.output_data_templates['output_image'].data = data
+ return self.current_session.output_data_templates
+
+
+
\ No newline at end of file
diff --git a/backends/nserver/modules/stablediffusionxl/stablediffusionxl-img2img.trainer b/backends/nserver/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
new file mode 100644
index 0000000..b6f4167
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/stablediffusionxl-img2img.trainer
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/backends/nserver/modules/stablediffusionxl/stablediffusionxl.py b/backends/nserver/modules/stablediffusionxl/stablediffusionxl.py
new file mode 100644
index 0000000..258ee4b
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/stablediffusionxl.py
@@ -0,0 +1,241 @@
+"""StableDiffusionXL Module
+"""
+import gc
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(__file__))
+
+from nova_utils.interfaces.server_module import Processor
+from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, logging
+from compel import Compel, ReturnedEmbeddingsType
+from nova_utils.utils.cache_utils import get_file
+import numpy as np
+
+import torch
+from PIL import Image
+from lora import build_lora_xl
+logging.disable_progress_bar()
+logging.enable_explicit_format()
+#logging.set_verbosity_info()
+
+
+# Setting defaults
+_default_options = {"model": "stabilityai/stable-diffusion-xl-base-1.0", "ratio": "1-1", "width": "", "height":"", "high_noise_frac" : "0.8", "n_steps" : "35", "lora" : "" }
+
+# TODO: add log output
+class StableDiffusionXL(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.current_session = None
+
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_iter) -> dict:
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.variant = "fp16"
+ self.torch_d_type = torch.float32
+ self.ds_iter = ds_iter
+ current_session_name = self.ds_iter.session_names[0]
+ self.current_session = self.ds_iter.sessions[current_session_name]['manager']
+ input_prompt = self.current_session.input_data['input_prompt'].data
+ input_prompt = ' '.join(input_prompt)
+ negative_prompt = self.current_session.input_data['negative_prompt'].data
+ negative_prompt = ' '.join(negative_prompt)
+ new_width = 0
+ new_height = 0
+ print("Input prompt: " + input_prompt)
+ print("Negative prompt: " + negative_prompt)
+
+ try:
+ if self.options['width'] != "" and self.options['height'] != "":
+ new_width = int(self.options['width'])
+ new_height = int(self.options['height'])
+ ratiow, ratioh = self.calculate_aspect(new_width, new_height)
+ print("Ratio:" + str(ratiow) + ":" + str(ratioh))
+
+ else:
+ ratiow = str(self.options['ratio']).split('-')[0]
+                ratioh = str(self.options['ratio']).split('-')[1]
+
+ model = self.options["model"]
+ lora = self.options["lora"]
+ mwidth = 1024
+ mheight = 1024
+
+ height = mheight
+ width = mwidth
+
+ ratiown = int(ratiow)
+            ratiohn = int(ratioh)
+
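+            # fit the requested aspect ratio into the 1024x1024 maximum: the smaller side of the ratio is scaled down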
+ if ratiown > ratiohn:
+ height = int((ratiohn/ratiown) * float(width))
+ elif ratiown < ratiohn:
+ width = int((ratiown/ratiohn) * float(height))
+ elif ratiown == ratiohn:
+ width = height
+
+
+ print("Processing Output width: " + str(width) + " Output height: " + str(height))
+
+
+
+
+ if model == "stabilityai/stable-diffusion-xl-base-1.0":
+ base = StableDiffusionXLPipeline.from_pretrained(model, torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
+ print("Loaded model: " + model)
+
+ else:
+
+ model_uri = [ x for x in self.trainer.meta_uri if x.uri_id == model][0]
+ if str(model_uri) == "":
+ return "Model not found"
+
+ model_path = get_file(
+ fname=str(model_uri.uri_id) + ".safetensors",
+ origin=model_uri.uri_url,
+ file_hash=model_uri.uri_hash,
+ cache_dir=os.getenv("CACHE_DIR"),
+ tmp_dir=os.getenv("TMP_DIR"),
+ )
+
+ print(str(model_path))
+
+
+ base = StableDiffusionXLPipeline.from_single_file(str(model_path), torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
+ print("Loaded model: " + model)
+
+ if lora != "" and lora != "None":
+ print("Loading lora...")
+ lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "")
+
+ if existing_lora:
+ lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0]
+ if str(lora_uri) == "":
+ return "Lora not found"
+ lora_path = get_file(
+ fname=str(lora_uri.uri_id) + ".safetensors",
+ origin=lora_uri.uri_url,
+ file_hash=lora_uri.uri_hash,
+ cache_dir=os.getenv("CACHE_DIR"),
+ tmp_dir=os.getenv("TMP_DIR"),
+ )
+
+ base.load_lora_weights(str(lora_path))
+ print("Loaded Lora: " + str(lora_path))
+
+ refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+ "stabilityai/stable-diffusion-xl-refiner-1.0",
+ text_encoder_2=base.text_encoder_2,
+ vae=base.vae,
+ torch_dtype=self.torch_d_type,
+ use_safetensors=True,
+ variant=self.variant,
+ )
+
+
+ compel_base = Compel(
+ tokenizer=[base.tokenizer, base.tokenizer_2],
+ text_encoder=[base.text_encoder, base.text_encoder_2],
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+ requires_pooled=[False, True],
+ )
+
+ compel_refiner = Compel(
+ tokenizer=[refiner.tokenizer_2],
+ text_encoder=[refiner.text_encoder_2],
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+ requires_pooled=[True])
+
+ conditioning, pooled = compel_base(input_prompt)
+ negative_conditioning, negative_pooled = compel_base(negative_prompt)
+
+ conditioning_refiner, pooled_refiner = compel_refiner(input_prompt)
+ negative_conditioning_refiner, negative_pooled_refiner = compel_refiner(
+ negative_prompt)
+
+
+ n_steps = int(self.options['n_steps'])
+ high_noise_frac = float(self.options['high_noise_frac'])
+
+
+ #base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
+
+
+
+ img = base(
+ prompt_embeds=conditioning,
+ pooled_prompt_embeds=pooled,
+ negative_prompt_embeds=negative_conditioning,
+ negative_pooled_prompt_embeds=negative_pooled,
+ width=width,
+ height=height,
+ num_inference_steps=n_steps,
+ denoising_end=high_noise_frac,
+ output_type="latent",
+ ).images
+
+ if torch.cuda.is_available():
+ del base
+ gc.collect()
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+
+ refiner.to(self.device)
+ # refiner.enable_model_cpu_offload()
+ image = refiner(
+ prompt_embeds=conditioning_refiner,
+ pooled_prompt_embeds=pooled_refiner,
+ negative_prompt_embeds=negative_conditioning_refiner,
+ negative_pooled_prompt_embeds=negative_pooled_refiner,
+ num_inference_steps=n_steps,
+ denoising_start=high_noise_frac,
+ num_images_per_prompt=1,
+ image=img,
+ ).images[0]
+
+ if torch.cuda.is_available():
+ del refiner
+ gc.collect()
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+
+            if (new_height != 0 or new_width != 0) and (new_width != mwidth or new_height != mheight):
+ print("Resizing to width: " + str(new_width) + " height: " + str(new_height))
+ image = image.resize((new_width, new_height), Image.LANCZOS)
+
+ numpy_array = np.array(image)
+ return numpy_array
+
+
+ except Exception as e:
+ print(e)
+ sys.stdout.flush()
+ return "Error"
+
+ def calculate_aspect(self, width: int, height: int):
+ def gcd(a, b):
+ """The GCD (greatest common divisor) is the highest number that evenly divides both width and height."""
+ return a if b == 0 else gcd(b, a % b)
+
+ r = gcd(width, height)
+ x = int(width / r)
+ y = int(height / r)
+
+ return x, y
+
+
+
+ def to_output(self, data: dict):
+ self.current_session.output_data_templates['output_image'].data = data
+ return self.current_session.output_data_templates
\ No newline at end of file
diff --git a/backends/nserver/modules/stablediffusionxl/stablediffusionxl.trainer b/backends/nserver/modules/stablediffusionxl/stablediffusionxl.trainer
new file mode 100644
index 0000000..0e86e7e
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/stablediffusionxl.trainer
@@ -0,0 +1,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/backends/nserver/modules/stablediffusionxl/version.py b/backends/nserver/modules/stablediffusionxl/version.py
new file mode 100644
index 0000000..bba6553
--- /dev/null
+++ b/backends/nserver/modules/stablediffusionxl/version.py
@@ -0,0 +1,12 @@
+""" Stable Diffusion XL
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '0'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/backends/nserver/modules/whisperx/readme.md b/backends/nserver/modules/whisperx/readme.md
new file mode 100644
index 0000000..ffe67a3
--- /dev/null
+++ b/backends/nserver/modules/whisperx/readme.md
@@ -0,0 +1,52 @@
+# WhisperX
+
+This module provides fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and
+speaker diarization.
+
+* https://github.com/m-bain/whisperX
+
+## Options
+
+- `model`: string, identifier of the model to choose, sorted ascending in required (V)RAM:
+ - `tiny`, `tiny.en`
+ - `base`, `base.en`
+ - `small`, `small.en`
+ - `medium`, `medium.en`
+ - `large-v1`
+ - `large-v2`
+
+- `alignment_mode`: string, alignment method to use
+ - `raw` Segments as identified by Whisper
+ - `segment` Improved segmentation using separate alignment model. Roughly equivalent to sentence alignment.
+ - `word` Improved segmentation using separate alignment model. Equivalent to word alignment.
+
+- `language`: language code for transcription and alignment models. Supported languages:
+ - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh`
+ - `None`: auto-detect language from first 30 seconds of audio
+
+- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption
+
+## Examples
+
+### Request
+
+```python
+import requests
+import json
+
+payload = {
+ "jobID" : "whisper_transcript",
+ "data": json.dumps([
+ {"src":"file:stream:audio", "type":"input", "id":"audio", "uri":"path/to/my/file.wav"},
+ {"src":"file:annotation:free", "type":"output", "id":"transcript", "uri":"path/to/my/transcript.annotation"}
+ ]),
+ "trainerFilePath": "modules\\whisperx\\whisperx_transcript.trainer",
+}
+
+
+url = 'http://127.0.0.1:8080/process'
+headers = {'Content-type': 'application/x-www-form-urlencoded'}
+x = requests.post(url, headers=headers, data=payload)
+print(x.text)
+
+```
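+
+### Polling for results
+
+A sketch of how a client might poll until the job is done. The `/job_status` and `/fetch_result` routes and the
+`delete_after_download` flag are taken from `backends/nserver/utils.py`; the exact response format of `/job_status` is
+an assumption here (`check_server_status` treats status `2` as finished):
+
+```python
+import time
+import requests
+
+headers = {'Content-type': 'application/x-www-form-urlencoded'}
+data = {"jobID": "whisper_transcript"}
+
+while True:
+    # assumption: /job_status returns the job state as a plain integer
+    status = int(requests.post('http://127.0.0.1:8080/job_status', headers=headers, data=data).text)
+    if status == 2:  # finished
+        result = requests.post('http://127.0.0.1:8080/fetch_result', headers=headers,
+                               data={"jobID": "whisper_transcript", "delete_after_download": True})
+        print(result.text)
+        break
+    time.sleep(1)
+```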
diff --git a/backends/nserver/modules/whisperx/requirements.txt b/backends/nserver/modules/whisperx/requirements.txt
new file mode 100644
index 0000000..cd86386
--- /dev/null
+++ b/backends/nserver/modules/whisperx/requirements.txt
@@ -0,0 +1,7 @@
+hcai-nova-utils>=1.5.5
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.0+cu118
+torchvision>=0.15.1+cu118
+torchaudio>=2.0.0+cu118
+pyannote-audio @ git+https://github.com/shelm/pyannote-audio.git@d7b4de3
+whisperx @ git+https://github.com/m-bain/whisperx.git@49e0130
diff --git a/backends/nserver/modules/whisperx/version.py b/backends/nserver/modules/whisperx/version.py
new file mode 100644
index 0000000..aa37301
--- /dev/null
+++ b/backends/nserver/modules/whisperx/version.py
@@ -0,0 +1,12 @@
+""" WhisperX
+"""
+# We follow Semantic Versioning (https://semver.org/)
+_MAJOR_VERSION = '1'
+_MINOR_VERSION = '0'
+_PATCH_VERSION = '1'
+
+__version__ = '.'.join([
+ _MAJOR_VERSION,
+ _MINOR_VERSION,
+ _PATCH_VERSION,
+])
diff --git a/backends/nserver/modules/whisperx/whisperx_transcript.py b/backends/nserver/modules/whisperx/whisperx_transcript.py
new file mode 100644
index 0000000..f24e63e
--- /dev/null
+++ b/backends/nserver/modules/whisperx/whisperx_transcript.py
@@ -0,0 +1,124 @@
+"""WhisperX Module
+"""
+from nova_utils.interfaces.server_module import Processor
+import sys
+
+# Setting defaults
+_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'}
+
+# supported language codes, cf. whisperx/alignment.py
+# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None}
+# {'vi', 'uk', 'pl', 'ur', 'ru', 'ko', 'en', 'zh', 'es', 'it', 'el', 'te', 'da', 'he', 'fa', 'pt', 'de',
+# 'fr', 'tr', 'nl', 'cs', 'hu', 'fi', 'ar', 'ja', None}
+
+class WhisperX(Processor):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.options = _default_options | self.options
+ self.device = None
+ self.ds_iter = None
+ self.session_manager = None
+
+ # IO shortcuts
+ self.input = [x for x in self.model_io if x.io_type == "input"]
+ self.output = [x for x in self.model_io if x.io_type == "output"]
+ assert len(self.input) == 1 and len(self.output) == 1
+ self.input = self.input[0]
+ self.output = self.output[0]
+
+ def process_data(self, ds_manager) -> dict:
+ import whisperx
+ import torch
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
+ self.session_manager = self.get_session_manager(ds_manager)
+ input_audio = self.session_manager.input_data['audio']
+
+ # sliding window will be applied by WhisperX
+ audio = whisperx.load_audio(input_audio.meta_data.file_path)
+
+ # transcribe with original whisper
+ try:
+ model = whisperx.load_model(self.options["model"], self.device, compute_type=self.options['compute_type'],
+ language=self.options['language'])
+ except ValueError:
+ print(f'Your hardware does not support {self.options["compute_type"]} - fallback to float32')
+ sys.stdout.flush()
+ model = whisperx.load_model(self.options["model"], self.device, compute_type='float32',
+ language=self.options['language'])
+
+ result = model.transcribe(audio, batch_size=int(self.options["batch_size"]))
+
+ # delete model if low on GPU resources
+        import gc; del model; gc.collect(); torch.cuda.empty_cache()
+
+ if not self.options["alignment_mode"] == "raw":
+ # load alignment model and metadata
+ model_a, metadata = whisperx.load_align_model(
+ language_code=result["language"], device=self.device
+ )
+
+ # align whisper output
+ result_aligned = whisperx.align(
+ result["segments"], model_a, metadata, audio, self.device
+ )
+ result = result_aligned
+
+ # delete model if low on GPU resources
+            import gc; del model_a; gc.collect(); torch.cuda.empty_cache()
+
+ return result
+
+ def to_output(self, data: dict):
+ def _fix_missing_timestamps(data):
+ """
+ https://github.com/m-bain/whisperX/issues/253
+ Some characters might miss timestamps and recognition scores. This function adds estimated time stamps assuming a fixed time per character of 65ms.
+ Confidence for each added timestamp will be 0.
+ Args:
+ data (dictionary): output dictionary as returned by process_data
+ """
+ last_end = 0
+ for s in data["segments"]:
+ for w in s["words"]:
+ if "end" in w.keys():
+ last_end = w["end"]
+ else:
+ #TODO: rethink lower bound for confidence; place word centred instead of left aligned
+ w["start"] = last_end
+ last_end += 0.065
+ w["end"] = last_end
+ #w["score"] = 0.000
+ w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4])
+
+ def _hmean(scores):
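+            # note: despite the name, this computes the geometric mean of the scores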
+ if len(scores) > 0:
+ prod = scores[0]
+ for s in scores[1:]:
+ prod *= s
+ prod = prod**(1/len(scores))
+ else:
+ prod = 0
+ return prod
+
+ if (
+ self.options["alignment_mode"] == "word"
+ or self.options["alignment_mode"] == "segment"
+ ):
+ _fix_missing_timestamps(data)
+
+ if self.options["alignment_mode"] == "word":
+ anno_data = [
+ (w["start"], w["end"], w["word"], w["score"])
+ for w in data["word_segments"]
+ ]
+ else:
+ anno_data = [
+ #(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
+ (w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?)
+ ]
+
+ # convert to milliseconds
+ anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data]
+ out = self.session_manager.output_data_templates[self.output.io_id]
+ out.data = anno_data
+ return self.session_manager.output_data_templates
diff --git a/backends/nserver/modules/whisperx/whisperx_transcript.trainer b/backends/nserver/modules/whisperx/whisperx_transcript.trainer
new file mode 100644
index 0000000..44dae41
--- /dev/null
+++ b/backends/nserver/modules/whisperx/whisperx_transcript.trainer
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/backends/nserver/setup.cmd b/backends/nserver/setup.cmd
new file mode 100644
index 0000000..95cc8ca
--- /dev/null
+++ b/backends/nserver/setup.cmd
@@ -0,0 +1,4 @@
+python -m venv venv
+call venv/Scripts/activate
+pip install hcai-nova-server
+python nova-server
\ No newline at end of file
diff --git a/backends/nova_server.py b/backends/nserver/utils.py
similarity index 83%
rename from backends/nova_server.py
rename to backends/nserver/utils.py
index d451b5d..94107eb 100644
--- a/backends/nova_server.py
+++ b/backends/nserver/utils.py
@@ -10,34 +10,29 @@ import PIL.Image as Image
from utils.output_utils import upload_media_to_hoster
"""
-This file contains basic calling functions for ML tasks that are outsourced to nova-server
-(https://pypi.org/project/hcai-nova-server/). nova-server is an Open-Source backend that enables running models locally
-based on preefined modules (nova-server-modules), by accepting a request form.
+This file contains basic calling functions for ML tasks that are outsourced to an n-server instance. It is an open-source
+backend that enables running models locally based on predefined modules, accepting a request form.
Modules are deployed in separate virtual environments so dependencies won't conflict.
-
-Setup nova-server:
-https://hcmlab.github.io/nova-server/docbuild/html/tutorials/introduction.html
-
"""
"""
-send_request_to_nova_server(request_form, address)
+send_request_to_server(request_form, address)
Function to send a request_form to the server, containing all the information we parsed from the Nostr event and added
in the module that is calling the server
"""
-def send_request_to_nova_server(request_form, address):
- print("Sending job to NOVA-Server")
+def send_request_to_server(request_form, address):
+ print("Sending job to Server")
url = ('http://' + address + '/process')
headers = {'Content-type': 'application/x-www-form-urlencoded'}
response = requests.post(url, headers=headers, data=request_form)
return response.text
-def send_file_to_nova_server(filepath, address):
- print("Sending file to NOVA-Server")
+def send_file_to_server(filepath, address):
+ print("Sending file to Server")
url = ('http://' + address + '/upload')
try:
fp = open(filepath, 'rb')
@@ -53,14 +48,14 @@ def send_file_to_nova_server(filepath, address):
"""
-check_nova_server_status(request_form, address)
+check_server_status(jobID, address)
Function that requests the status of the current process with the jobID (we use the Nostr event as jobID).
When the job is successfully finished, we grab the result and, depending on the type, return the output.
We throw an exception on error.
"""
-def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
+def check_server_status(jobID, address) -> str | pd.DataFrame:
headers = {'Content-type': 'application/x-www-form-urlencoded'}
url_status = 'http://' + address + '/job_status'
url_log = 'http://' + address + '/log'
@@ -85,7 +80,7 @@ def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
if status == 2:
try:
url_fetch = 'http://' + address + '/fetch_result'
- print("Fetching Results from NOVA-Server...")
+ print("Fetching Results from Server...")
data = {"jobID": jobID, "delete_after_download": True}
response = requests.post(url_fetch, headers=headers, data=data)
content_type = response.headers['content-type']
@@ -96,7 +91,6 @@ def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
result = upload_media_to_hoster("./outputs/image.jpg")
os.remove("./outputs/image.jpg")
return result
-
elif content_type == 'text/plain; charset=utf-8':
return response.content.decode('utf-8')
elif content_type == "application/x-zip-compressed":
diff --git a/main.py b/main.py
index bda58d4..5c49e14 100644
--- a/main.py
+++ b/main.py
@@ -10,7 +10,7 @@ import tasks.textextraction_pdf as textextraction_pdf
import tasks.textextraction_google as textextraction_google
import tasks.translation_google as translation_google
import tasks.translation_libretranslate as translation_libretranslate
-from tasks import imagegeneration_replicate_sdxl, videogeneration_replicate_svd
+from tasks import imagegeneration_replicate_sdxl, videogeneration_replicate_svd, imagegeneration_sdxl
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -81,6 +81,11 @@ def playground():
bot_config.SUPPORTED_DVMS.append(svdreplicate)
svdreplicate.run()
+ if os.getenv("N_SERVER") is not None and os.getenv("N_SERVER") != "":
+ unstable_artist = imagegeneration_sdxl.build_example("NostrAI DVM Artist",
+ "stable_diffusion", admin_config, os.getenv("N_SERVER"))
+ bot_config.SUPPORTED_DVMS.append(unstable_artist) # We add unstable Diffusion to the bot
+ unstable_artist.run()
#Let's define a function so we can add external DVMs to our bot; we will instantiate it afterwards
diff --git a/tasks/imagegeneration_sdxl.py b/tasks/imagegeneration_sdxl.py
index dd2f69a..24a5a71 100644
--- a/tasks/imagegeneration_sdxl.py
+++ b/tasks/imagegeneration_sdxl.py
@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
-from backends.nova_server import check_nova_server_status, send_request_to_nova_server
+from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -15,7 +15,7 @@ from utils.definitions import EventDefinitions
from utils.nostr_utils import check_and_set_private_key
"""
-This File contains a Module to transform Text input on NOVA-Server and receive results back.
+This file contains a module to transform text input on n-server and receive results back.
Accepted Inputs: Prompt (text)
Outputs: A URL to an image
@@ -53,7 +53,7 @@ class ImageGenerationSDXL(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
- request_form["trainerFilePath"] = 'modules\\stablediffusionxl\\stablediffusionxl.trainer'
+ request_form["trainerFilePath"] = r'stablediffusionxl\stablediffusionxl.trainer'
prompt = ""
negative_prompt = ""
@@ -148,14 +148,14 @@ class ImageGenerationSDXL(DVMTaskInterface):
def process(self, request_form):
try:
- # Call the process route of NOVA-Server with our request form.
- response = send_request_to_nova_server(request_form, self.options['nova_server'])
+ # Call the process route of n-server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
- print("Job " + request_form['jobID'] + " sent to NOVA-server")
+ print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
- thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
- print("Wait for results of NOVA-Server...")
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
result = thread.get()
return result
@@ -172,9 +172,9 @@ def build_example(name, identifier, admin_config, server_address, default_model=
dvm_config.LNBITS_INVOICE_KEY = "" # This one will not use Lnbits to create invoices, but rely on zaps
dvm_config.LNBITS_URL = ""
- # A module might have options it can be initialized with, here we set a default model, and the nova-server
+ # A module might have options it can be initialized with; here we set a default model, a default lora and the server
# address it should use. These parameters can be freely defined in the task component
- options = {'default_model': default_model, 'default_lora': default_lora, 'nova_server': server_address}
+ options = {'default_model': default_model, 'default_lora': default_lora, 'server': server_address}
nip90params = {
"negative_prompt": {
@@ -214,7 +214,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
- dvm = build_example("Unstable Diffusion", "unstable_diffusion", admin_config, os.getenv("NOVA_SERVER"), "stabilityai/stable-diffusion-xl", "")
+ dvm = build_example("Unstable Diffusion", "unstable_diffusion", admin_config, os.getenv("N_SERVER"), "stabilityai/stable-diffusion-xl", "")
dvm.run()
keep_alive()
\ No newline at end of file
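# A minimal sketch of the request/poll flow that all n-server tasks in this
# diff share, using the helpers imported from backends.nserver.utils above;
# the wrapper function run_nserver_job is an assumption for illustration.
import json
from multiprocessing.pool import ThreadPool

from backends.nserver.utils import check_server_status, send_request_to_server


def run_nserver_job(request_form, server):
    response = send_request_to_server(request_form, server)
    if bool(json.loads(response)['success']):
        pool = ThreadPool(processes=1)
        thread = pool.apply_async(check_server_status,
                                  (request_form['jobID'], server))
        return thread.get()  # blocks until the server reports a result
    return None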
diff --git a/tasks/imagegeneration_sdxlimg2img.py b/tasks/imagegeneration_sdxlimg2img.py
index 0c176ad..837b446 100644
--- a/tasks/imagegeneration_sdxlimg2img.py
+++ b/tasks/imagegeneration_sdxlimg2img.py
@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
-from backends.nova_server import check_nova_server_status, send_request_to_nova_server
+from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -15,7 +15,7 @@ from utils.definitions import EventDefinitions
from utils.nostr_utils import check_and_set_private_key
"""
-This File contains a Module to transform Text input on NOVA-Server and receive results back.
+This file contains a module to transform text input on n-server and receive results back.
Accepted Inputs: Prompt (text)
Outputs: A URL to an image
@@ -60,7 +60,7 @@ class ImageGenerationSDXLIMG2IMG(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
- request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl-img2img.trainer'
+ request_form["trainerFilePath"] = r'stablediffusionxl\stablediffusionxl-img2img.trainer'
prompt = ""
negative_prompt = ""
@@ -178,13 +178,13 @@ class ImageGenerationSDXLIMG2IMG(DVMTaskInterface):
def process(self, request_form):
try:
- # Call the process route of NOVA-Server with our request form.
- response = send_request_to_nova_server(request_form, self.options['nova_server'])
+ # Call the process route of n-server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
- print("Job " + request_form['jobID'] + " sent to NOVA-server")
+ print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
- thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
- print("Wait for results of NOVA-Server...")
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
result = thread.get()
return result
@@ -224,8 +224,8 @@ def build_example(name, identifier, admin_config, server_address, default_lora="
"nip90Params": nip90params
}
- # A module might have options it can be initialized with, here we set a default model, lora and the nova-server
- options = {'default_lora': default_lora, 'strength': strength, 'nova_server': server_address}
+ # A module might have options it can be initialized with; here we set a default lora, the strength and the server address
+ options = {'default_lora': default_lora, 'strength': strength, 'server': server_address}
nip89config = NIP89Config()
@@ -249,7 +249,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
- dvm = build_example("Image Converter Inkpunk", "image2image", admin_config, os.getenv("NOVA_SERVER"), "", 0.6)
+ dvm = build_example("Image Converter Inkpunk", "image2image", admin_config, os.getenv("N_SERVER"), "", 0.6)
dvm.run()
keep_alive()
\ No newline at end of file
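# The trainerFilePath values above are Windows-style paths relative to the
# server's module directory. A portable way to build them (an assumption,
# not something this diff introduces) is pathlib's PureWindowsPath:
from pathlib import PureWindowsPath

def trainer_path(module, trainer):
    # e.g. trainer_path('stablediffusionxl', 'stablediffusionxl-img2img.trainer')
    # -> r'stablediffusionxl\stablediffusionxl-img2img.trainer'
    return str(PureWindowsPath(module, trainer))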
diff --git a/tasks/imageinterrogator.py b/tasks/imageinterrogator.py
index 4759632..acebe5d 100644
--- a/tasks/imageinterrogator.py
+++ b/tasks/imageinterrogator.py
@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
-from backends.nova_server import check_nova_server_status, send_request_to_nova_server
+from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -48,7 +48,7 @@ class ImageInterrogator(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
- request_form["trainerFilePath"] = r'modules\image_interrogator\image_interrogator.trainer'
+ request_form["trainerFilePath"] = r'\image_interrogator\image_interrogator.trainer'
url = ""
method = "prompt"
mode = "best"
@@ -93,13 +93,13 @@ class ImageInterrogator(DVMTaskInterface):
def process(self, request_form):
try:
- # Call the process route of NOVA-Server with our request form.
- response = send_request_to_nova_server(request_form, self.options['nova_server'])
+ # Call the process route of n-server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
- print("Job " + request_form['jobID'] + " sent to NOVA-server")
+ print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
- thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
- print("Wait for results of NOVA-Server...")
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
result = thread.get()
return result
@@ -134,8 +134,8 @@ def build_example(name, identifier, admin_config, server_address):
"nip90Params": nip90params
}
- # A module might have options it can be initialized with, here we set a default model, lora and the nova-server
- options = {'nova_server': server_address}
+ # A module might have options it can be initialized with; here we only set the server address
+ options = {'server': server_address}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY,
@@ -158,7 +158,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
- dvm = build_example("Image Interrogator", "imageinterrogator", admin_config, os.getenv("NOVA_SERVER"))
+ dvm = build_example("Image Interrogator", "imageinterrogator", admin_config, os.getenv("N_SERVER"))
dvm.run()
keep_alive()
\ No newline at end of file
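# A sketch of the request form the interrogator hunk above builds: the jobID
# concatenates the hex event id and the DVM name, and the trainer path matches
# the corrected value above. The helper and the placeholder id are assumptions.
def build_interrogator_request(event_hex_id, name):
    return {
        "jobID": event_hex_id + "_" + name.replace(" ", ""),
        "trainerFilePath": r'image_interrogator\image_interrogator.trainer',
    }

# build_interrogator_request("deadbeef", "Image Interrogator")
# -> {'jobID': 'deadbeef_ImageInterrogator', 'trainerFilePath': ...}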
diff --git a/tasks/imageupscale.py b/tasks/imageupscale.py
index 932ff6b..14503a5 100644
--- a/tasks/imageupscale.py
+++ b/tasks/imageupscale.py
@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
-from backends.nova_server import check_nova_server_status, send_request_to_nova_server
+from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -91,13 +91,13 @@ class ImageUpscale(DVMTaskInterface):
def process(self, request_form):
try:
- # Call the process route of NOVA-Server with our request form.
- response = send_request_to_nova_server(request_form, self.options['nova_server'])
+ # Call the process route of n-server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
- print("Job " + request_form['jobID'] + " sent to NOVA-server")
+ print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
- thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
- print("Wait for results of NOVA-Server...")
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
result = thread.get()
return result
@@ -128,8 +128,8 @@ def build_example(name, identifier, admin_config, server_address):
"nip90Params": nip90params
}
- # A module might have options it can be initialized with, here we set a default model, lora and the nova-server
- options = {'nova_server': server_address}
+ # A module might have options it can be initialized with; here we only set the server address
+ options = {'server': server_address}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY,
@@ -152,7 +152,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
- dvm = build_example("Image Upscaler", "imageupscale", admin_config, os.getenv("NOVA_SERVER"))
+ dvm = build_example("Image Upscaler", "imageupscale", admin_config, os.getenv("N_SERVER"))
dvm.run()
keep_alive()
\ No newline at end of file
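# Across every task in this diff the server address follows the same route:
# build_example(...) stores os.getenv("N_SERVER") under options['server']
# (previously options['nova_server']), and process() reads it back when
# calling send_request_to_server. A condensed sketch, config plumbing omitted:
import os

options = {'server': os.getenv("N_SERVER")}  # was: {'nova_server': ...}
# later, inside process():
#     send_request_to_server(request_form, self.options['server'])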
diff --git a/tasks/textextraction_google.py b/tasks/textextraction_google.py
index fe47b86..1d21640 100644
--- a/tasks/textextraction_google.py
+++ b/tasks/textextraction_google.py
@@ -136,7 +136,7 @@ def build_example(name, identifier, admin_config):
dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
options = {'api_key': None}
- # A module might have options it can be initialized with, here we set a default model, and the nova-server
+ # A module might have options it can be initialized with; here we set a default model, and the server
# address it should use. These parameters can be freely defined in the task component
nip90params = {
diff --git a/tasks/textextraction_whisperx.py b/tasks/textextraction_whisperx.py
index f0f9f40..215f858 100644
--- a/tasks/textextraction_whisperx.py
+++ b/tasks/textextraction_whisperx.py
@@ -6,7 +6,7 @@ from pathlib import Path
import dotenv
-from backends.nova_server import check_nova_server_status, send_request_to_nova_server, send_file_to_nova_server
+from backends.nserver.utils import check_server_status, send_request_to_server, send_file_to_n_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -17,7 +17,7 @@ from utils.definitions import EventDefinitions
from utils.nostr_utils import check_and_set_private_key
"""
-This File contains a Module to transform A media file input on NOVA-Server and receive results back.
+This file contains a module to transform a media file input on n-server and receive results back.
Accepted Inputs: Url to media file (url)
Outputs: Transcribed text
@@ -53,7 +53,7 @@ class SpeechToTextWhisperX(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", ""),
- "trainerFilePath": 'modules\\whisperx\\whisperx_transcript.trainer'}
+ "trainerFilePath": r'whisperx\whisperx_transcript.trainer'}
if self.options.get("default_model"):
model = self.options['default_model']
@@ -107,7 +107,7 @@ class SpeechToTextWhisperX(DVMTaskInterface):
end_time = float(tag.as_vec()[3])
filepath = organize_input_media_data(url, input_type, start_time, end_time, dvm_config, client, True, media_format)
- path_on_server = send_file_to_nova_server(os.path.realpath(filepath), self.options['nova_server'])
+ path_on_server = send_file_to_n_server(os.path.realpath(filepath), self.options['server'])
io_input = {
"id": "audio",
@@ -134,13 +134,13 @@ class SpeechToTextWhisperX(DVMTaskInterface):
def process(self, request_form):
try:
- # Call the process route of NOVA-Server with our request form.
- response = send_request_to_nova_server(request_form, self.options['nova_server'])
+ # Call the process route of n-server with our request form.
+ response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
- print("Job " + request_form['jobID'] + " sent to NOVA-server")
+ print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
- thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
- print("Wait for results of NOVA-Server...")
+ thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
+ print("Wait for results of server...")
result = thread.get()
return result
@@ -156,9 +156,9 @@ def build_example(name, identifier, admin_config, server_address):
dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
- # A module might have options it can be initialized with, here we set a default model, and the nova-server
+ # A module might have options it can be initialized with; here we set a default model and the server
# address it should use. These parameters can be freely defined in the task component
- options = {'default_model': "base", 'nova_server': server_address}
+ options = {'default_model': "base", 'server': server_address}
nip90params = {
"model": {
@@ -199,7 +199,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
- dvm = build_example("Whisperer", "whisperx", admin_config, os.getenv("NOVA_SERVER"))
+ dvm = build_example("Whisperer", "whisperx", admin_config, os.getenv("N_SERVER"))
dvm.run()
keep_alive()
\ No newline at end of file
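# A sketch of the upload step that is unique to the WhisperX task above:
# media is pushed to n-server first, and the returned server-side path is
# what the job's io_input block points at. The wrapper function is
# hypothetical; send_file_to_n_server is imported as shown in this diff.
import os

from backends.nserver.utils import send_file_to_n_server


def upload_media(filepath, server):
    return send_file_to_n_server(os.path.realpath(filepath), server)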