adding local modules

This commit is contained in:
Believethehype
2023-12-08 09:09:03 +01:00
parent 32ff2516a4
commit 5225542efc
35 changed files with 1443 additions and 67 deletions

View File

@@ -10,6 +10,7 @@ OPENAI_API_KEY = "" # Enter your OpenAI API Key to use DVMs with OpenAI service
LIBRE_TRANSLATE_ENDPOINT = "" # Url to LibreTranslate Endpoint e.g. https://libretranslate.com
LIBRE_TRANSLATE_API_KEY = "" # API Key, if required (You can host your own instance where you don't need it)
REPLICATE_API_TOKEN = "" #API Key to run models on replicate.com
N_SERVER = "" # Enter the address of an n-server instance, either locally or on a machine in your network (host:port)
# We will automatically create dtags and private keys based on the identifier variable in main.
# If your DVM already has a dtag and private key you can replace it here before publishing the DTAG to not create a new one.
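A minimal sketch of how a DVM picks these settings up at runtime; the `os.getenv("N_SERVER")` pattern appears later in this commit, while loading via `dotenv` is an assumption based on the project's imports:

```python
import os

import dotenv

dotenv.load_dotenv()  # reads the .env entries shown above
n_server = os.getenv("N_SERVER")  # e.g. "127.0.0.1:8080" (hypothetical address)
if n_server is not None and n_server != "":
    print("Using n-server instance at " + n_server)
```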

.gitignore vendored
View File

@@ -169,3 +169,5 @@ app_deploy.py
db/Cashu/wallet.sqlite3
.idea/misc.xml
.idea/misc.xml
backends/nserver/cache/*
backends/nserver/modules/image_upscale/weights/*

View File

@@ -0,0 +1,7 @@
NOVA_SERVER_HOST = 0.0.0.0
NOVA_SERVER_PORT = 37318
NOVA_SERVER_CML_DIR = .\modules
NOVA_SERVER_DATA_DIR = .\data
NOVA_SERVER_CACHE_DIR = .\cache
NOVA_SERVER_TMP_DIR = .\tmp
NOVA_SERVER_LOG_DIR = .\log

View File

@@ -0,0 +1,129 @@
"""StableDiffusionXL Module
"""
import gc
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from nova_utils.interfaces.server_module import Processor
# Setting defaults
_default_options = {"kind": "prompt", "mode": "fast" }
# TODO: add log infos,
class ImageInterrogator(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
from PIL import Image as PILImage
import torch
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
kind = self.options['kind']  # "prompt" or "analysis"
mode = self.options['mode']  # "best", "classic", "fast" or "negative"
#url = self.current_session.input_data['input_image_url'].data[0]
#print(url)
input_image = self.current_session.input_data['input_image'].data
init_image = PILImage.fromarray(input_image)
mwidth = 256
mheight = 256
w = mwidth
h = mheight
if init_image.width > init_image.height:
scale = float(init_image.height / init_image.width)
w = mwidth
h = int(mheight * scale)
elif init_image.width < init_image.height:
scale = float(init_image.width / init_image.height)
w = int(mwidth * scale)
h = mheight
else:
w = mwidth
h = mheight
init_image = init_image.resize((w, h))
from clip_interrogator import Config, Interrogator
config = Config(clip_model_name="ViT-L-14/openai", device=self.device)
if kind == "analysis":
ci = Interrogator(config)
image_features = ci.image_to_features(init_image)
top_mediums = ci.mediums.rank(image_features, 5)
top_artists = ci.artists.rank(image_features, 5)
top_movements = ci.movements.rank(image_features, 5)
top_trendings = ci.trendings.rank(image_features, 5)
top_flavors = ci.flavors.rank(image_features, 5)
medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
movement_ranks = {movement: sim for movement, sim in
zip(top_movements, ci.similarities(image_features, top_movements))}
trending_ranks = {trending: sim for trending, sim in
zip(top_trendings, ci.similarities(image_features, top_trendings))}
flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
print(result)
return result
else:
ci = Interrogator(config)
ci.config.blip_num_beams = 64
ci.config.chunk_size = 2024
ci.config.clip_offload = True
ci.config.apply_low_vram_defaults()
#MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024
image = init_image
if mode == 'best':
prompt = ci.interrogate(image)
elif mode == 'classic':
prompt = ci.interrogate_classic(image)
elif mode == 'fast':
prompt = ci.interrogate_fast(image)
elif mode == 'negative':
prompt = ci.interrogate_negative(image)
#print(str(prompt))
return prompt
# config = Config(clip_model_name=os.environ['TRANSFORMERS_CACHE'] + "ViT-L-14/openai", device="cuda")
# ci = Interrogator(config)
# "ViT-L-14/openai"))
# "ViT-g-14/laion2B-s34B-b88K"))
def to_output(self, data: dict):
import numpy as np
self.current_session.output_data_templates['output'].data = np.array([data])
return self.current_session.output_data_templates

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Prompt from Image" is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output" data="text" default_value=""/>
</meta>
<model create="ImageInterrogator" script="image_interrogator.py" optstr="{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"/>
</trainer>

View File

@@ -0,0 +1,11 @@
# Clip Interrogator
This module provides prompt generation based on images; see the example payload below.
* https://huggingface.co/spaces/pharmapsychotic/CLIP-Interrogator
## Options
- `kind`: string, identifier of the kind of processing
  - `prompt`: Generates a prompt from the image
  - `analysis`: Generates a categorical analysis (mediums, artists, movements, trendings, flavors)
- `mode`: string, interrogation mode used when `kind` is `prompt`
  - `best`, `classic`, `fast`, `negative`
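## Example payload
A request sketch mirroring the `stablediffusionxl` README; the trainer path and the io `src`/`uri` values are assumptions, only the io ids and option names come from the trainer file:
```python
import requests

# hypothetical local files; the io ids match image_interrogator.trainer
payload = {
    'trainerFilePath': 'modules\\image_interrogator\\image_interrogator.trainer',
    'server': '127.0.0.1',
    'data': '[{"id":"input_image","type":"input","src":"file:image","uri":"inputs/photo.jpg","active":"True"},'
            '{"id":"output","type":"output","src":"file:text","uri":"outputs/prompt.txt","active":"True"}]',
    'optStr': 'kind=prompt;mode=fast'
}
url = 'http://127.0.0.1:53770/predict'
headers = {'Content-type': 'application/x-www-form-urlencoded'}
requests.post(url, headers=headers, data=payload)
```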

View File

@@ -0,0 +1,5 @@
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.1
clip_interrogator
git+https://github.com/huggingface/diffusers.git

View File

@@ -0,0 +1,12 @@
""" Clip Interrorgator
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '0'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,152 @@
"""RealESRGan Module
"""
import os
import glob
import sys
from nova_utils.interfaces.server_module import Processor
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
import numpy as np
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
import cv2
from PIL import Image as PILImage
# Setting defaults
_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False }
# TODO: add log infos,
class RealESRGan(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
self.model_path = None #Maybe need this later for manual path
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
input_image = self.current_session.input_data['input_image'].data
try:
model, netscale, file_url = self.manageModel(str(self.options['model']))
if self.model_path is not None:
model_path = self.model_path
else:
model_path = os.path.join('weights', self.options['model'] + '.pth')
if not os.path.isfile(model_path):
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
for url in file_url:
# model_path will be updated
model_path = load_file_from_url(
url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
# use dni to control the denoise strength
dni_weight = None
if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1:
wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
model_path = [model_path, wdn_model_path]
dni_weight = [float(self.options['denoise_strength']), 1 - float(self.options['denoise_strength'])]
half = True
if self.options["compute_type"] == "fp32":
half=False
upsampler = RealESRGANer(
scale=netscale,
model_path=model_path,
dni_weight=dni_weight,
model=model,
tile= int(self.options['tile']),
tile_pad=int(self.options['tile_pad']),
pre_pad=int(self.options['pre_pad']),
half=half,
gpu_id=None) #Can be set if multiple gpus are available
if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement
from gfpgan import GFPGANer
face_enhancer = GFPGANer(
model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
upscale=int(self.options['outscale']),
arch='clean',
channel_multiplier=2,
bg_upsampler=upsampler)
pilimage = PILImage.fromarray(input_image)
img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR)
try:
if bool(self.options['face_enhance']):
_, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
else:
output, _ = upsampler.enhance(img, outscale=int(self.options['outscale']))
except RuntimeError as error:
print('Error', error)
print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
return "Error"  # bail out here; otherwise the cvtColor below would hit an unbound 'output'
output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
return output
except Exception as e:
print(e)
sys.stdout.flush()
return "Error"
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates
def manageModel(self, model_name):
if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
elif model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
netscale = 2
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
elif model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
elif model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
netscale = 4
file_url = [
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
]
return model, netscale, file_url

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Upscales an Image" is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
</meta>
<model create="RealESRGan" script="image_upscale_realesrgan.py" optstr="{model:LIST:RealESRGAN_x4plus,RealESRNet_x4plus,RealESRGAN_x4plus_anime_6B,RealESRGAN_x2plus,realesr-animevideov3,realesr-general-x4v3};{outscale:STRING:4};{denoise_strength:STRING:0.5};{tile:STRING:0};{tile_pad:STRING:10};{pre_pad:STRING:0};{compute_type:STRING:fp32};{face_enhance:BOOL:False}"/>
</trainer>
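A hedged request sketch for the trainer above, mirroring the payload format of the `stablediffusionxl` README; the trainer path and the io `src`/`uri` values are assumptions, while the option names come from the `optstr`:

```python
import requests

# hypothetical file locations; the io ids match the trainer definition above
payload = {
    'trainerFilePath': 'modules\\image_upscale\\image_upscale_realesrgan.trainer',
    'server': '127.0.0.1',
    'data': '[{"id":"input_image","type":"input","src":"file:image","uri":"inputs/low_res.jpg","active":"True"},'
            '{"id":"output_image","type":"output","src":"file:image","uri":"outputs/upscaled.jpg","active":"True"}]',
    'optStr': 'model=RealESRGAN_x4plus;outscale=4;denoise_strength=0.5;tile=0;tile_pad=10;pre_pad=0;compute_type=fp32;face_enhance=False'
}
url = 'http://127.0.0.1:53770/predict'
headers = {'Content-type': 'application/x-www-form-urlencoded'}
requests.post(url, headers=headers, data=payload)
```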

View File

@@ -0,0 +1,166 @@
import argparse
import cv2
import glob
import os
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
def main():
"""Inference demo for Real-ESRGAN.
"""
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
parser.add_argument(
'-n',
'--model_name',
type=str,
default='RealESRGAN_x4plus',
help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | '
'realesr-animevideov3 | realesr-general-x4v3'))
parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
parser.add_argument(
'-dn',
'--denoise_strength',
type=float,
default=0.5,
help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
'Only used for the realesr-general-x4v3 model'))
parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
parser.add_argument(
'--model_path', type=str, default=None, help='[Option] Model path. Usually, you do not need to specify it')
parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
parser.add_argument(
'--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
parser.add_argument(
'--alpha_upsampler',
type=str,
default='realesrgan',
help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
parser.add_argument(
'--ext',
type=str,
default='auto',
help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
parser.add_argument(
'-g', '--gpu-id', type=int, default=None, help='gpu device to use (default=None) can be 0,1,2 for multi-gpu')
args = parser.parse_args()
# determine models according to model names
args.model_name = args.model_name.split('.')[0]
if args.model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
elif args.model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
elif args.model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
elif args.model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
netscale = 2
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
elif args.model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
elif args.model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
netscale = 4
file_url = [
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
]
# determine model paths
if args.model_path is not None:
model_path = args.model_path
else:
model_path = os.path.join('weights', args.model_name + '.pth')
if not os.path.isfile(model_path):
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
for url in file_url:
# model_path will be updated
model_path = load_file_from_url(
url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
# use dni to control the denoise strength
dni_weight = None
if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
model_path = [model_path, wdn_model_path]
dni_weight = [args.denoise_strength, 1 - args.denoise_strength]
# restorer
upsampler = RealESRGANer(
scale=netscale,
model_path=model_path,
dni_weight=dni_weight,
model=model,
tile=args.tile,
tile_pad=args.tile_pad,
pre_pad=args.pre_pad,
half=not args.fp32,
gpu_id=args.gpu_id)
if args.face_enhance: # Use GFPGAN for face enhancement
from gfpgan import GFPGANer
face_enhancer = GFPGANer(
model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
upscale=args.outscale,
arch='clean',
channel_multiplier=2,
bg_upsampler=upsampler)
os.makedirs(args.output, exist_ok=True)
if os.path.isfile(args.input):
paths = [args.input]
else:
paths = sorted(glob.glob(os.path.join(args.input, '*')))
for idx, path in enumerate(paths):
imgname, extension = os.path.splitext(os.path.basename(path))
print('Testing', idx, imgname)
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
if len(img.shape) == 3 and img.shape[2] == 4:
img_mode = 'RGBA'
else:
img_mode = None
try:
if args.face_enhance:
_, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
else:
output, _ = upsampler.enhance(img, outscale=args.outscale)
except RuntimeError as error:
print('Error', error)
print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
else:
if args.ext == 'auto':
extension = extension[1:]
else:
extension = args.ext
if img_mode == 'RGBA': # RGBA images should be saved in png format
extension = 'png'
if args.suffix == '':
save_path = os.path.join(args.output, f'{imgname}.{extension}')
else:
save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
cv2.imwrite(save_path, output)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,13 @@
realesrgan @git+https://github.com/xinntao/Real-ESRGAN.git
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.0
torchvision
basicsr>=1.4.2
facexlib>=0.2.5
gfpgan>=1.3.5
numpy
opencv-python
Pillow
tqdm
git+https://github.com/huggingface/diffusers.git

View File

@@ -0,0 +1,12 @@
""" RealESRGan
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '0'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,100 @@
def build_lora_xl(lora, prompt, lora_weight):
existing_lora = False
if lora == "3drenderstyle":
if lora_weight == "":
lora_weight = "1"
prompt = "3d style, 3d render, " + prompt + " <lora:3d_render_style_xl:"+lora_weight+">"
existing_lora = True
if lora == "psychedelicnoir":
if lora_weight == "":
lora_weight = "1"
prompt = prompt + " <lora:Psychedelic_Noir__sdxl:"+lora_weight+">>"
existing_lora = True
if lora == "wojak":
if lora_weight == "":
lora_weight = "1"
prompt = "<lora:wojak_big:"+lora_weight+">, " + prompt + ", wojak"
existing_lora = True
if lora == "dreamarts":
if lora_weight == "":
lora_weight = "1"
prompt = "<lora:DreamARTSDXL:"+lora_weight+">, " + prompt
existing_lora = True
if lora == "voxel":
if lora_weight == "":
lora_weight = "1"
prompt = "voxel style, " + prompt + " <lora:last:"+lora_weight+">"
existing_lora = True
if lora == "kru3ger":
if lora_weight == "":
lora_weight = "1"
prompt = "kru3ger_style, " + prompt + "<lora:sebastiankrueger-kru3ger_style-000007:"+lora_weight+">"
existing_lora = True
if lora == "inkpunk":
if lora_weight == "":
lora_weight = "0.5"
prompt = "inkpunk style, " + prompt + " <lora:IPXL_v2:"+lora_weight+">"
existing_lora = True
if lora == "inkscenery":
if lora_weight == "":
lora_weight = "1"
prompt = " ink scenery, " + prompt + " <lora:ink_scenery_xl:"+lora_weight+">"
existing_lora = True
if lora == "inkpainting":
if lora_weight == "":
lora_weight = "0.7"
prompt = "painting style, " + prompt + " <lora:Ink_Painting-000006::"+lora_weight+">,"
existing_lora = True
if lora == "timburton":
if lora_weight == "":
lora_weight = "1.27"
pencil_weight = "1.15"
prompt = prompt + " (hand drawn with pencil"+pencil_weight+"), (tim burton style:"+lora_weight+")"
existing_lora = True
if lora == "pixelart":
if lora_weight == "":
lora_weight = "1"
prompt = prompt + " (flat shading:1.2), (minimalist:1.4), <lora:pixelbuildings128-v2:"+lora_weight+"> "
existing_lora = True
if lora == "pepe":
if lora_weight == "":
lora_weight = "0.8"
prompt = prompt + " ,<lora:DD-pepe-v2:"+lora_weight+"> pepe"
existing_lora = True
if lora == "bettertext":
if lora_weight == "":
lora_weight = "1"
prompt = prompt + " ,<lora:BetterTextRedmond:"+lora_weight+">"
existing_lora = True
if lora == "mspaint":
if lora_weight == "":
lora_weight = "1"
prompt = "MSPaint drawing " + prompt +">"
existing_lora = True
if lora == "woodfigure":
if lora_weight == "":
lora_weight = "0.7"
prompt = prompt + ",woodfigurez,artistic style <lora:woodfigurez-sdxl:"+lora_weight+">"
existing_lora = True
if lora == "fireelement":
prompt = prompt + ",composed of fire elements, fire element"
existing_lora = True
return lora, prompt, existing_lora

View File

@@ -0,0 +1,35 @@
# Stable Diffusion XL
This module provides image generation based on prompts
* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
## Options
- `model`: string, identifier of the model to choose
- `stabilityai/stable-diffusion-xl-base-1.0`: Default Stable Diffusion XL model
- `ratio`: Ratio of the output image
- `1-1` ,`4-3`, `16-9`, `16-10`, `3-4`,`9-16`,`10-16`
- `high_noise_frac`: fraction of the denoising steps run on the base model before the latents are handed to the refiner
- `n_steps`: number of inference steps to perform
## Example payload
```python
payload = {
'trainerFilePath': 'modules\\stablediffusionxl\\stablediffusionxl.trainer',
'server': '127.0.0.1',
'data' = '[{"id":"input_prompt","type":"input","src":"user:text","prompt":"' + prompt +'","active":"True"},{"id":"negative_prompt","type":"input","src":"user:text","prompt":"' + negative_prompt +'","active":"True"},{"id":"output_image","type":"output","src":"file:image","uri":"' + outputfile+'","active":"True"}]'
'optStr': 'model=stabilityai/stable-diffusion-xl-base-1.0;ratio=4-3'
}
import requests
url = 'http://127.0.0.1:53770/predict'
headers = {'Content-type': 'application/x-www-form-urlencoded'}
requests.post(url, headers=headers, data=payload)
```

View File

@@ -0,0 +1,9 @@
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.0
compel~=2.0.2
git+https://github.com/huggingface/diffusers.git
transformers
accelerate
numpy
omegaconf

View File

@@ -0,0 +1,176 @@
"""StableDiffusionXL Module
"""
import gc
import sys
import os
# Add local dir to path for relative imports
sys.path.insert(0, os.path.dirname(__file__))
from nova_utils.interfaces.server_module import Processor
from nova_utils.utils.cache_utils import get_file
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
from diffusers.utils import load_image
import numpy as np
from PIL import Image as PILImage
from lora import build_lora_xl
# Setting defaults
_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" }
# TODO: add log infos,
class StableDiffusionXL(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
import torch
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
#input_image_url = self.current_session.input_data['input_image_url'].data
#input_image_url = ' '.join(input_image_url)
input_image = self.current_session.input_data['input_image'].data
input_prompt = self.current_session.input_data['input_prompt'].data
input_prompt = ' '.join(input_prompt)
negative_prompt = self.current_session.input_data['negative_prompt'].data
negative_prompt = ' '.join(negative_prompt)
# print("Input Image: " + input_image_url)
print("Input prompt: " + input_prompt)
print("Negative prompt: " + negative_prompt)
try:
model = self.options['model']
lora = self.options['lora']
#init_image = load_image(input_image_url).convert("RGB")
init_image = PILImage.fromarray(input_image)
mwidth = 1024
mheight = 1024
w = mwidth
h = mheight
if init_image.width > init_image.height:
scale = float(init_image.height / init_image.width)
w = mwidth
h = int(mheight * scale)
elif init_image.width < init_image.height:
scale = float(init_image.width / init_image.height)
w = int(mwidth * scale)
h = mheight
else:
w = mwidth
h = mheight
init_image = init_image.resize((w, h))
if lora != "" and lora != "None":
print("Loading lora...")
lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" )
from diffusers import AutoPipelineForImage2Image
import torch
#init_image = init_image.resize((int(w/2), int(h/2)))
pipe = AutoPipelineForImage2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
torch_dtype=torch.float16).to("cuda")
if existing_lora:
lora_uri = next((x for x in self.trainer.meta_uri if x.uri_id == lora), None)
if lora_uri is None:
return "Lora not found"
lora_path = get_file(
fname=str(lora_uri.uri_id) + ".safetensors",
origin=lora_uri.uri_url,
file_hash=lora_uri.uri_hash,
cache_dir=os.getenv("CACHE_DIR"),
tmp_dir=os.getenv("TMP_DIR"),
)
pipe.load_lora_weights(str(lora_path))
print("Loaded Lora: " + str(lora_path))
seed = 20000
generator = torch.manual_seed(seed)
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
image = pipe(
prompt=input_prompt,
negative_prompt=negative_prompt,
image=init_image,
generator=generator,
num_inference_steps=int(self.options['n_steps']),
guidance_scale=float(self.options['guidance_scale']),
strength=float(str(self.options['strength']))).images[0]
elif model == "stabilityai/stable-diffusion-xl-refiner-1.0":
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
model, torch_dtype=torch.float16, variant="fp16",
use_safetensors=True
)
n_steps = int(self.options['n_steps'])
transformation_strength = float(self.options['strength'])
cfg_scale = float(self.options['guidance_scale'])
pipe = pipe.to(self.device)
image = pipe(input_prompt, image=init_image,
negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0]
elif model == "timbrooks/instruct-pix2pix":
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16,
safety_checker=None)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to(self.device)
n_steps = int(self.options['n_steps'])
cfg_scale = float(self.options['guidance_scale'])
image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0]
if torch.cuda.is_available():
del pipe
gc.collect()
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
numpy_array = np.array(image)
return numpy_array
except Exception as e:
print(e)
sys.stdout.flush()
return "Error"
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates

View File

@@ -0,0 +1,26 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from existing image based on a prompt" is_iterable="False">
<io type="input" id="input_image" data="Image" default_value=""/>
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl-img2img.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-refiner-1.0,timbrooks/instruct-pix2pix};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure};{strength:STRING:0.8};{guidance_scale:STRING:11.0};{n_steps:STRING:30}"/>
</trainer>

View File

@@ -0,0 +1,241 @@
"""StableDiffusionXL Module
"""
import gc
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from nova_utils.interfaces.server_module import Processor
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, logging
from compel import Compel, ReturnedEmbeddingsType
from nova_utils.utils.cache_utils import get_file
import numpy as np
import torch
from PIL import Image
from lora import build_lora_xl
logging.disable_progress_bar()
logging.enable_explicit_format()
#logging.set_verbosity_info()
# Setting defaults
_default_options = {"model": "stabilityai/stable-diffusion-xl-base-1.0", "ratio": "1-1", "width": "", "height":"", "high_noise_frac" : "0.8", "n_steps" : "35", "lora" : "" }
# TODO: add log infos,
class StableDiffusionXL(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.variant = "fp16"
self.torch_d_type = torch.float32
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
input_prompt = self.current_session.input_data['input_prompt'].data
input_prompt = ' '.join(input_prompt)
negative_prompt = self.current_session.input_data['negative_prompt'].data
negative_prompt = ' '.join(negative_prompt)
new_width = 0
new_height = 0
print("Input prompt: " + input_prompt)
print("Negative prompt: " + negative_prompt)
try:
if self.options['width'] != "" and self.options['height'] != "":
new_width = int(self.options['width'])
new_height = int(self.options['height'])
ratiow, ratioh = self.calculate_aspect(new_width, new_height)
print("Ratio:" + str(ratiow) + ":" + str(ratioh))
else:
ratiow = str(self.options['ratio']).split('-')[0]
ratioh = str(self.options['ratio']).split('-')[1]
model = self.options["model"]
lora = self.options["lora"]
mwidth = 1024
mheight = 1024
height = mheight
width = mwidth
ratiown = int(ratiow)
ratiohn= int(ratioh)
if ratiown > ratiohn:
height = int((ratiohn/ratiown) * float(width))
elif ratiown < ratiohn:
width = int((ratiown/ratiohn) * float(height))
elif ratiown == ratiohn:
width = height
print("Processing Output width: " + str(width) + " Output height: " + str(height))
if model == "stabilityai/stable-diffusion-xl-base-1.0":
base = StableDiffusionXLPipeline.from_pretrained(model, torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
print("Loaded model: " + model)
else:
model_uri = next((x for x in self.trainer.meta_uri if x.uri_id == model), None)
if model_uri is None:
return "Model not found"
model_path = get_file(
fname=str(model_uri.uri_id) + ".safetensors",
origin=model_uri.uri_url,
file_hash=model_uri.uri_hash,
cache_dir=os.getenv("CACHE_DIR"),
tmp_dir=os.getenv("TMP_DIR"),
)
print(str(model_path))
base = StableDiffusionXLPipeline.from_single_file(str(model_path), torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
print("Loaded model: " + model)
if lora != "" and lora != "None":
print("Loading lora...")
lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "")
if existing_lora:
lora_uri = next((x for x in self.trainer.meta_uri if x.uri_id == lora), None)
if lora_uri is None:
return "Lora not found"
lora_path = get_file(
fname=str(lora_uri.uri_id) + ".safetensors",
origin=lora_uri.uri_url,
file_hash=lora_uri.uri_hash,
cache_dir=os.getenv("CACHE_DIR"),
tmp_dir=os.getenv("TMP_DIR"),
)
base.load_lora_weights(str(lora_path))
print("Loaded Lora: " + str(lora_path))
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-refiner-1.0",
text_encoder_2=base.text_encoder_2,
vae=base.vae,
torch_dtype=self.torch_d_type,
use_safetensors=True,
variant=self.variant,
)
compel_base = Compel(
tokenizer=[base.tokenizer, base.tokenizer_2],
text_encoder=[base.text_encoder, base.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True],
)
compel_refiner = Compel(
tokenizer=[refiner.tokenizer_2],
text_encoder=[refiner.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[True])
conditioning, pooled = compel_base(input_prompt)
negative_conditioning, negative_pooled = compel_base(negative_prompt)
conditioning_refiner, pooled_refiner = compel_refiner(input_prompt)
negative_conditioning_refiner, negative_pooled_refiner = compel_refiner(
negative_prompt)
n_steps = int(self.options['n_steps'])
high_noise_frac = float(self.options['high_noise_frac'])
#base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
img = base(
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
width=width,
height=height,
num_inference_steps=n_steps,
denoising_end=high_noise_frac,
output_type="latent",
).images
if torch.cuda.is_available():
del base
gc.collect()
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
refiner.to(self.device)
# refiner.enable_model_cpu_offload()
image = refiner(
prompt_embeds=conditioning_refiner,
pooled_prompt_embeds=pooled_refiner,
negative_prompt_embeds=negative_conditioning_refiner,
negative_pooled_prompt_embeds=negative_pooled_refiner,
num_inference_steps=n_steps,
denoising_start=high_noise_frac,
num_images_per_prompt=1,
image=img,
).images[0]
if torch.cuda.is_available():
del refiner
gc.collect()
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
if (new_height != 0 or new_width != 0) and (new_width != mwidth or new_height != mheight):
print("Resizing to width: " + str(new_width) + " height: " + str(new_height))
image = image.resize((new_width, new_height), Image.LANCZOS)
numpy_array = np.array(image)
return numpy_array
except Exception as e:
print(e)
sys.stdout.flush()
return "Error"
def calculate_aspect(self, width: int, height: int):
def gcd(a, b):
"""The GCD (greatest common divisor) is the highest number that evenly divides both width and height."""
return a if b == 0 else gcd(b, a % b)
r = gcd(width, height)
x = int(width / r)
y = int(height / r)
return x, y
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates

View File

@@ -0,0 +1,41 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from prompt" is_iterable="False">
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value="sd.jpg"/>
<uri id="juggernaut" url="https://civitai.com/api/download/models/198530" hash='1FE6C7EC54C786040CDABC7B4E89720069D97096922E20D01F13E7764412B47F'/>
<uri id="dynavision" url="https://civitai.com/api/download/models/198962" hash='FD9CDC26C3B6D1F30BACBC435E455E925E35622E4873CCFC55FD1C88E980585E'/>
<uri id="colossus" url="https://civitai.com/api/download/models/213982" hash='5A7E9DD581B3A9EDF2ED0D9FB2036C389325CD7BA13A754CE19BEEDBB69CEB73'/>
<uri id="newreality" url="https://civitai.com/api/download/models/232565" hash='06A85616411135F8CAF161F71CB0948F79E85750E4AF36A885C75485A9B68E2F'/>
<uri id="unstable" url="https://civitai.com/api/download/models/209647" hash='05C9E2274A74AE6957B986C92E5699FDFACFFD7EE24CED0D33CB696DE1A6C98B'/>
<uri id="fantastic" url="https://civitai.com/api/download/models/143722" hash='B0C590726969EF93BC4136C167D339A277946787223BFAD7B1DC9A68A4F183FC'/>
<uri id="mohawk" url="https://civitai.com/api/download/models/207419" hash='0248CA08AA5D5B342355173677C77ADD42E41ECEC3B6B6E52E9C9C471C30C508'/>
<uri id="dreamshaper" url="https://civitai.com/api/download/models/126688" hash='0F1B80CFE81B9C3BDE7FDCBF6898897B2811B27BE1DF684583C3D85CBC9B1FA4'/>
<uri id="timeless" url="https://civitai.com/api/download/models/198246" hash='A771B2B5E8D2A3C23A3A65F9A51E675F253F101C34BE7DC06FD18D534579D8F8'/>
<uri id="crystalclear" url="https://civitai.com/api/download/models/133832" hash='0B76532E03A1BAC388CBF559AF00384ABCBD2B5B3F8834158AE4B1B9146A3843'/>
<uri id="chroma" url="https://civitai.com/api/download/models/169740" hash='D2B9E5240C4BC74BB98063CEE16671FDC08D5B7BF197074A0C896E5DBB25BD24'/>
<uri id="bluepencil" url="https://civitai.com/api/download/models/212090" hash='C4D7E01814F0EED57A7120629D3017AC018AD7CDECB48F7FBE6B12F9C9C4D6B9'/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-base-1.0,juggernaut,dynavision,colossus,newreality,unstable,fantastic,mohawk,dreamshaper,timeless,crystalclear,chroma,bluepencil};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure,fireelement};{width:STRING:1024};{height:STRING:1024};{high_noise_frac:STRING:0.8};{n_steps:STRING:35}"/>
</trainer>

View File

@@ -0,0 +1,12 @@
""" Stable Diffusion XL
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '0'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,52 @@
# WhisperX
This module provides fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and
speaker diarization.
* https://github.com/m-bain/whisperX
## Options
- `model`: string, identifier of the model to choose, sorted ascending in required (V)RAM:
- `tiny`, `tiny.en`
- `base`, `base.en`
- `small`, `small.en`
- `medium`, `medium.en`
- `large-v1`
- `large-v2`
- `alignment_mode`: string, alignment method to use
- `raw` Segments as identified by Whisper
- `segment` Improved segmentation using separate alignment model. Roughly equivalent to sentence alignment.
- `word` Improved segmentation using separate alignment model. Equivalent to word alignment.
- `language`: language code for transcription and alignment models. Supported languages:
- `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh`
- `None`: auto-detect language from first 30 seconds of audio
- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption
## Examples
### Request
```python
import requests
import json
payload = {
"jobID" : "whisper_transcript",
"data": json.dumps([
{"src":"file:stream:audio", "type":"input", "id":"audio", "uri":"path/to/my/file.wav"},
{"src":"file:annotation:free", "type":"output", "id":"transcript", "uri":"path/to/my/transcript.annotation"}
]),
"trainerFilePath": "modules\\whisperx\\whisperx_transcript.trainer",
}
url = 'http://127.0.0.1:8080/process'
headers = {'Content-type': 'application/x-www-form-urlencoded'}
x = requests.post(url, headers=headers, data=payload)
print(x.text)
```
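### Polling for results
The request above only queues the job. A minimal polling sketch modeled on `check_server_status` in `backends/nserver/utils.py`; the `/job_status` and `/fetch_result` routes and the `jobID` form field come from there, while the exact response format (status `2` = finished) is an assumption:
```python
import time

import requests

address = '127.0.0.1:8080'
headers = {'Content-type': 'application/x-www-form-urlencoded'}

while True:
    # assumed to answer with the numeric job state; 2 = finished
    status = requests.post('http://' + address + '/job_status',
                           headers=headers,
                           data={"jobID": "whisper_transcript"}).text
    if status.strip() == "2":
        result = requests.post('http://' + address + '/fetch_result',
                               headers=headers,
                               data={"jobID": "whisper_transcript",
                                     "delete_after_download": True})
        print(result.content)
        break
    time.sleep(1.0)
```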

View File

@@ -0,0 +1,7 @@
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.0+cu118
torchvision>=0.15.1+cu118
torchaudio>=2.0.0+cu118
pyannote-audio @ git+https://github.com/shelm/pyannote-audio.git@d7b4de3
whisperx @ git+https://github.com/m-bain/whisperx.git@49e0130

View File

@@ -0,0 +1,12 @@
""" WhisperX
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '1'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,124 @@
"""WhisperX Module
"""
from nova_utils.interfaces.server_module import Processor
import sys
# Setting defaults
_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'}
# supported language codes, cf. whisperx/alignment.py
# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None}
# {'vi', 'uk', 'pl', 'ur', 'ru', 'ko', 'en', 'zh', 'es', 'it', 'el', 'te', 'da', 'he', 'fa', 'pt', 'de',
# 'fr', 'tr', 'nl', 'cs', 'hu', 'fi', 'ar', 'ja', None}
class WhisperX(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.session_manager = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
assert len(self.input) == 1 and len(self.output) == 1
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_manager) -> dict:
import whisperx
import torch
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.session_manager = self.get_session_manager(ds_manager)
input_audio = self.session_manager.input_data['audio']
# sliding window will be applied by WhisperX
audio = whisperx.load_audio(input_audio.meta_data.file_path)
# transcribe with original whisper
try:
model = whisperx.load_model(self.options["model"], self.device, compute_type=self.options['compute_type'],
language=self.options['language'])
except ValueError:
print(f'Your hardware does not support {self.options["compute_type"]} - fallback to float32')
sys.stdout.flush()
model = whisperx.load_model(self.options["model"], self.device, compute_type='float32',
language=self.options['language'])
result = model.transcribe(audio, batch_size=int(self.options["batch_size"]))
# delete model if low on GPU resources
import gc; del model; gc.collect(); torch.cuda.empty_cache()
if not self.options["alignment_mode"] == "raw":
# load alignment model and metadata
model_a, metadata = whisperx.load_align_model(
language_code=result["language"], device=self.device
)
# align whisper output
result_aligned = whisperx.align(
result["segments"], model_a, metadata, audio, self.device
)
result = result_aligned
# delete model if low on GPU resources
import gc; del model_a; gc.collect(); torch.cuda.empty_cache()
return result
def to_output(self, data: dict):
def _fix_missing_timestamps(data):
"""
https://github.com/m-bain/whisperX/issues/253
Some characters might miss timestamps and recognition scores. This function adds estimated time stamps assuming a fixed time per character of 65ms.
Confidence for each added timestamp will be 0.
Args:
data (dictionary): output dictionary as returned by process_data
"""
last_end = 0
for s in data["segments"]:
for w in s["words"]:
if "end" in w.keys():
last_end = w["end"]
else:
#TODO: rethink lower bound for confidence; place word centred instead of left aligned
w["start"] = last_end
last_end += 0.065
w["end"] = last_end
#w["score"] = 0.000
w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4])
def _hmean(scores):
if len(scores) > 0:
prod = scores[0]
for s in scores[1:]:
prod *= s
prod = prod**(1/len(scores))
else:
prod = 0
return prod
if (
self.options["alignment_mode"] == "word"
or self.options["alignment_mode"] == "segment"
):
_fix_missing_timestamps(data)
if self.options["alignment_mode"] == "word":
anno_data = [
(w["start"], w["end"], w["word"], w["score"])
for w in data["word_segments"]
]
else:
anno_data = [
#(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
(w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?)
]
# convert to milliseconds
anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data]
out = self.session_manager.output_data_templates[self.output.io_id]
out.data = anno_data
return self.session_manager.output_data_templates

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="Transcript" description="Transcribes audio" is_iterable="False">
<io type="input" id="audio" data="stream:Audio" default_value="audio"/>
<io type="output" id="transcript" data="annotation:Free" default_value="transcript"/>
</meta>
<model create="WhisperX" script="whisperx_transcript.py" optstr="{model:LIST:base,tiny,small,medium,large-v1,large-v2,tiny.en,base.en,small.en,medium.en};{alignment_mode:LIST:segment,word,raw};{language:LIST:None,en,de,ar,cs,da,el,es,fa,fi,fr,he,hu,it,ja,ko,nl,pl,pt,ru,te,tr,uk,ur,vi,zh};{batch_size:STRING:16};{compute_type:LIST:float16,float32,int8}"/>
</trainer>

View File

@@ -0,0 +1,4 @@
python -m venv venv
call venv/Scripts/activate
pip install hcai-nova-server
python nova-server

View File

@@ -10,34 +10,29 @@ import PIL.Image as Image
from utils.output_utils import upload_media_to_hoster
"""
This file contains basic calling functions for ML tasks that are outsourced to nova-server
(https://pypi.org/project/hcai-nova-server/). nova-server is an Open-Source backend that enables running models locally
based on predefined modules (nova-server-modules), by accepting a request form.
This file contains basic calling functions for ML tasks that are outsourced to nova server. It is an Open-Source backend
that enables running models locally based on predefined modules, by accepting a request form.
Modules are deployed in separate virtual environments so dependencies won't conflict.
Setup nova-server:
https://hcmlab.github.io/nova-server/docbuild/html/tutorials/introduction.html
"""
"""
send_request_to_nova_server(request_form, address)
send_request_to_server(request_form, address)
Function to send a request_form to the server, containing all the information we parsed from the Nostr event and added
in the module that is calling the server
"""
def send_request_to_nova_server(request_form, address):
print("Sending job to NOVA-Server")
def send_request_to_server(request_form, address):
print("Sending job to Server")
url = ('http://' + address + '/process')
headers = {'Content-type': 'application/x-www-form-urlencoded'}
response = requests.post(url, headers=headers, data=request_form)
return response.text
def send_file_to_nova_server(filepath, address):
print("Sending file to NOVA-Server")
def send_file_to_server(filepath, address):
print("Sending file to Server")
url = ('http://' + address + '/upload')
try:
fp = open(filepath, 'rb')
@@ -53,14 +48,14 @@ def send_file_to_nova_server(filepath, address):
"""
check_nova_server_status(request_form, address)
check_server_status(jobID, address)
Function that requests the status of the current process with the jobID (we use the Nostr event as jobID).
When the Job is successfully finished we grab the result and depending on the type return the output
We throw an exception on error
"""
def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
def check_server_status(jobID, address) -> str | pd.DataFrame:
headers = {'Content-type': 'application/x-www-form-urlencoded'}
url_status = 'http://' + address + '/job_status'
url_log = 'http://' + address + '/log'
@@ -85,7 +80,7 @@ def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
if status == 2:
try:
url_fetch = 'http://' + address + '/fetch_result'
print("Fetching Results from NOVA-Server...")
print("Fetching Results from Server...")
data = {"jobID": jobID, "delete_after_download": True}
response = requests.post(url_fetch, headers=headers, data=data)
content_type = response.headers['content-type']
@@ -96,7 +91,6 @@ def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
result = upload_media_to_hoster("./outputs/image.jpg")
os.remove("./outputs/image.jpg")
return result
elif content_type == 'text/plain; charset=utf-8':
return response.content.decode('utf-8')
elif content_type == "application/x-zip-compressed":

View File

@@ -10,7 +10,7 @@ import tasks.textextraction_pdf as textextraction_pdf
import tasks.textextraction_google as textextraction_google
import tasks.translation_google as translation_google
import tasks.translation_libretranslate as translation_libretranslate
from tasks import imagegeneration_replicate_sdxl, videogeneration_replicate_svd
from tasks import imagegeneration_replicate_sdxl, videogeneration_replicate_svd, imagegeneration_sdxl
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -81,6 +81,11 @@ def playground():
bot_config.SUPPORTED_DVMS.append(svdreplicate)
svdreplicate.run()
if os.getenv("N_SERVER") is not None and os.getenv("N_SERVER") != "":
unstable_artist = imagegeneration_sdxl.build_example("NostrAI DVM Artist",
"stable_diffusion", admin_config, os.getenv("N_SERVER"))
bot_config.SUPPORTED_DVMS.append(unstable_artist) # We add unstable Diffusion to the bot
unstable_artist.run()
# Let's define a function so we can add external DVMs to our bot; we will instantiate it afterwards

View File

@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
from backends.nova_server import check_nova_server_status, send_request_to_nova_server
from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -15,7 +15,7 @@ from utils.definitions import EventDefinitions
from utils.nostr_utils import check_and_set_private_key
"""
This File contains a Module to transform Text input on NOVA-Server and receive results back.
This file contains a module to transform text input on n-server and receive results back.
Accepted Inputs: Prompt (text)
Outputs: A URL to an image
@@ -53,7 +53,7 @@ class ImageGenerationSDXL(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
request_form["trainerFilePath"] = 'modules\\stablediffusionxl\\stablediffusionxl.trainer'
request_form["trainerFilePath"] = r'stablediffusionxl\stablediffusionxl.trainer'
prompt = ""
negative_prompt = ""
@@ -148,14 +148,14 @@ class ImageGenerationSDXL(DVMTaskInterface):
def process(self, request_form):
try:
# Call the process route of NOVA-Server with our request form.
response = send_request_to_nova_server(request_form, self.options['nova_server'])
# Call the process route of n-server with our request form.
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to NOVA-server")
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
print("Wait for results of NOVA-Server...")
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
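
Every task below repeats this same send-then-poll shape, so here it is once as a self-contained sketch; send_request_to_server and check_server_status are the helpers imported from backends.nserver.utils above, and the 'server' key matches the options set in build_example:

import json
from multiprocessing.pool import ThreadPool

def process_sketch(request_form, options):
    # Submit the job, then block on a worker thread until the server is done.
    response = send_request_to_server(request_form, options['server'])
    if bool(json.loads(response)['success']):
        pool = ThreadPool(processes=1)
        thread = pool.apply_async(check_server_status,
                                  (request_form['jobID'], options['server']))
        return thread.get()  # check_server_status polls /job_status, then fetches the result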
@@ -172,9 +172,9 @@ def build_example(name, identifier, admin_config, server_address, default_model=
dvm_config.LNBITS_INVOICE_KEY = "" # This one will not use Lnbits to create invoices, but rely on zaps
dvm_config.LNBITS_URL = ""
# A module might have options it can be initialized with, here we set a default model, and the nova-server
# A module might have options it can be initialized with; here we set a default model and the server
# address it should use. These parameters can be freely defined in the task component
options = {'default_model': default_model, 'default_lora': default_lora, 'nova_server': server_address}
options = {'default_model': default_model, 'default_lora': default_lora, 'server': server_address}
nip90params = {
"negative_prompt": {
@@ -214,7 +214,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
dvm = build_example("Unstable Diffusion", "unstable_diffusion", admin_config, os.getenv("NOVA_SERVER"), "stabilityai/stable-diffusion-xl", "")
dvm = build_example("Unstable Diffusion", "unstable_diffusion", admin_config, os.getenv("N_SERVER"), "stabilityai/stable-diffusion-xl", "")
dvm.run()
keep_alive()

View File

@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
from backends.nova_server import check_nova_server_status, send_request_to_nova_server
from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -15,7 +15,7 @@ from utils.definitions import EventDefinitions
from utils.nostr_utils import check_and_set_private_key
"""
This File contains a Module to transform Text input on NOVA-Server and receive results back.
This file contains a module to transform text input on n-server and receive results back.
Accepted Inputs: Prompt (text)
Outputs: A URL to an image
@@ -60,7 +60,7 @@ class ImageGenerationSDXLIMG2IMG(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl-img2img.trainer'
request_form["trainerFilePath"] = r'stablediffusionxl\stablediffusionxl-img2img.trainer'
prompt = ""
negative_prompt = ""
@@ -178,13 +178,13 @@ class ImageGenerationSDXLIMG2IMG(DVMTaskInterface):
def process(self, request_form):
try:
# Call the process route of n-server with our request form.
response = send_request_to_nova_server(request_form, self.options['nova_server'])
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to NOVA-server")
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
print("Wait for results of NOVA-Server...")
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
@@ -224,8 +224,8 @@ def build_example(name, identifier, admin_config, server_address, default_lora="
"nip90Params": nip90params
}
# A module might have options it can be initialized with, here we set a default model, lora and the nova-server
options = {'default_lora': default_lora, 'strength': strength, 'nova_server': server_address}
# A module might have options it can be initialized with; here we set a default lora, the strength, and the server
options = {'default_lora': default_lora, 'strength': strength, 'server': server_address}
nip89config = NIP89Config()
@@ -249,7 +249,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
dvm = build_example("Image Converter Inkpunk", "image2image", admin_config, os.getenv("NOVA_SERVER"), "", 0.6)
dvm = build_example("Image Converter Inkpunk", "image2image", admin_config, os.getenv("N_SERVER"), "", 0.6)
dvm.run()
keep_alive()

View File

@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
from backends.nova_server import check_nova_server_status, send_request_to_nova_server
from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -48,7 +48,7 @@ class ImageInterrogator(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
request_form["trainerFilePath"] = r'modules\image_interrogator\image_interrogator.trainer'
request_form["trainerFilePath"] = r'\image_interrogator\image_interrogator.trainer'
url = ""
method = "prompt"
mode = "best"
@@ -93,13 +93,13 @@ class ImageInterrogator(DVMTaskInterface):
def process(self, request_form):
try:
# Call the process route of n-server with our request form.
response = send_request_to_nova_server(request_form, self.options['nova_server'])
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to NOVA-server")
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
print("Wait for results of NOVA-Server...")
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
@@ -134,8 +134,8 @@ def build_example(name, identifier, admin_config, server_address):
"nip90Params": nip90params
}
# A module might have options it can be initialized with, here we set a default model, lora and the nova-server
options = {'nova_server': server_address}
# A module might have options it can be initialized with; here we only set the server address
options = {'server': server_address}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY,
@@ -158,7 +158,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
dvm = build_example("Image Interrogator", "imageinterrogator", admin_config, os.getenv("NOVA_SERVER"))
dvm = build_example("Image Interrogator", "imageinterrogator", admin_config, os.getenv("N_SERVER"))
dvm.run()
keep_alive()

View File

@@ -5,7 +5,7 @@ from pathlib import Path
import dotenv
from backends.nova_server import check_nova_server_status, send_request_to_nova_server
from backends.nserver.utils import check_server_status, send_request_to_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -91,13 +91,13 @@ class ImageUpscale(DVMTaskInterface):
def process(self, request_form):
try:
# Call the process route of n-server with our request form.
response = send_request_to_nova_server(request_form, self.options['nova_server'])
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to NOVA-server")
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
print("Wait for results of NOVA-Server...")
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
@@ -128,8 +128,8 @@ def build_example(name, identifier, admin_config, server_address):
"nip90Params": nip90params
}
# A module might have options it can be initialized with, here we set a default model, lora and the nova-server
options = {'nova_server': server_address}
# A module might have options it can be initialized with; here we only set the server address
options = {'server': server_address}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY,
@@ -152,7 +152,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
dvm = build_example("Image Upscaler", "imageupscale", admin_config, os.getenv("NOVA_SERVER"))
dvm = build_example("Image Upscaler", "imageupscale", admin_config, os.getenv("N_SERVER"))
dvm.run()
keep_alive()

View File

@@ -136,7 +136,7 @@ def build_example(name, identifier, admin_config):
dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
options = {'api_key': None}
# A module might have options it can be initialized with, here we set a default model, and the nova-server
# A module might have options it can be initialized with; here we only set the API key.
# These parameters can be freely defined in the task component
nip90params = {

View File

@@ -6,7 +6,7 @@ from pathlib import Path
import dotenv
from backends.nova_server import check_nova_server_status, send_request_to_nova_server, send_file_to_nova_server
from backends.nserver.utils import check_server_status, send_request_to_server, send_file_to_n_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.backend_utils import keep_alive
@@ -17,7 +17,7 @@ from utils.definitions import EventDefinitions
from utils.nostr_utils import check_and_set_private_key
"""
This File contains a Module to transform A media file input on NOVA-Server and receive results back.
This file contains a module to transform a media file input on n-server and receive results back.
Accepted Inputs: Url to media file (url)
Outputs: Transcribed text
@@ -53,7 +53,7 @@ class SpeechToTextWhisperX(DVMTaskInterface):
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", ""),
"trainerFilePath": 'modules\\whisperx\\whisperx_transcript.trainer'}
"trainerFilePath": r'whisperx\whisperx_transcript.trainer'}
if self.options.get("default_model"):
model = self.options['default_model']
@@ -107,7 +107,7 @@ class SpeechToTextWhisperX(DVMTaskInterface):
end_time = float(tag.as_vec()[3])
filepath = organize_input_media_data(url, input_type, start_time, end_time, dvm_config, client, True, media_format)
path_on_server = send_file_to_nova_server(os.path.realpath(filepath), self.options['nova_server'])
path_on_server = send_file_to_n_server(os.path.realpath(filepath), self.options['server'])
io_input = {
"id": "audio",
@@ -134,13 +134,13 @@ class SpeechToTextWhisperX(DVMTaskInterface):
def process(self, request_form):
try:
# Call the process route of n-server with our request form.
response = send_request_to_nova_server(request_form, self.options['nova_server'])
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to NOVA-server")
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
print("Wait for results of NOVA-Server...")
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
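
The WhisperX task differs from the image tasks in that it pushes a file to the server first; condensed from the hunks above, a hedged sketch of that media path (organize_input_media_data and send_file_to_n_server are the helpers used here; the io_input fields beyond "id" are elided in the diff, so they are not filled in):

import os

def prepare_media_request_sketch(url, input_type, start_time, end_time,
                                 dvm_config, client, media_format, options):
    # Download and trim the media locally, then upload it to n-server.
    filepath = organize_input_media_data(url, input_type, start_time, end_time,
                                         dvm_config, client, True, media_format)
    path_on_server = send_file_to_n_server(os.path.realpath(filepath), options['server'])
    io_input = {"id": "audio"}  # remaining fields are elided in the hunk above
    return path_on_server, io_input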
@@ -156,9 +156,9 @@ def build_example(name, identifier, admin_config, server_address):
dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
# A module might have options it can be initialized with, here we set a default model, and the nova-server
# A module might have options it can be initialized with; here we set a default model and the server
# address it should use. These parameters can be freely defined in the task component
options = {'default_model': "base", 'nova_server': server_address}
options = {'default_model': "base", 'server': server_address}
nip90params = {
"model": {
@@ -199,7 +199,7 @@ if __name__ == '__main__':
admin_config.REBROADCAST_NIP89 = False
admin_config.UPDATE_PROFILE = False
admin_config.LUD16 = ""
dvm = build_example("Whisperer", "whisperx", admin_config, os.getenv("NOVA_SERVER"))
dvm = build_example("Whisperer", "whisperx", admin_config, os.getenv("N_SERVER"))
dvm.run()
keep_alive()