cleanup backend examples

Believethehype
2024-10-11 10:17:26 +02:00
parent c63292e503
commit 5485ba3638
48 changed files with 380 additions and 349 deletions

View File

@@ -1,18 +1,17 @@
"""StableDiffusionXL Module
"""
import os
import sys

sys.path.insert(0, os.path.dirname(__file__))

from nova_utils.interfaces.server_module import Processor

# Setting defaults
_default_options = {"kind": "prompt", "mode": "fast"}


# TODO: add log infos,
class ImageInterrogator(Processor):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
@@ -20,7 +19,6 @@ class ImageInterrogator(Processor):
        self.device = None
        self.ds_iter = None
        self.current_session = None

        # IO shortcuts
        self.input = [x for x in self.model_io if x.io_type == "input"]
@@ -36,18 +34,17 @@ class ImageInterrogator(Processor):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.ds_iter = ds_iter
        current_session_name = self.ds_iter.session_names[0]
        self.current_session = self.ds_iter.sessions[current_session_name]['manager']
        # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
        kind = self.options['kind']  # "prompt" #"analysis" #prompt
        mode = self.options['mode']
        # url = self.current_session.input_data['input_image_url'].data[0]
        # print(url)
        input_image = self.current_session.input_data['input_image'].data
        init_image = PILImage.fromarray(input_image)
        mwidth = 256
        mheight = 256
        w = mwidth
        h = mheight
        if init_image.width > init_image.height:
@@ -68,11 +65,9 @@ class ImageInterrogator(Processor):
        config = Config(clip_model_name="ViT-L-14/openai", device="cuda")

        if kind == "analysis":
            ci = Interrogator(config)

            image_features = ci.image_to_features(init_image)

            top_mediums = ci.mediums.rank(image_features, 5)
@@ -81,15 +76,20 @@ class ImageInterrogator(Processor):
            top_trendings = ci.trendings.rank(image_features, 5)
            top_flavors = ci.flavors.rank(image_features, 5)

            medium_ranks = {medium: sim for medium, sim in
                            zip(top_mediums, ci.similarities(image_features, top_mediums))}
            artist_ranks = {artist: sim for artist, sim in
                            zip(top_artists, ci.similarities(image_features, top_artists))}
            movement_ranks = {movement: sim for movement, sim in
                              zip(top_movements, ci.similarities(image_features, top_movements))}
            trending_ranks = {trending: sim for trending, sim in
                              zip(top_trendings, ci.similarities(image_features, top_trendings))}
            flavor_ranks = {flavor: sim for flavor, sim in
                            zip(top_flavors, ci.similarities(image_features, top_flavors))}

            result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(
                artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(
                trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)

            print(result)
            return result
@@ -100,8 +100,8 @@ class ImageInterrogator(Processor):
            ci.config.chunk_size = 2024
            ci.config.clip_offload = True
            ci.config.apply_low_vram_defaults()
            # MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
            ci.config.flavor_intermediate_count = 2024  # if clip_model_name == MODELS[0] else 1024

            image = init_image
            if mode == 'best':
@@ -113,17 +113,15 @@ class ImageInterrogator(Processor):
            elif mode == 'negative':
                prompt = ci.interrogate_negative(image)

            # print(str(prompt))
            return prompt

        # config = Config(clip_model_name=os.environ['TRANSFORMERS_CACHE'] + "ViT-L-14/openai", device="cuda")git
        # ci = Interrogator(config)
        # "ViT-L-14/openai"))
        # "ViT-g-14/laion2B-s34B-b88K"))

    def to_output(self, data: dict):
        import numpy as np
        self.current_session.output_data_templates['output'].data = np.array([data])
        return self.current_session.output_data_templates
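For reference, `ImageInterrogator` wraps the `clip_interrogator` package. A minimal standalone sketch of the same flow (assuming `clip_interrogator` is installed and a local `test.jpg` exists; not part of the module above):

```python
# Minimal standalone sketch of the flow wrapped by ImageInterrogator.
# Assumes the clip_interrogator package and a local image file are available.
from PIL import Image
from clip_interrogator import Config, Interrogator

ci = Interrogator(Config(clip_model_name="ViT-L-14/openai"))
image = Image.open("test.jpg").convert("RGB")
print(ci.interrogate_fast(image))  # "fast" mode, matching the module's default options
```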

View File

@@ -0,0 +1,12 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Prompt from Image"
is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output" data="text" default_value=""/>
</meta>
<model create="ImageInterrogator" script="image_interrogator.py"
optstr="{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"/>
</trainer>
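The `optstr` attribute is how these trainer files surface module options to nova-server, which parses it into the `options` dict the modules read (e.g. `self.options['kind']`). The real parsing lives in nova-server/nova_utils; the following is purely an illustrative sketch of the format's shape:

```python
# Illustrative only: a rough parser for the optstr format used in the trainer
# files, e.g. "{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}".
def parse_optstr(optstr: str) -> dict:
    options = {}
    for chunk in optstr.split(";"):
        name, kind, value = chunk.strip("{}").split(":", 2)
        options[name] = value.split(",") if kind == "LIST" else value
    return options

print(parse_optstr("{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"))
# {'kind': ['prompt', 'analysis'], 'mode': ['best', 'classic', 'fast', 'negative']}
```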

View File

@@ -2,25 +2,23 @@
"""
import os
import sys

import cv2
import numpy as np
from PIL import Image as PILImage
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
from nova_utils.interfaces.server_module import Processor
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact

# Setting defaults
_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0, "tile_pad": 10,
                    "pre_pad": 0, "compute_type": "fp32", "face_enhance": False}


# TODO: add log infos,
class RealESRGan(Processor):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
@@ -28,8 +26,7 @@ class RealESRGan(Processor):
        self.device = None
        self.ds_iter = None
        self.current_session = None
        self.model_path = None  # Maybe need this later for manual path

        # IO shortcuts
        self.input = [x for x in self.model_io if x.io_type == "input"]
@@ -42,12 +39,11 @@ class RealESRGan(Processor):
        current_session_name = self.ds_iter.session_names[0]
        self.current_session = self.ds_iter.sessions[current_session_name]['manager']
        input_image = self.current_session.input_data['input_image'].data

        try:
            model, netscale, file_url = self.manageModel(str(self.options['model']))

            if self.model_path is not None:
                model_path = self.model_path
            else:
                model_path = os.path.join('weights', self.options['model'] + '.pth')
@@ -58,7 +54,7 @@ class RealESRGan(Processor):
                    model_path = load_file_from_url(
                        url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)

            # use dni to control the denoise strength
            dni_weight = None
            if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1:
                wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
@@ -67,19 +63,18 @@ class RealESRGan(Processor):
            half = True
            if self.options["compute_type"] == "fp32":
                half = False

            upsampler = RealESRGANer(
                scale=netscale,
                model_path=model_path,
                dni_weight=dni_weight,
                model=model,
                tile=int(self.options['tile']),
                tile_pad=int(self.options['tile_pad']),
                pre_pad=int(self.options['pre_pad']),
                half=half,
                gpu_id=None)  # Can be set if multiple gpus are available

            if bool(self.options['face_enhance']):  # Use GFPGAN for face enhancement
                from gfpgan import GFPGANer
@@ -89,24 +84,24 @@ class RealESRGan(Processor):
                    arch='clean',
                    channel_multiplier=2,
                    bg_upsampler=upsampler)

            pilimage = PILImage.fromarray(input_image)
            img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR)

            try:
                if bool(self.options['face_enhance']):
                    _, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False,
                                                         paste_back=True)
                else:
                    output, _ = upsampler.enhance(img, outscale=int(self.options['outscale']))
            except RuntimeError as error:
                print('Error', error)
                print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')

            output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
            return output

        except Exception as e:
@@ -114,12 +109,10 @@ class RealESRGan(Processor):
            sys.stdout.flush()
            return "Error"

    def to_output(self, data: dict):
        self.current_session.output_data_templates['output_image'].data = data
        return self.current_session.output_data_templates

    def manageModel(self, model_name):
        if model_name == 'RealESRGAN_x4plus':  # x4 RRDBNet model
            model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
@@ -132,7 +125,8 @@ class RealESRGan(Processor):
        elif model_name == 'RealESRGAN_x4plus_anime_6B':  # x4 RRDBNet model with 6 blocks
            model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
            netscale = 4
            file_url = [
                'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
        elif model_name == 'RealESRGAN_x2plus':  # x2 RRDBNet model
            model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
            netscale = 2
@@ -148,5 +142,5 @@ class RealESRGan(Processor):
                'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
                'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
            ]

        return model, netscale, file_url
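The truncated `dni` branch mirrors Real-ESRGAN's stock inference script, which blends the normal and the weight-denoised ("wdn") network via Deep Network Interpolation. A sketch of how the branch typically continues upstream (illustrative, not the verbatim module code):

```python
# Sketch of the usual continuation of the dni branch, per Real-ESRGAN's stock
# inference script: blend the normal and "wdn" models by the denoise strength.
import os

denoise_strength = 0.5
model_path = os.path.join('weights', 'realesr-general-x4v3.pth')
wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
model_path = [model_path, wdn_model_path]              # RealESRGANer accepts a list for DNI
dni_weight = [denoise_strength, 1 - denoise_strength]  # interpolation weights for the two nets
```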

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Upscales an Image" is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
</meta>
<model create="RealESRGan" script="image_upscale_realesrgan.py"
optstr="{model:LIST:RealESRGAN_x4plus,RealESRNet_x4plus,RealESRGAN_x4plus_anime_6B,RealESRGAN_x2plus,realesr-animevideov3,realesr-general-x4v3};{outscale:STRING:4};{denoise_strength:STRING:0.5};{tile:STRING:0};{tile_pad:STRING:10};{pre_pad:STRING:0};{compute_type:STRING:fp32};{face_enhance:BOOL:False}"/>
</trainer>

View File

@@ -1,10 +1,10 @@
import argparse
import glob
import os

import cv2
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact

View File

@@ -3,98 +3,96 @@ def build_lora_xl(lora, prompt, lora_weight):
    if lora == "3drenderstyle":
        if lora_weight == "":
            lora_weight = "1"
        prompt = "3d style, 3d render, " + prompt + " <lora:3d_render_style_xl:" + lora_weight + ">"
        existing_lora = True

    if lora == "psychedelicnoir":
        if lora_weight == "":
            lora_weight = "1"
        prompt = prompt + " <lora:Psychedelic_Noir__sdxl:" + lora_weight + ">>"
        existing_lora = True

    if lora == "wojak":
        if lora_weight == "":
            lora_weight = "1"
        prompt = "<lora:wojak_big:" + lora_weight + ">, " + prompt + ", wojak"
        existing_lora = True

    if lora == "dreamarts":
        if lora_weight == "":
            lora_weight = "1"
        prompt = "<lora:DreamARTSDXL:" + lora_weight + ">, " + prompt
        existing_lora = True

    if lora == "voxel":
        if lora_weight == "":
            lora_weight = "1"
        prompt = "voxel style, " + prompt + " <lora:last:" + lora_weight + ">"
        existing_lora = True

    if lora == "kru3ger":
        if lora_weight == "":
            lora_weight = "1"
        prompt = "kru3ger_style, " + prompt + "<lora:sebastiankrueger-kru3ger_style-000007:" + lora_weight + ">"
        existing_lora = True

    if lora == "inkpunk":
        if lora_weight == "":
            lora_weight = "0.5"
        prompt = "inkpunk style, " + prompt + " <lora:IPXL_v2:" + lora_weight + ">"
        existing_lora = True

    if lora == "inkscenery":
        if lora_weight == "":
            lora_weight = "1"
        prompt = " ink scenery, " + prompt + " <lora:ink_scenery_xl:" + lora_weight + ">"
        existing_lora = True

    if lora == "inkpainting":
        if lora_weight == "":
            lora_weight = "0.7"
        prompt = "painting style, " + prompt + " <lora:Ink_Painting-000006::" + lora_weight + ">,"
        existing_lora = True

    if lora == "timburton":
        if lora_weight == "":
            lora_weight = "1.27"
        pencil_weight = "1.15"
        prompt = prompt + " (hand drawn with pencil" + pencil_weight + "), (tim burton style:" + lora_weight + ")"
        existing_lora = True

    if lora == "pixelart":
        if lora_weight == "":
            lora_weight = "1"
        prompt = prompt + " (flat shading:1.2), (minimalist:1.4), <lora:pixelbuildings128-v2:" + lora_weight + "> "
        existing_lora = True

    if lora == "pepe":
        if lora_weight == "":
            lora_weight = "0.8"
        prompt = prompt + " ,<lora:DD-pepe-v2:" + lora_weight + "> pepe"
        existing_lora = True

    if lora == "bettertext":
        if lora_weight == "":
            lora_weight = "1"
        prompt = prompt + " ,<lora:BetterTextRedmond:" + lora_weight + ">"
        existing_lora = True

    if lora == "mspaint":
        if lora_weight == "":
            lora_weight = "1"
        prompt = "MSPaint drawing " + prompt + ">"
        existing_lora = True

    if lora == "woodfigure":
        if lora_weight == "":
            lora_weight = "0.7"
        prompt = prompt + ",woodfigurez,artistic style <lora:woodfigurez-sdxl:" + lora_weight + ">"
        existing_lora = True

    if lora == "fireelement":
        prompt = prompt + ",composed of fire elements, fire element"
        existing_lora = True

    return lora, prompt, existing_lora
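A quick usage sketch for `build_lora_xl`. Note a latent bug visible in the diff: `existing_lora` is never initialized to `False`, so a lora name that matches no branch raises `UnboundLocalError` at the final `return` (callers only pass the IDs listed in the trainer files, which masks this):

```python
# Usage sketch for build_lora_xl; the inputs are illustrative.
lora, prompt, existing_lora = build_lora_xl("3drenderstyle", "a castle on a hill", "")
print(prompt)         # 3d style, 3d render, a castle on a hill <lora:3d_render_style_xl:1>
print(existing_lora)  # True
```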

View File

@@ -14,7 +14,7 @@ This module provides image generation based on prompts
- `1-1`, `4-3`, `16-9`, `16-10`, `3-4`, `9-16`, `10-16`
- `high_noise_frac`: Denoising factor
- `n_steps`: how many iterations should be performed

## Example payload

View File

@@ -2,26 +2,26 @@
"""
import gc
import os
import sys

# Add local dir to path for relative imports
sys.path.insert(0, os.path.dirname(__file__))

from nova_utils.interfaces.server_module import Processor
from nova_utils.utils.cache_utils import get_file
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, \
    EulerAncestralDiscreteScheduler
import numpy as np
from PIL import Image as PILImage
from lora import build_lora_xl

# Setting defaults
_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength": "0.58",
                    "guidance_scale": "11.0", "n_steps": "30", "lora": "", "lora_weight": "0.5"}


# TODO: add log infos,
class StableDiffusionXL(Processor):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
@@ -29,7 +29,6 @@ class StableDiffusionXL(Processor):
        self.device = None
        self.ds_iter = None
        self.current_session = None

        # IO shortcuts
        self.input = [x for x in self.model_io if x.io_type == "input"]
@@ -42,15 +41,15 @@ class StableDiffusionXL(Processor):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.ds_iter = ds_iter
        current_session_name = self.ds_iter.session_names[0]
        self.current_session = self.ds_iter.sessions[current_session_name]['manager']
        # input_image_url = self.current_session.input_data['input_image_url'].data
        # input_image_url = ' '.join(input_image_url)
        input_image = self.current_session.input_data['input_image'].data
        input_prompt = self.current_session.input_data['input_prompt'].data
        input_prompt = ' '.join(input_prompt)
        negative_prompt = self.current_session.input_data['negative_prompt'].data
        negative_prompt = ' '.join(negative_prompt)
        # print("Input Image: " + input_image_url)
        print("Input prompt: " + input_prompt)
        print("Negative prompt: " + negative_prompt)
@@ -58,8 +57,8 @@ class StableDiffusionXL(Processor):
            model = self.options['model']
            lora = self.options['lora']
            # init_image = load_image(input_image_url).convert("RGB")
            init_image = PILImage.fromarray(input_image)
            mwidth = 1024
            mheight = 1024
@@ -82,44 +81,42 @@ class StableDiffusionXL(Processor):
            if lora != "" and lora != "None":
                print("Loading lora...")
                lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "")

                from diffusers import AutoPipelineForImage2Image
                import torch

                # init_image = init_image.resize((int(w/2), int(h/2)))

                pipe = AutoPipelineForImage2Image.from_pretrained(
                    "stabilityai/stable-diffusion-xl-base-1.0",
                    torch_dtype=torch.float16).to("cuda")

                if existing_lora:
                    lora_uri = [x for x in self.trainer.meta_uri if x.uri_id == lora][0]
                    if str(lora_uri) == "":
                        return "Lora not found"
                    lora_path = get_file(
                        fname=str(lora_uri.uri_id) + ".safetensors",
                        origin=lora_uri.uri_url,
                        file_hash=lora_uri.uri_hash,
                        cache_dir=os.getenv("CACHE_DIR"),
                        tmp_dir=os.getenv("TMP_DIR"),
                    )
                    pipe.load_lora_weights(str(lora_path))
                    print("Loaded Lora: " + str(lora_path))

                seed = 20000
                generator = torch.manual_seed(seed)

                # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

                image = pipe(
                    prompt=input_prompt,
                    negative_prompt=negative_prompt,
                    image=init_image,
                    generator=generator,
                    num_inference_steps=int(self.options['n_steps']),
                    image_guidance_scale=float(self.options['guidance_scale']),
                    strength=float(str(self.options['strength']))).images[0]
@@ -137,19 +134,21 @@ class StableDiffusionXL(Processor):
                pipe = pipe.to(self.device)

                image = pipe(input_prompt, image=init_image,
                             negative_prompt=negative_prompt, num_inference_steps=n_steps,
                             strength=transformation_strength, guidance_scale=cfg_scale).images[0]

            elif model == "timbrooks/instruct-pix2pix":
                pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16,
                                                                              safety_checker=None)
                pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
                pipe.to(self.device)
                n_steps = int(self.options['n_steps'])
                cfg_scale = float(self.options['guidance_scale'])
                image = \
                    pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps,
                         image_guidance_scale=cfg_scale).images[0]

            if torch.cuda.is_available():
                del pipe
@@ -157,7 +156,6 @@ class StableDiffusionXL(Processor):
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

            numpy_array = np.array(image)
            return numpy_array
@@ -167,10 +165,6 @@ class StableDiffusionXL(Processor):
            sys.stdout.flush()
            return "Error"

    def to_output(self, data: dict):
        self.current_session.output_data_templates['output_image'].data = data
        return self.current_session.output_data_templates
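The hunks above elide the resize logic that follows `mwidth`/`mheight`: an aspect-preserving downscale so the init image fits within 1024x1024. A sketch of that pattern (illustrative, not the verbatim elided code):

```python
# Sketch of an aspect-preserving "fit into mwidth x mheight" resize, the
# pattern the elided block after mwidth/mheight implements.
from PIL import Image

def fit_within(image: Image.Image, mwidth: int = 1024, mheight: int = 1024) -> Image.Image:
    """Downscale image to fit within mwidth x mheight, keeping aspect ratio."""
    if image.width <= mwidth and image.height <= mheight:
        return image
    scale = min(mwidth / image.width, mheight / image.height)
    return image.resize((int(image.width * scale), int(image.height * scale)))
```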

View File

@@ -0,0 +1,42 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration"
description="Generates Image from existing image based on a prompt" is_iterable="False">
<io type="input" id="input_image" data="Image" default_value=""/>
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609"
hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552"
hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206"
hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194"
hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124"
hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160"
hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129"
hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862"
hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931"
hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917"
hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224"
hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793"
hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919"
hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257"
hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl-img2img.py"
optstr="{model:LIST:stabilityai/stable-diffusion-xl-refiner-1.0,timbrooks/instruct-pix2pix};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure};{strength:STRING:0.8};{guidance_scale:STRING:11.0};{n_steps:STRING:30}"/>
</trainer>

View File

@@ -0,0 +1,68 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from prompt"
is_iterable="False">
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value="sd.jpg"/>
<uri id="juggernaut" url="https://civitai.com/api/download/models/198530"
hash='1FE6C7EC54C786040CDABC7B4E89720069D97096922E20D01F13E7764412B47F'/>
<uri id="dynavision" url="https://civitai.com/api/download/models/198962"
hash='FD9CDC26C3B6D1F30BACBC435E455E925E35622E4873CCFC55FD1C88E980585E'/>
<uri id="colossus" url="https://civitai.com/api/download/models/213982"
hash='5A7E9DD581B3A9EDF2ED0D9FB2036C389325CD7BA13A754CE19BEEDBB69CEB73'/>
<uri id="newreality" url="https://civitai.com/api/download/models/232565"
hash='06A85616411135F8CAF161F71CB0948F79E85750E4AF36A885C75485A9B68E2F'/>
<uri id="unstable" url="https://civitai.com/api/download/models/209647"
hash='05C9E2274A74AE6957B986C92E5699FDFACFFD7EE24CED0D33CB696DE1A6C98B'/>
<uri id="fantastic" url="https://civitai.com/api/download/models/143722"
hash='B0C590726969EF93BC4136C167D339A277946787223BFAD7B1DC9A68A4F183FC'/>
<uri id="mohawk" url="https://civitai.com/api/download/models/207419"
hash='0248CA08AA5D5B342355173677C77ADD42E41ECEC3B6B6E52E9C9C471C30C508'/>
<uri id="dreamshaper" url="https://civitai.com/api/download/models/126688"
hash='0F1B80CFE81B9C3BDE7FDCBF6898897B2811B27BE1DF684583C3D85CBC9B1FA4'/>
<uri id="timeless" url="https://civitai.com/api/download/models/198246"
hash='A771B2B5E8D2A3C23A3A65F9A51E675F253F101C34BE7DC06FD18D534579D8F8'/>
<uri id="crystalclear" url="https://civitai.com/api/download/models/133832"
hash='0B76532E03A1BAC388CBF559AF00384ABCBD2B5B3F8834158AE4B1B9146A3843'/>
<uri id="chroma" url="https://civitai.com/api/download/models/169740"
hash='D2B9E5240C4BC74BB98063CEE16671FDC08D5B7BF197074A0C896E5DBB25BD24'/>
<uri id="bluepencil" url="https://civitai.com/api/download/models/212090"
hash='C4D7E01814F0EED57A7120629D3017AC018AD7CDECB48F7FBE6B12F9C9C4D6B9'/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609"
hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552"
hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206"
hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194"
hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124"
hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160"
hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129"
hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862"
hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931"
hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917"
hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224"
hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793"
hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919"
hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257"
hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl.py"
optstr="{model:LIST:stabilityai/stable-diffusion-xl-base-1.0,juggernaut,dynavision,colossus,newreality,unstable,fantastic,mohawk,dreamshaper,timeless,crystalclear,chroma,bluepencil};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure,fireelement};{width:STRING:1024};{height:STRING:1024};{high_noise_frac:STRING:0.8};{n_steps:STRING:35}"/>
</trainer>

View File

@@ -1,26 +1,20 @@
import gc
import os
import sys

sys.path.insert(0, os.path.dirname(__file__))

from nova_utils.interfaces.server_module import Processor
import torch
from diffusers import StableVideoDiffusionPipeline
import numpy as np
from PIL import Image as PILImage

# Setting defaults
_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps": "7", "seed": ""}


# TODO: add log infos,
class StableVideoDiffusion(Processor):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
@@ -28,24 +22,21 @@ class StableVideoDiffusion(Processor):
        self.device = None
        self.ds_iter = None
        self.current_session = None

        # IO shortcuts
        self.input = [x for x in self.model_io if x.io_type == "input"]
        self.output = [x for x in self.model_io if x.io_type == "output"]
        self.input = self.input[0]
        self.output = self.output[0]

    def process_data(self, ds_iter) -> dict:
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.ds_iter = ds_iter
        current_session_name = self.ds_iter.session_names[0]
        self.current_session = self.ds_iter.sessions[current_session_name]['manager']
        input_image = self.current_session.input_data['input_image'].data

        try:
            pipe = StableVideoDiffusionPipeline.from_pretrained(
                self.options["model"], torch_dtype=torch.float16, variant="fp16"
@@ -53,7 +44,7 @@ class StableVideoDiffusion(Processor):
            pipe.enable_model_cpu_offload()

            # Load the conditioning image
            image = PILImage.fromarray(input_image)
            image = image.resize((1024, 576))

            if self.options["seed"] != "" and self.options["seed"] != " ":
@@ -68,7 +59,6 @@ class StableVideoDiffusion(Processor):
                torch.cuda.empty_cache()
                torch.cuda.ipc_collect()

            np_video = np.stack([np.asarray(x) for x in frames])
            return np_video
@@ -77,7 +67,7 @@ class StableVideoDiffusion(Processor):
            print(e)
            sys.stdout.flush()
            return "Error"

    def calculate_aspect(self, width: int, height: int):
        def gcd(a, b):
            """The GCD (greatest common divisor) is the highest number that evenly divides both width and height."""
@@ -89,12 +79,10 @@ class StableVideoDiffusion(Processor):
        return x, y

    def to_output(self, data: list):
        video = self.current_session.output_data_templates['output_video']
        video.data = data
        video.meta_data.sample_rate = int(self.options['fps'])
        video.meta_data.media_type = 'video'
        return self.current_session.output_data_templates
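`calculate_aspect` reduces a resolution to its ratio via the GCD; for example, the 1024x576 conditioning size used above reduces to 16:9:

```python
# Worked example of the GCD-based aspect reduction; math.gcd stands in for
# the module's local gcd helper.
from math import gcd

width, height = 1024, 576
divisor = gcd(width, height)                      # 64
print(f"{width // divisor}:{height // divisor}")  # 16:9
```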

View File

@@ -0,0 +1,11 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="VideoGeneration" description="Generates Video from Image/prompt"
is_iterable="False">
<io type="input" id="input_image" data="Image" default_value=""/>
<io type="output" id="output_video" data="stream:Video" default_value="sd_generated.mp4"/>
</meta>
<model create="StableVideoDiffusion" script="stablevideodiffusion.py"
optstr="{model:LIST:stabilityai/stable-video-diffusion-img2vid-xt,stabilityai/stable-video-diffusion-img2vid};{fps:STRING:7};{seed:STRING: }"/>
</trainer>

View File

@@ -21,9 +21,10 @@ speaker diarization.
- `word` Improved segmentation using separate alignment model. Equivalent to word alignment.
- `language`: language code for transcription and alignment models. Supported languages:
  - `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`,
    `te`, `tr`, `uk`, `ur`, `vi`, `zh`
  - `None`: auto-detect language from first 30 seconds of audio
- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption

## Examples

View File

@@ -1,10 +1,13 @@
"""WhisperX Module
"""
import sys

from nova_utils.interfaces.server_module import Processor

# Setting defaults
_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None,
                    'compute_type': 'float16'}

# supported language codes, cf. whisperx/alignment.py
# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None}
@@ -45,11 +48,14 @@ class WhisperX(Processor):
            sys.stdout.flush()
            model = whisperx.load_model(self.options["model"], self.device, compute_type='float32',
                                        language=self.options['language'])

        result = model.transcribe(audio, batch_size=int(self.options["batch_size"]))

        # delete model if low on GPU resources
        import gc;
        gc.collect();
        torch.cuda.empty_cache();
        del model

        if not self.options["alignment_mode"] == "raw":
            # load alignment model and metadata
@@ -64,7 +70,10 @@ class WhisperX(Processor):
            result = result_aligned

            # delete model if low on GPU resources
            import gc;
            gc.collect();
            torch.cuda.empty_cache();
            del model_a

        return result
@@ -83,26 +92,26 @@ class WhisperX(Processor):
                if "end" in w.keys():
                    last_end = w["end"]
                else:
                    # TODO: rethink lower bound for confidence; place word centred instead of left aligned
                    w["start"] = last_end
                    last_end += 0.065
                    w["end"] = last_end
                    # w["score"] = 0.000
                    w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4])

        def _hmean(scores):
            if len(scores) > 0:
                prod = scores[0]
                for s in scores[1:]:
                    prod *= s
                prod = prod ** (1 / len(scores))
            else:
                prod = 0
            return prod

        if (
                self.options["alignment_mode"] == "word"
                or self.options["alignment_mode"] == "segment"
        ):
            _fix_missing_timestamps(data)
@@ -113,12 +122,13 @@ class WhisperX(Processor):
            ]
        else:
            anno_data = [
                # (w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
                (w["start"], w["end"], w["text"], 1) for w in data["segments"]
                # alignment 'raw' no longer contains a score(?)
            ]

        # convert to milliseconds
        anno_data = [(x[0] * 1000, x[1] * 1000, x[2], x[3]) for x in anno_data]
        out = self.session_manager.output_data_templates[self.output.io_id]
        out.data = anno_data
        return self.session_manager.output_data_templates
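One thing worth noting while this code is being touched: despite its name, `_hmean` computes a geometric mean (the n-th root of the product of the word scores), not a harmonic mean:

```python
# _hmean returns the geometric mean of the scores, not the harmonic mean.
scores = [0.9, 0.4]
prod = 1.0
for s in scores:
    prod *= s
geometric = prod ** (1 / len(scores))            # 0.6   <- what the code returns
harmonic = len(scores) / sum(1 / s for s in scores)  # ~0.554 <- an actual harmonic mean
print(geometric, harmonic)
```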

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="Transcript" description="Transcribes audio" is_iterable="False">
<io type="input" id="audio" data="stream:Audio" default_value="audio"/>
<io type="output" id="transcript" data="annotation:Free" default_value="transcript"/>
</meta>
<model create="WhisperX" script="whisperx_transcript.py"
optstr="{model:LIST:base,tiny,small,medium,large-v1,large-v2,tiny.en,base.en,small.en,medium.en};{alignment_mode:LIST:segment,word,raw};{language:LIST:None,en,de,ar,cs,da,el,es,fa,fi,fr,he,hu,it,ja,ko,nl,pl,pt,ru,te,tr,uk,ur,vi,zh};{batch_size:STRING:16};{compute_type:LIST:float16,float32,int8}"/>
</trainer>

View File

@@ -4,9 +4,10 @@ import json
import os
import time
import zipfile

import PIL.Image as Image
import pandas as pd
import requests
from moviepy.video.io.VideoFileClip import VideoFileClip
from nostr_dvm.utils.output_utils import upload_media_to_hoster
@@ -24,6 +25,7 @@ in the module that is calling the server
"""


def send_request_to_server(request_form, address):
    print("Sending job to Server")
    url = ('http://' + address + '/process')
@@ -46,6 +48,7 @@ def send_file_to_server(filepath, address):
    return result


"""
check_n_server_status(request_form, address)
Function that requests the status of the current process with the jobID (we use the Nostr event as jobID).
@@ -76,7 +79,6 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
        # WAITING = 0, RUNNING = 1, FINISHED = 2, ERROR = 3
        time.sleep(1.0)

        if status == 2:
            try:
                url_fetch = 'http://' + address + '/fetch_result'
@@ -93,7 +95,7 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
                    return result
                elif content_type == 'video/mp4':
                    with open('./outputs/video.mp4', 'wb') as f:
                        f.write(response.content)
                        f.close()
                    clip = VideoFileClip("./outputs/video.mp4")
                    clip.write_videofile("./outputs/video2.mp4")
@@ -121,4 +123,4 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
                print("Couldn't fetch result: " + str(e))
        elif status == 3:
            return "error"

View File

@@ -42,14 +42,14 @@ class StableDiffusion:
        self.tokenizer = load_tokenizer(model)

    def generate_latents(
            self,
            text: str,
            n_images: int = 1,
            num_steps: int = 50,
            cfg_weight: float = 7.5,
            negative_text: str = "",
            latent_size: Tuple[int] = (64, 64),
            seed=None,
    ):
        # Set the PRNG state
        seed = seed or int(time.time())
@@ -94,4 +94,4 @@ class StableDiffusion:
    def decode(self, x_t):
        x = self.autoencoder.decode(x_t / self.autoencoder.scaling_factor)
        x = mx.minimum(1, mx.maximum(0, x / 2 + 0.5))
        return x
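For orientation, this class is driven like the mlx-examples txt2image script: `generate_latents` yields the latents step by step, and `decode` maps the final latents back to [0, 1] pixel space (as the clamp above shows). A sketch, assuming the generator-style API used by the mlx examples:

```python
# Sketch of driving StableDiffusion, in the style of the mlx-examples
# txt2image script (generator-style API assumed).
import mlx.core as mx

sd = StableDiffusion()
for x_t in sd.generate_latents("a photo of an astronaut riding a horse on mars"):
    mx.eval(x_t)        # force evaluation of each denoising step
image = sd.decode(x_t)  # array with values clamped to [0, 1]
```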

View File

@@ -1,7 +1,7 @@
# Copyright © 2023 Apple Inc.

from dataclasses import dataclass
from typing import Tuple


@dataclass

View File

@@ -1,14 +1,12 @@
# Copyright © 2023 Apple Inc.

import json

import mlx.core as mx
import numpy as np
from huggingface_hub import hf_hub_download
from mlx.utils import tree_unflatten
from safetensors import safe_open as safetensor_open

from .clip import CLIPTextModel
from .config import UNetConfig, CLIPTextModelConfig, AutoencoderConfig, DiffusionConfig
@@ -16,7 +14,6 @@ from .tokenizer import Tokenizer
from .unet import UNetModel
from .vae import Autoencoder

_DEFAULT_MODEL = "stabilityai/stable-diffusion-2-1-base"
_MODELS = {
    # See https://huggingface.co/stabilityai/stable-diffusion-2-1-base for the model details and license
@@ -285,7 +282,7 @@ def load_tokenizer(key: str = _DEFAULT_MODEL):
    merges_file = hf_hub_download(key, _MODELS[key]["tokenizer_merges"])
    with open(merges_file, encoding="utf-8") as f:
        bpe_merges = f.read().strip().split("\n")[1: 49152 - 256 - 2 + 1]
    bpe_merges = [tuple(m.split()) for m in bpe_merges]
    bpe_ranks = dict(map(reversed, enumerate(bpe_merges)))

View File

@@ -1,9 +1,9 @@
# Copyright © 2023 Apple Inc.

import mlx.core as mx

from .config import DiffusionConfig


def _linspace(a, b, num):
    x = mx.arange(0, num) / (num - 1)
@@ -37,7 +37,7 @@ class SimpleEulerSampler:
            )
        elif config.beta_schedule == "scaled_linear":
            betas = _linspace(
                config.beta_start ** 0.5, config.beta_end ** 0.5, config.num_train_steps
            ).square()
        else:
            raise NotImplementedError(f"{config.beta_schedule} is not implemented.")
@@ -52,7 +52,7 @@ class SimpleEulerSampler:
    def sample_prior(self, shape, dtype=mx.float32, key=None):
        noise = mx.random.normal(shape, key=key)
        return (
                noise * self._sigmas[-1] * (self._sigmas[-1].square() + 1).rsqrt()
        ).astype(dtype)

    def sigmas(self, t):
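The `scaled_linear` branch interpolates between `beta_start` and `beta_end` in square-root space and then squares the result. Equivalent NumPy, with typical SD-style betas as assumed example values:

```python
# Equivalent of the scaled_linear schedule above:
# betas[i] = (sqrt(b0) + t_i * (sqrt(b1) - sqrt(b0)))**2
import numpy as np

beta_start, beta_end, num_train_steps = 0.00085, 0.012, 1000  # assumed SD-style defaults
t = np.arange(0, num_train_steps) / (num_train_steps - 1)
betas = (beta_start ** 0.5 + t * (beta_end ** 0.5 - beta_start ** 0.5)) ** 2
print(betas[0], betas[-1])  # 0.00085 0.012
```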

View File

@@ -34,11 +34,11 @@ class TimestepEmbedding(nn.Module):
class TransformerBlock(nn.Module): class TransformerBlock(nn.Module):
def __init__( def __init__(
self, self,
model_dims: int, model_dims: int,
num_heads: int, num_heads: int,
hidden_dims: Optional[int] = None, hidden_dims: Optional[int] = None,
memory_dims: Optional[int] = None, memory_dims: Optional[int] = None,
): ):
super().__init__() super().__init__()
@@ -85,13 +85,13 @@ class Transformer2D(nn.Module):
"""A transformer model for inputs with 2 spatial dimensions.""" """A transformer model for inputs with 2 spatial dimensions."""
def __init__( def __init__(
self, self,
in_channels: int, in_channels: int,
model_dims: int, model_dims: int,
encoder_dims: int, encoder_dims: int,
num_heads: int, num_heads: int,
num_layers: int = 1, num_layers: int = 1,
norm_num_groups: int = 32, norm_num_groups: int = 32,
): ):
super().__init__() super().__init__()
@@ -125,11 +125,11 @@ class Transformer2D(nn.Module):
class ResnetBlock2D(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: Optional[int] = None,
        groups: int = 32,
        temb_channels: Optional[int] = None,
    ):
        super().__init__()
@@ -169,19 +169,19 @@ class ResnetBlock2D(nn.Module):
class UNetBlock2D(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        temb_channels: int,
        prev_out_channels: Optional[int] = None,
        num_layers: int = 1,
        transformer_layers_per_block: int = 1,
        num_attention_heads: int = 8,
        cross_attention_dim=1280,
        resnet_groups: int = 32,
        add_downsample=True,
        add_upsample=True,
        add_cross_attention=True,
    ):
        super().__init__()
@@ -232,13 +232,13 @@ class UNetBlock2D(nn.Module):
        )

    def __call__(
        self,
        x,
        encoder_x=None,
        temb=None,
        attn_mask=None,
        encoder_attn_mask=None,
        residual_hidden_states=None,
    ):
        output_states = []
@@ -340,9 +340,9 @@ class UNetModel(nn.Module):
        # Make the upsampling blocks
        block_channels = (
            [config.block_out_channels[0]]
            + list(config.block_out_channels)
            + [config.block_out_channels[-1]]
        )
        self.up_blocks = [
            UNetBlock2D(


@@ -44,13 +44,13 @@ class Attention(nn.Module):
class EncoderDecoderBlock2D(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        num_layers: int = 1,
        resnet_groups: int = 32,
        add_downsample=True,
        add_upsample=True,
    ):
        super().__init__()
@@ -93,12 +93,12 @@ class Encoder(nn.Module):
"""Implements the encoder side of the Autoencoder.""" """Implements the encoder side of the Autoencoder."""
def __init__( def __init__(
self, self,
in_channels: int, in_channels: int,
out_channels: int, out_channels: int,
block_out_channels: List[int] = [64], block_out_channels: List[int] = [64],
layers_per_block: int = 2, layers_per_block: int = 2,
resnet_groups: int = 32, resnet_groups: int = 32,
): ):
super().__init__() super().__init__()
@@ -159,12 +159,12 @@ class Decoder(nn.Module):
"""Implements the decoder side of the Autoencoder.""" """Implements the decoder side of the Autoencoder."""
def __init__( def __init__(
self, self,
in_channels: int, in_channels: int,
out_channels: int, out_channels: int,
block_out_channels: List[int] = [64], block_out_channels: List[int] = [64],
layers_per_block: int = 2, layers_per_block: int = 2,
resnet_groups: int = 32, resnet_groups: int = 32,
): ):
super().__init__() super().__init__()


@@ -1,10 +0,0 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Prompt from Image" is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output" data="text" default_value=""/>
</meta>
<model create="ImageInterrogator" script="image_interrogator.py" optstr="{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"/>
</trainer>
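
The optstr attribute above packs a module's options into {name:TYPE:choices-or-default} entries joined by ";". A hypothetical parser sketch (nova-server's real parsing may differ; treating the first LIST entry as the default is an assumption):

# Hypothetical optstr parser; not nova-server's actual implementation.
def parse_optstr(optstr: str) -> dict:
    options = {}
    for entry in optstr.split(";"):
        name, kind, values = entry.strip("{}").split(":", 2)
        if kind == "LIST":
            options[name] = values.split(",")[0]  # assume first entry is the default
        elif kind == "BOOL":
            options[name] = values == "True"
        else:  # STRING and similar: keep the raw default
            options[name] = values
    return options

print(parse_optstr("{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"))
# {'kind': 'prompt', 'mode': 'best'}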


@@ -1,9 +0,0 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Upscales an Image" is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
</meta>
<model create="RealESRGan" script="image_upscale_realesrgan.py" optstr="{model:LIST:RealESRGAN_x4plus,RealESRNet_x4plus,RealESRGAN_x4plus_anime_6B,RealESRGAN_x2plus,realesr-animevideov3,realesr-general-x4v3};{outscale:STRING:4};{denoise_strength:STRING:0.5};{tile:STRING:0};{tile_pad:STRING:10};{pre_pad:STRING:0};{compute_type:STRING:fp32};{face_enhance:BOOL:False}"/>
</trainer>


@@ -1,26 +0,0 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from existing image based on a prompt" is_iterable="False">
<io type="input" id="input_image" data="Image" default_value=""/>
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl-img2img.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-refiner-1.0,timbrooks/instruct-pix2pix};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure};{strength:STRING:0.8};{guidance_scale:STRING:11.0};{n_steps:STRING:30}"/>
</trainer>
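
Each uri element above pairs a download URL with what looks like a hex-encoded SHA-256 digest. A minimal sketch of checking a downloaded checkpoint against such a value (the helper and its use are assumptions; the backend's own download logic is not part of this diff):

import hashlib

def verify_checksum(path: str, expected_hex: str) -> bool:
    # Stream the file so multi-GB checkpoints never load fully into memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest().lower() == expected_hex.lower()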


@@ -1,41 +0,0 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from prompt" is_iterable="False">
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value="sd.jpg"/>
<uri id="juggernaut" url="https://civitai.com/api/download/models/198530" hash='1FE6C7EC54C786040CDABC7B4E89720069D97096922E20D01F13E7764412B47F'/>
<uri id="dynavision" url="https://civitai.com/api/download/models/198962" hash='FD9CDC26C3B6D1F30BACBC435E455E925E35622E4873CCFC55FD1C88E980585E'/>
<uri id="colossus" url="https://civitai.com/api/download/models/213982" hash='5A7E9DD581B3A9EDF2ED0D9FB2036C389325CD7BA13A754CE19BEEDBB69CEB73'/>
<uri id="newreality" url="https://civitai.com/api/download/models/232565" hash='06A85616411135F8CAF161F71CB0948F79E85750E4AF36A885C75485A9B68E2F'/>
<uri id="unstable" url="https://civitai.com/api/download/models/209647" hash='05C9E2274A74AE6957B986C92E5699FDFACFFD7EE24CED0D33CB696DE1A6C98B'/>
<uri id="fantastic" url="https://civitai.com/api/download/models/143722" hash='B0C590726969EF93BC4136C167D339A277946787223BFAD7B1DC9A68A4F183FC'/>
<uri id="mohawk" url="https://civitai.com/api/download/models/207419" hash='0248CA08AA5D5B342355173677C77ADD42E41ECEC3B6B6E52E9C9C471C30C508'/>
<uri id="dreamshaper" url="https://civitai.com/api/download/models/126688" hash='0F1B80CFE81B9C3BDE7FDCBF6898897B2811B27BE1DF684583C3D85CBC9B1FA4'/>
<uri id="timeless" url="https://civitai.com/api/download/models/198246" hash='A771B2B5E8D2A3C23A3A65F9A51E675F253F101C34BE7DC06FD18D534579D8F8'/>
<uri id="crystalclear" url="https://civitai.com/api/download/models/133832" hash='0B76532E03A1BAC388CBF559AF00384ABCBD2B5B3F8834158AE4B1B9146A3843'/>
<uri id="chroma" url="https://civitai.com/api/download/models/169740" hash='D2B9E5240C4BC74BB98063CEE16671FDC08D5B7BF197074A0C896E5DBB25BD24'/>
<uri id="bluepencil" url="https://civitai.com/api/download/models/212090" hash='C4D7E01814F0EED57A7120629D3017AC018AD7CDECB48F7FBE6B12F9C9C4D6B9'/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-base-1.0,juggernaut,dynavision,colossus,newreality,unstable,fantastic,mohawk,dreamshaper,timeless,crystalclear,chroma,bluepencil};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure,fireelement};{width:STRING:1024};{height:STRING:1024};{high_noise_frac:STRING:0.8};{n_steps:STRING:35}"/>
</trainer>
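
Numeric options in these optstrs are declared as STRING, so the consuming module presumably casts them before use; a small sketch mirroring the names above (the casting is an assumption about the module, which is not shown in this diff):

# Options arrive as strings per the optstr declaration above.
opts = {"width": "1024", "height": "1024", "high_noise_frac": "0.8", "n_steps": "35"}
width, height = int(opts["width"]), int(opts["height"])
high_noise_frac = float(opts["high_noise_frac"])
n_steps = int(opts["n_steps"])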


@@ -1,9 +0,0 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="VideoGeneration" description="Generates Video from Image/prompt" is_iterable="False">
<io type="input" id="input_image" data="Image" default_value=""/>
<io type="output" id="output_video" data="stream:Video" default_value="sd_generated.mp4"/>
</meta>
<model create="StableVideoDiffusion" script="stablevideodiffusion.py" optstr="{model:LIST:stabilityai/stable-video-diffusion-img2vid-xt,stabilityai/stable-video-diffusion-img2vid};{fps:STRING:7};{seed:STRING: }"/>
</trainer>


@@ -1,9 +0,0 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="Transcript" description="Transcribes audio" is_iterable="False">
<io type="input" id="audio" data="stream:Audio" default_value="audio"/>
<io type="output" id="transcript" data="annotation:Free" default_value="transcript"/>
</meta>
<model create="WhisperX" script="whisperx_transcript.py" optstr="{model:LIST:base,tiny,small,medium,large-v1,large-v2,tiny.en,base.en,small.en,medium.en};{alignment_mode:LIST:segment,word,raw};{language:LIST:None,en,de,ar,cs,da,el,es,fa,fi,fr,he,hu,it,ja,ko,nl,pl,pt,ru,te,tr,uk,ur,vi,zh};{batch_size:STRING:16};{compute_type:LIST:float16,float32,int8}"/>
</trainer>