mirror of
https://github.com/believethehype/nostrdvm.git
synced 2025-09-26 18:27:02 +02:00
cleanup backend examples
This commit is contained in:
@@ -1,18 +1,17 @@
|
||||
"""StableDiffusionXL Module
|
||||
"""
|
||||
import gc
|
||||
import sys
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
||||
|
||||
from nova_utils.interfaces.server_module import Processor
|
||||
|
||||
# Setting defaults
|
||||
_default_options = {"kind": "prompt", "mode": "fast" }
|
||||
_default_options = {"kind": "prompt", "mode": "fast"}
|
||||
|
||||
# TODO: add log infos,
|
||||
|
||||
# TODO: add log infos,
|
||||
class ImageInterrogator(Processor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -20,7 +19,6 @@ class ImageInterrogator(Processor):
|
||||
self.device = None
|
||||
self.ds_iter = None
|
||||
self.current_session = None
|
||||
|
||||
|
||||
# IO shortcuts
|
||||
self.input = [x for x in self.model_io if x.io_type == "input"]
|
||||
@@ -36,18 +34,17 @@ class ImageInterrogator(Processor):
|
||||
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
self.ds_iter = ds_iter
|
||||
current_session_name = self.ds_iter.session_names[0]
|
||||
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
|
||||
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
|
||||
kind = self.options['kind'] #"prompt" #"analysis" #prompt
|
||||
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
|
||||
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
|
||||
kind = self.options['kind'] # "prompt" #"analysis" #prompt
|
||||
mode = self.options['mode']
|
||||
#url = self.current_session.input_data['input_image_url'].data[0]
|
||||
#print(url)
|
||||
# url = self.current_session.input_data['input_image_url'].data[0]
|
||||
# print(url)
|
||||
input_image = self.current_session.input_data['input_image'].data
|
||||
init_image = PILImage.fromarray(input_image)
|
||||
init_image = PILImage.fromarray(input_image)
|
||||
mwidth = 256
|
||||
mheight = 256
|
||||
|
||||
|
||||
w = mwidth
|
||||
h = mheight
|
||||
if init_image.width > init_image.height:
|
||||
@@ -68,11 +65,9 @@ class ImageInterrogator(Processor):
|
||||
|
||||
config = Config(clip_model_name="ViT-L-14/openai", device="cuda")
|
||||
|
||||
|
||||
if kind == "analysis":
|
||||
ci = Interrogator(config)
|
||||
|
||||
|
||||
image_features = ci.image_to_features(init_image)
|
||||
|
||||
top_mediums = ci.mediums.rank(image_features, 5)
|
||||
@@ -81,15 +76,20 @@ class ImageInterrogator(Processor):
|
||||
top_trendings = ci.trendings.rank(image_features, 5)
|
||||
top_flavors = ci.flavors.rank(image_features, 5)
|
||||
|
||||
medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
|
||||
artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
|
||||
medium_ranks = {medium: sim for medium, sim in
|
||||
zip(top_mediums, ci.similarities(image_features, top_mediums))}
|
||||
artist_ranks = {artist: sim for artist, sim in
|
||||
zip(top_artists, ci.similarities(image_features, top_artists))}
|
||||
movement_ranks = {movement: sim for movement, sim in
|
||||
zip(top_movements, ci.similarities(image_features, top_movements))}
|
||||
zip(top_movements, ci.similarities(image_features, top_movements))}
|
||||
trending_ranks = {trending: sim for trending, sim in
|
||||
zip(top_trendings, ci.similarities(image_features, top_trendings))}
|
||||
flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
|
||||
zip(top_trendings, ci.similarities(image_features, top_trendings))}
|
||||
flavor_ranks = {flavor: sim for flavor, sim in
|
||||
zip(top_flavors, ci.similarities(image_features, top_flavors))}
|
||||
|
||||
result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
|
||||
result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(
|
||||
artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(
|
||||
trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
|
||||
|
||||
print(result)
|
||||
return result
|
||||
@@ -100,8 +100,8 @@ class ImageInterrogator(Processor):
|
||||
ci.config.chunk_size = 2024
|
||||
ci.config.clip_offload = True
|
||||
ci.config.apply_low_vram_defaults()
|
||||
#MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
|
||||
ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024
|
||||
# MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
|
||||
ci.config.flavor_intermediate_count = 2024 # if clip_model_name == MODELS[0] else 1024
|
||||
|
||||
image = init_image
|
||||
if mode == 'best':
|
||||
@@ -113,17 +113,15 @@ class ImageInterrogator(Processor):
|
||||
elif mode == 'negative':
|
||||
prompt = ci.interrogate_negative(image)
|
||||
|
||||
#print(str(prompt))
|
||||
# print(str(prompt))
|
||||
return prompt
|
||||
|
||||
|
||||
# config = Config(clip_model_name=os.environ['TRANSFORMERS_CACHE'] + "ViT-L-14/openai", device="cuda")git
|
||||
# ci = Interrogator(config)
|
||||
# "ViT-L-14/openai"))
|
||||
# "ViT-g-14/laion2B-s34B-b88K"))
|
||||
# "ViT-L-14/openai"))
|
||||
# "ViT-g-14/laion2B-s34B-b88K"))
|
||||
|
||||
|
||||
def to_output(self, data: dict):
|
||||
import numpy as np
|
||||
self.current_session.output_data_templates['output'].data = np.array([data])
|
||||
return self.current_session.output_data_templates
|
||||
return self.current_session.output_data_templates
|
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration" description="Generates Prompt from Image"
|
||||
is_iterable="False">
|
||||
<io type="input" id="input_image" data="image" default_value=""/>
|
||||
<io type="output" id="output" data="text" default_value=""/>
|
||||
|
||||
</meta>
|
||||
<model create="ImageInterrogator" script="image_interrogator.py"
|
||||
optstr="{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"/>
|
||||
</trainer>
|
@@ -2,25 +2,23 @@
|
||||
"""
|
||||
|
||||
import os
|
||||
import glob
|
||||
import sys
|
||||
from nova_utils.interfaces.server_module import Processor
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image as PILImage
|
||||
from basicsr.archs.rrdbnet_arch import RRDBNet
|
||||
from basicsr.utils.download_util import load_file_from_url
|
||||
import numpy as np
|
||||
|
||||
|
||||
|
||||
from nova_utils.interfaces.server_module import Processor
|
||||
from realesrgan import RealESRGANer
|
||||
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
|
||||
import cv2
|
||||
from PIL import Image as PILImage
|
||||
|
||||
|
||||
# Setting defaults
|
||||
_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False }
|
||||
_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0, "tile_pad": 10,
|
||||
"pre_pad": 0, "compute_type": "fp32", "face_enhance": False}
|
||||
|
||||
# TODO: add log infos,
|
||||
|
||||
# TODO: add log infos,
|
||||
class RealESRGan(Processor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -28,8 +26,7 @@ class RealESRGan(Processor):
|
||||
self.device = None
|
||||
self.ds_iter = None
|
||||
self.current_session = None
|
||||
self.model_path = None #Maybe need this later for manual path
|
||||
|
||||
self.model_path = None # Maybe need this later for manual path
|
||||
|
||||
# IO shortcuts
|
||||
self.input = [x for x in self.model_io if x.io_type == "input"]
|
||||
@@ -42,12 +39,11 @@ class RealESRGan(Processor):
|
||||
current_session_name = self.ds_iter.session_names[0]
|
||||
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
|
||||
input_image = self.current_session.input_data['input_image'].data
|
||||
|
||||
|
||||
try:
|
||||
model, netscale, file_url = self.manageModel(str(self.options['model']))
|
||||
|
||||
if self.model_path is not None:
|
||||
if self.model_path is not None:
|
||||
model_path = self.model_path
|
||||
else:
|
||||
model_path = os.path.join('weights', self.options['model'] + '.pth')
|
||||
@@ -58,7 +54,7 @@ class RealESRGan(Processor):
|
||||
model_path = load_file_from_url(
|
||||
url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
|
||||
|
||||
# use dni to control the denoise strength
|
||||
# use dni to control the denoise strength
|
||||
dni_weight = None
|
||||
if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1:
|
||||
wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
|
||||
@@ -67,19 +63,18 @@ class RealESRGan(Processor):
|
||||
|
||||
half = True
|
||||
if self.options["compute_type"] == "fp32":
|
||||
half=False
|
||||
|
||||
half = False
|
||||
|
||||
upsampler = RealESRGANer(
|
||||
scale=netscale,
|
||||
model_path=model_path,
|
||||
dni_weight=dni_weight,
|
||||
model=model,
|
||||
tile= int(self.options['tile']),
|
||||
tile_pad=int(self.options['tile_pad']),
|
||||
pre_pad=int(self.options['pre_pad']),
|
||||
half=half,
|
||||
gpu_id=None) #Can be set if multiple gpus are available
|
||||
scale=netscale,
|
||||
model_path=model_path,
|
||||
dni_weight=dni_weight,
|
||||
model=model,
|
||||
tile=int(self.options['tile']),
|
||||
tile_pad=int(self.options['tile_pad']),
|
||||
pre_pad=int(self.options['pre_pad']),
|
||||
half=half,
|
||||
gpu_id=None) # Can be set if multiple gpus are available
|
||||
|
||||
if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement
|
||||
from gfpgan import GFPGANer
|
||||
@@ -89,24 +84,24 @@ class RealESRGan(Processor):
|
||||
arch='clean',
|
||||
channel_multiplier=2,
|
||||
bg_upsampler=upsampler)
|
||||
|
||||
|
||||
pilimage = PILImage.fromarray(input_image)
|
||||
|
||||
pilimage = PILImage.fromarray(input_image)
|
||||
img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR)
|
||||
try:
|
||||
if bool(self.options['face_enhance']):
|
||||
_, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
|
||||
_, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False,
|
||||
paste_back=True)
|
||||
else:
|
||||
output, _ = upsampler.enhance(img, outscale=int(self.options['outscale']))
|
||||
except RuntimeError as error:
|
||||
print('Error', error)
|
||||
print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
|
||||
|
||||
|
||||
output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
except Exception as e:
|
||||
@@ -114,12 +109,10 @@ class RealESRGan(Processor):
|
||||
sys.stdout.flush()
|
||||
return "Error"
|
||||
|
||||
|
||||
def to_output(self, data: dict):
|
||||
self.current_session.output_data_templates['output_image'].data = data
|
||||
return self.current_session.output_data_templates
|
||||
|
||||
|
||||
def manageModel(self, model_name):
|
||||
if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
|
||||
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
|
||||
@@ -132,7 +125,8 @@ class RealESRGan(Processor):
|
||||
elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
|
||||
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
|
||||
netscale = 4
|
||||
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
|
||||
file_url = [
|
||||
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
|
||||
elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
|
||||
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
|
||||
netscale = 2
|
||||
@@ -148,5 +142,5 @@ class RealESRGan(Processor):
|
||||
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
|
||||
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
|
||||
]
|
||||
|
||||
return model, netscale, file_url
|
||||
|
||||
return model, netscale, file_url
|
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration" description="Upscales an Image" is_iterable="False">
|
||||
<io type="input" id="input_image" data="image" default_value=""/>
|
||||
<io type="output" id="output_image" data="image" default_value=""/>
|
||||
</meta>
|
||||
<model create="RealESRGan" script="image_upscale_realesrgan.py"
|
||||
optstr="{model:LIST:RealESRGAN_x4plus,RealESRNet_x4plus,RealESRGAN_x4plus_anime_6B,RealESRGAN_x2plus,realesr-animevideov3,realesr-general-x4v3};{outscale:STRING:4};{denoise_strength:STRING:0.5};{tile:STRING:0};{tile_pad:STRING:10};{pre_pad:STRING:0};{compute_type:STRING:fp32};{face_enhance:BOOL:False}"/>
|
||||
</trainer>
|
@@ -1,10 +1,10 @@
|
||||
import argparse
|
||||
import cv2
|
||||
import glob
|
||||
import os
|
||||
|
||||
import cv2
|
||||
from basicsr.archs.rrdbnet_arch import RRDBNet
|
||||
from basicsr.utils.download_util import load_file_from_url
|
||||
|
||||
from realesrgan import RealESRGANer
|
||||
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
|
||||
|
@@ -3,98 +3,96 @@ def build_lora_xl(lora, prompt, lora_weight):
|
||||
if lora == "3drenderstyle":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = "3d style, 3d render, " + prompt + " <lora:3d_render_style_xl:"+lora_weight+">"
|
||||
prompt = "3d style, 3d render, " + prompt + " <lora:3d_render_style_xl:" + lora_weight + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "psychedelicnoir":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = prompt + " <lora:Psychedelic_Noir__sdxl:"+lora_weight+">>"
|
||||
prompt = prompt + " <lora:Psychedelic_Noir__sdxl:" + lora_weight + ">>"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "wojak":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = "<lora:wojak_big:"+lora_weight+">, " + prompt + ", wojak"
|
||||
prompt = "<lora:wojak_big:" + lora_weight + ">, " + prompt + ", wojak"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "dreamarts":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = "<lora:DreamARTSDXL:"+lora_weight+">, " + prompt
|
||||
prompt = "<lora:DreamARTSDXL:" + lora_weight + ">, " + prompt
|
||||
existing_lora = True
|
||||
|
||||
if lora == "voxel":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = "voxel style, " + prompt + " <lora:last:"+lora_weight+">"
|
||||
prompt = "voxel style, " + prompt + " <lora:last:" + lora_weight + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "kru3ger":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = "kru3ger_style, " + prompt + "<lora:sebastiankrueger-kru3ger_style-000007:"+lora_weight+">"
|
||||
prompt = "kru3ger_style, " + prompt + "<lora:sebastiankrueger-kru3ger_style-000007:" + lora_weight + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "inkpunk":
|
||||
if lora_weight == "":
|
||||
lora_weight = "0.5"
|
||||
prompt = "inkpunk style, " + prompt + " <lora:IPXL_v2:"+lora_weight+">"
|
||||
prompt = "inkpunk style, " + prompt + " <lora:IPXL_v2:" + lora_weight + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "inkscenery":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = " ink scenery, " + prompt + " <lora:ink_scenery_xl:"+lora_weight+">"
|
||||
prompt = " ink scenery, " + prompt + " <lora:ink_scenery_xl:" + lora_weight + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "inkpainting":
|
||||
if lora_weight == "":
|
||||
lora_weight = "0.7"
|
||||
prompt = "painting style, " + prompt + " <lora:Ink_Painting-000006::"+lora_weight+">,"
|
||||
prompt = "painting style, " + prompt + " <lora:Ink_Painting-000006::" + lora_weight + ">,"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "timburton":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1.27"
|
||||
pencil_weight = "1.15"
|
||||
prompt = prompt + " (hand drawn with pencil"+pencil_weight+"), (tim burton style:"+lora_weight+")"
|
||||
prompt = prompt + " (hand drawn with pencil" + pencil_weight + "), (tim burton style:" + lora_weight + ")"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "pixelart":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = prompt + " (flat shading:1.2), (minimalist:1.4), <lora:pixelbuildings128-v2:"+lora_weight+"> "
|
||||
prompt = prompt + " (flat shading:1.2), (minimalist:1.4), <lora:pixelbuildings128-v2:" + lora_weight + "> "
|
||||
existing_lora = True
|
||||
|
||||
if lora == "pepe":
|
||||
if lora_weight == "":
|
||||
lora_weight = "0.8"
|
||||
prompt = prompt + " ,<lora:DD-pepe-v2:"+lora_weight+"> pepe"
|
||||
prompt = prompt + " ,<lora:DD-pepe-v2:" + lora_weight + "> pepe"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "bettertext":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = prompt + " ,<lora:BetterTextRedmond:"+lora_weight+">"
|
||||
prompt = prompt + " ,<lora:BetterTextRedmond:" + lora_weight + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "mspaint":
|
||||
if lora_weight == "":
|
||||
lora_weight = "1"
|
||||
prompt = "MSPaint drawing " + prompt +">"
|
||||
prompt = "MSPaint drawing " + prompt + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "woodfigure":
|
||||
if lora_weight == "":
|
||||
lora_weight = "0.7"
|
||||
prompt = prompt + ",woodfigurez,artistic style <lora:woodfigurez-sdxl:"+lora_weight+">"
|
||||
prompt = prompt + ",woodfigurez,artistic style <lora:woodfigurez-sdxl:" + lora_weight + ">"
|
||||
existing_lora = True
|
||||
|
||||
if lora == "fireelement":
|
||||
prompt = prompt + ",composed of fire elements, fire element"
|
||||
existing_lora = True
|
||||
|
||||
|
||||
|
||||
return lora, prompt, existing_lora
|
||||
return lora, prompt, existing_lora
|
@@ -14,7 +14,7 @@ This modules provides image generation based on prompts
|
||||
- `1-1` ,`4-3`, `16-9`, `16-10`, `3-4`,`9-16`,`10-16`
|
||||
|
||||
- `high_noise_frac`: Denoising factor
|
||||
|
||||
|
||||
- `n_steps`: how many iterations should be performed
|
||||
|
||||
## Example payload
|
@@ -2,26 +2,26 @@
|
||||
"""
|
||||
|
||||
import gc
|
||||
import sys
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add local dir to path for relative imports
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
||||
from nova_utils.interfaces.server_module import Processor
|
||||
from nova_utils.utils.cache_utils import get_file
|
||||
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
|
||||
from diffusers.utils import load_image
|
||||
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, \
|
||||
EulerAncestralDiscreteScheduler
|
||||
import numpy as np
|
||||
from PIL import Image as PILImage
|
||||
from lora import build_lora_xl
|
||||
|
||||
|
||||
|
||||
# Setting defaults
|
||||
_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" }
|
||||
_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength": "0.58",
|
||||
"guidance_scale": "11.0", "n_steps": "30", "lora": "", "lora_weight": "0.5"}
|
||||
|
||||
# TODO: add log infos,
|
||||
|
||||
# TODO: add log infos,
|
||||
class StableDiffusionXL(Processor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -29,7 +29,6 @@ class StableDiffusionXL(Processor):
|
||||
self.device = None
|
||||
self.ds_iter = None
|
||||
self.current_session = None
|
||||
|
||||
|
||||
# IO shortcuts
|
||||
self.input = [x for x in self.model_io if x.io_type == "input"]
|
||||
@@ -42,15 +41,15 @@ class StableDiffusionXL(Processor):
|
||||
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
self.ds_iter = ds_iter
|
||||
current_session_name = self.ds_iter.session_names[0]
|
||||
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
|
||||
#input_image_url = self.current_session.input_data['input_image_url'].data
|
||||
#input_image_url = ' '.join(input_image_url)
|
||||
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
|
||||
# input_image_url = self.current_session.input_data['input_image_url'].data
|
||||
# input_image_url = ' '.join(input_image_url)
|
||||
input_image = self.current_session.input_data['input_image'].data
|
||||
input_prompt = self.current_session.input_data['input_prompt'].data
|
||||
input_prompt = ' '.join(input_prompt)
|
||||
negative_prompt = self.current_session.input_data['negative_prompt'].data
|
||||
negative_prompt = ' '.join(negative_prompt)
|
||||
# print("Input Image: " + input_image_url)
|
||||
# print("Input Image: " + input_image_url)
|
||||
print("Input prompt: " + input_prompt)
|
||||
print("Negative prompt: " + negative_prompt)
|
||||
|
||||
@@ -58,8 +57,8 @@ class StableDiffusionXL(Processor):
|
||||
|
||||
model = self.options['model']
|
||||
lora = self.options['lora']
|
||||
#init_image = load_image(input_image_url).convert("RGB")
|
||||
init_image = PILImage.fromarray(input_image)
|
||||
# init_image = load_image(input_image_url).convert("RGB")
|
||||
init_image = PILImage.fromarray(input_image)
|
||||
|
||||
mwidth = 1024
|
||||
mheight = 1024
|
||||
@@ -82,44 +81,42 @@ class StableDiffusionXL(Processor):
|
||||
if lora != "" and lora != "None":
|
||||
print("Loading lora...")
|
||||
|
||||
lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" )
|
||||
lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "")
|
||||
|
||||
from diffusers import AutoPipelineForImage2Image
|
||||
import torch
|
||||
|
||||
|
||||
|
||||
#init_image = init_image.resize((int(w/2), int(h/2)))
|
||||
# init_image = init_image.resize((int(w/2), int(h/2)))
|
||||
|
||||
pipe = AutoPipelineForImage2Image.from_pretrained(
|
||||
"stabilityai/stable-diffusion-xl-base-1.0",
|
||||
torch_dtype=torch.float16).to("cuda")
|
||||
|
||||
if existing_lora:
|
||||
lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0]
|
||||
lora_uri = [x for x in self.trainer.meta_uri if x.uri_id == lora][0]
|
||||
if str(lora_uri) == "":
|
||||
return "Lora not found"
|
||||
return "Lora not found"
|
||||
lora_path = get_file(
|
||||
fname=str(lora_uri.uri_id) + ".safetensors",
|
||||
origin=lora_uri.uri_url,
|
||||
file_hash=lora_uri.uri_hash,
|
||||
cache_dir=os.getenv("CACHE_DIR"),
|
||||
tmp_dir=os.getenv("TMP_DIR"),
|
||||
)
|
||||
)
|
||||
pipe.load_lora_weights(str(lora_path))
|
||||
print("Loaded Lora: " + str(lora_path))
|
||||
|
||||
seed = 20000
|
||||
generator = torch.manual_seed(seed)
|
||||
|
||||
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
|
||||
|
||||
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
|
||||
|
||||
image = pipe(
|
||||
prompt=input_prompt,
|
||||
negative_prompt=negative_prompt,
|
||||
image=init_image,
|
||||
generator=generator,
|
||||
num_inference_steps=int(self.options['n_steps']),
|
||||
num_inference_steps=int(self.options['n_steps']),
|
||||
image_guidance_scale=float(self.options['guidance_scale']),
|
||||
strength=float(str(self.options['strength']))).images[0]
|
||||
|
||||
@@ -137,19 +134,21 @@ class StableDiffusionXL(Processor):
|
||||
|
||||
pipe = pipe.to(self.device)
|
||||
image = pipe(input_prompt, image=init_image,
|
||||
negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0]
|
||||
|
||||
negative_prompt=negative_prompt, num_inference_steps=n_steps,
|
||||
strength=transformation_strength, guidance_scale=cfg_scale).images[0]
|
||||
|
||||
elif model == "timbrooks/instruct-pix2pix":
|
||||
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16,
|
||||
safety_checker=None)
|
||||
safety_checker=None)
|
||||
|
||||
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
|
||||
|
||||
pipe.to(self.device)
|
||||
n_steps = int(self.options['n_steps'])
|
||||
cfg_scale = float(self.options['guidance_scale'])
|
||||
image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0]
|
||||
|
||||
image = \
|
||||
pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps,
|
||||
image_guidance_scale=cfg_scale).images[0]
|
||||
|
||||
if torch.cuda.is_available():
|
||||
del pipe
|
||||
@@ -157,7 +156,6 @@ class StableDiffusionXL(Processor):
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.ipc_collect()
|
||||
|
||||
|
||||
numpy_array = np.array(image)
|
||||
return numpy_array
|
||||
|
||||
@@ -167,10 +165,6 @@ class StableDiffusionXL(Processor):
|
||||
sys.stdout.flush()
|
||||
return "Error"
|
||||
|
||||
|
||||
def to_output(self, data: dict):
|
||||
self.current_session.output_data_templates['output_image'].data = data
|
||||
return self.current_session.output_data_templates
|
||||
|
||||
|
||||
|
@@ -0,0 +1,42 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration"
|
||||
description="Generates Image from existing image based on a prompt" is_iterable="False">
|
||||
<io type="input" id="input_image" data="Image" default_value=""/>
|
||||
<io type="input" id="input_prompt" data="prompt" default_value=""/>
|
||||
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
|
||||
<io type="output" id="output_image" data="image" default_value=""/>
|
||||
|
||||
<uri id="voxel" url="https://civitai.com/api/download/models/128609"
|
||||
hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
|
||||
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552"
|
||||
hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
|
||||
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206"
|
||||
hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
|
||||
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194"
|
||||
hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
|
||||
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124"
|
||||
hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
|
||||
<uri id="wojak" url="https://civitai.com/api/download/models/140160"
|
||||
hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
|
||||
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129"
|
||||
hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
|
||||
<uri id="timburton" url="https://civitai.com/api/download/models/207862"
|
||||
hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
|
||||
<uri id="pixelart" url="https://civitai.com/api/download/models/135931"
|
||||
hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
|
||||
<uri id="pepe" url="https://civitai.com/api/download/models/181917"
|
||||
hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
|
||||
<uri id="bettertext" url="https://civitai.com/api/download/models/163224"
|
||||
hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
|
||||
<uri id="mspaint" url="https://civitai.com/api/download/models/205793"
|
||||
hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
|
||||
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919"
|
||||
hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
|
||||
<uri id="fireelement" url="https://civitai.com/api/download/models/175257"
|
||||
hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
|
||||
</meta>
|
||||
<model create="StableDiffusionXL" script="stablediffusionxl-img2img.py"
|
||||
optstr="{model:LIST:stabilityai/stable-diffusion-xl-refiner-1.0,timbrooks/instruct-pix2pix};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure};{strength:STRING:0.8};{guidance_scale:STRING:11.0};{n_steps:STRING:30}"/>
|
||||
</trainer>
|
@@ -0,0 +1,68 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from prompt"
|
||||
is_iterable="False">
|
||||
<io type="input" id="input_prompt" data="prompt" default_value=""/>
|
||||
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
|
||||
<io type="output" id="output_image" data="image" default_value="sd.jpg"/>
|
||||
|
||||
<uri id="juggernaut" url="https://civitai.com/api/download/models/198530"
|
||||
hash='1FE6C7EC54C786040CDABC7B4E89720069D97096922E20D01F13E7764412B47F'/>
|
||||
<uri id="dynavision" url="https://civitai.com/api/download/models/198962"
|
||||
hash='FD9CDC26C3B6D1F30BACBC435E455E925E35622E4873CCFC55FD1C88E980585E'/>
|
||||
<uri id="colossus" url="https://civitai.com/api/download/models/213982"
|
||||
hash='5A7E9DD581B3A9EDF2ED0D9FB2036C389325CD7BA13A754CE19BEEDBB69CEB73'/>
|
||||
<uri id="newreality" url="https://civitai.com/api/download/models/232565"
|
||||
hash='06A85616411135F8CAF161F71CB0948F79E85750E4AF36A885C75485A9B68E2F'/>
|
||||
<uri id="unstable" url="https://civitai.com/api/download/models/209647"
|
||||
hash='05C9E2274A74AE6957B986C92E5699FDFACFFD7EE24CED0D33CB696DE1A6C98B'/>
|
||||
<uri id="fantastic" url="https://civitai.com/api/download/models/143722"
|
||||
hash='B0C590726969EF93BC4136C167D339A277946787223BFAD7B1DC9A68A4F183FC'/>
|
||||
<uri id="mohawk" url="https://civitai.com/api/download/models/207419"
|
||||
hash='0248CA08AA5D5B342355173677C77ADD42E41ECEC3B6B6E52E9C9C471C30C508'/>
|
||||
<uri id="dreamshaper" url="https://civitai.com/api/download/models/126688"
|
||||
hash='0F1B80CFE81B9C3BDE7FDCBF6898897B2811B27BE1DF684583C3D85CBC9B1FA4'/>
|
||||
<uri id="timeless" url="https://civitai.com/api/download/models/198246"
|
||||
hash='A771B2B5E8D2A3C23A3A65F9A51E675F253F101C34BE7DC06FD18D534579D8F8'/>
|
||||
<uri id="crystalclear" url="https://civitai.com/api/download/models/133832"
|
||||
hash='0B76532E03A1BAC388CBF559AF00384ABCBD2B5B3F8834158AE4B1B9146A3843'/>
|
||||
<uri id="chroma" url="https://civitai.com/api/download/models/169740"
|
||||
hash='D2B9E5240C4BC74BB98063CEE16671FDC08D5B7BF197074A0C896E5DBB25BD24'/>
|
||||
<uri id="bluepencil" url="https://civitai.com/api/download/models/212090"
|
||||
hash='C4D7E01814F0EED57A7120629D3017AC018AD7CDECB48F7FBE6B12F9C9C4D6B9'/>
|
||||
|
||||
<uri id="voxel" url="https://civitai.com/api/download/models/128609"
|
||||
hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
|
||||
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552"
|
||||
hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
|
||||
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206"
|
||||
hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
|
||||
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194"
|
||||
hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
|
||||
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124"
|
||||
hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
|
||||
<uri id="wojak" url="https://civitai.com/api/download/models/140160"
|
||||
hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
|
||||
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129"
|
||||
hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
|
||||
<uri id="timburton" url="https://civitai.com/api/download/models/207862"
|
||||
hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
|
||||
<uri id="pixelart" url="https://civitai.com/api/download/models/135931"
|
||||
hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
|
||||
<uri id="pepe" url="https://civitai.com/api/download/models/181917"
|
||||
hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
|
||||
<uri id="bettertext" url="https://civitai.com/api/download/models/163224"
|
||||
hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
|
||||
<uri id="mspaint" url="https://civitai.com/api/download/models/205793"
|
||||
hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
|
||||
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919"
|
||||
hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
|
||||
<uri id="fireelement" url="https://civitai.com/api/download/models/175257"
|
||||
hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
|
||||
|
||||
|
||||
</meta>
|
||||
<model create="StableDiffusionXL" script="stablediffusionxl.py"
|
||||
optstr="{model:LIST:stabilityai/stable-diffusion-xl-base-1.0,juggernaut,dynavision,colossus,newreality,unstable,fantastic,mohawk,dreamshaper,timeless,crystalclear,chroma,bluepencil};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure,fireelement};{width:STRING:1024};{height:STRING:1024};{high_noise_frac:STRING:0.8};{n_steps:STRING:35}"/>
|
||||
</trainer>
|
@@ -1,26 +1,20 @@
|
||||
import gc
|
||||
import sys
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
||||
from ssl import Options
|
||||
from nova_utils.interfaces.server_module import Processor
|
||||
import torch
|
||||
from diffusers import StableVideoDiffusionPipeline
|
||||
from diffusers.utils import load_image, export_to_video
|
||||
from nova_utils.utils.cache_utils import get_file
|
||||
import numpy as np
|
||||
from PIL import Image as PILImage
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Setting defaults
|
||||
_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps":"7", "seed":""}
|
||||
_default_options = {"model": "stabilityai/stable-video-diffusion-img2vid-xt", "fps": "7", "seed": ""}
|
||||
|
||||
# TODO: add log infos,
|
||||
|
||||
# TODO: add log infos,
|
||||
class StableVideoDiffusion(Processor):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -28,24 +22,21 @@ class StableVideoDiffusion(Processor):
|
||||
self.device = None
|
||||
self.ds_iter = None
|
||||
self.current_session = None
|
||||
|
||||
|
||||
# IO shortcuts
|
||||
self.input = [x for x in self.model_io if x.io_type == "input"]
|
||||
self.output = [x for x in self.model_io if x.io_type == "output"]
|
||||
self.input = self.input[0]
|
||||
self.output = self.output[0]
|
||||
|
||||
def process_data(self, ds_iter) -> dict:
|
||||
|
||||
|
||||
|
||||
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
self.ds_iter = ds_iter
|
||||
current_session_name = self.ds_iter.session_names[0]
|
||||
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
|
||||
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
|
||||
input_image = self.current_session.input_data['input_image'].data
|
||||
|
||||
|
||||
try:
|
||||
pipe = StableVideoDiffusionPipeline.from_pretrained(
|
||||
self.options["model"], torch_dtype=torch.float16, variant="fp16"
|
||||
@@ -53,7 +44,7 @@ class StableVideoDiffusion(Processor):
|
||||
pipe.enable_model_cpu_offload()
|
||||
|
||||
# Load the conditioning image
|
||||
image = PILImage.fromarray(input_image)
|
||||
image = PILImage.fromarray(input_image)
|
||||
image = image.resize((1024, 576))
|
||||
|
||||
if self.options["seed"] != "" and self.options["seed"] != " ":
|
||||
@@ -68,7 +59,6 @@ class StableVideoDiffusion(Processor):
|
||||
torch.cuda.empty_cache()
|
||||
torch.cuda.ipc_collect()
|
||||
|
||||
|
||||
np_video = np.stack([np.asarray(x) for x in frames])
|
||||
return np_video
|
||||
|
||||
@@ -77,7 +67,7 @@ class StableVideoDiffusion(Processor):
|
||||
print(e)
|
||||
sys.stdout.flush()
|
||||
return "Error"
|
||||
|
||||
|
||||
def calculate_aspect(self, width: int, height: int):
|
||||
def gcd(a, b):
|
||||
"""The GCD (greatest common divisor) is the highest number that evenly divides both width and height."""
|
||||
@@ -89,12 +79,10 @@ class StableVideoDiffusion(Processor):
|
||||
|
||||
return x, y
|
||||
|
||||
|
||||
|
||||
def to_output(self, data: list):
|
||||
video = self.current_session.output_data_templates['output_video']
|
||||
video.data = data
|
||||
video.meta_data.sample_rate = int(self.options['fps'])
|
||||
video.meta_data.media_type = 'video'
|
||||
|
||||
return self.current_session.output_data_templates
|
||||
return self.current_session.output_data_templates
|
@@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="VideoGeneration" description="Generates Video from Image/prompt"
|
||||
is_iterable="False">
|
||||
<io type="input" id="input_image" data="Image" default_value=""/>
|
||||
<io type="output" id="output_video" data="stream:Video" default_value="sd_generated.mp4"/>
|
||||
</meta>
|
||||
<model create="StableVideoDiffusion" script="stablevideodiffusion.py"
|
||||
optstr="{model:LIST:stabilityai/stable-video-diffusion-img2vid-xt,stabilityai/stable-video-diffusion-img2vid};{fps:STRING:7};{seed:STRING: }"/>
|
||||
</trainer>
|
@@ -21,9 +21,10 @@ speaker diarization.
|
||||
- `word` Improved segmentation using separate alignment model. Equivalent to word alignment.
|
||||
|
||||
- `language`: language code for transcription and alignment models. Supported languages:
|
||||
- `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh`
|
||||
- `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`,
|
||||
`te`, `tr`, `uk`, `ur`, `vi`, `zh`
|
||||
- `None`: auto-detect language from first 30 seconds of audio
|
||||
|
||||
|
||||
- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption
|
||||
|
||||
## Examples
|
@@ -1,10 +1,13 @@
|
||||
"""WhisperX Module
|
||||
"""
|
||||
from nova_utils.interfaces.server_module import Processor
|
||||
import sys
|
||||
|
||||
from nova_utils.interfaces.server_module import Processor
|
||||
|
||||
# Setting defaults
|
||||
_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'}
|
||||
_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None,
|
||||
'compute_type': 'float16'}
|
||||
|
||||
|
||||
# supported language codes, cf. whisperx/alignment.py
|
||||
# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None}
|
||||
@@ -45,11 +48,14 @@ class WhisperX(Processor):
|
||||
sys.stdout.flush()
|
||||
model = whisperx.load_model(self.options["model"], self.device, compute_type='float32',
|
||||
language=self.options['language'])
|
||||
|
||||
|
||||
result = model.transcribe(audio, batch_size=int(self.options["batch_size"]))
|
||||
|
||||
# delete model if low on GPU resources
|
||||
import gc; gc.collect(); torch.cuda.empty_cache(); del model
|
||||
import gc;
|
||||
gc.collect();
|
||||
torch.cuda.empty_cache();
|
||||
del model
|
||||
|
||||
if not self.options["alignment_mode"] == "raw":
|
||||
# load alignment model and metadata
|
||||
@@ -64,7 +70,10 @@ class WhisperX(Processor):
|
||||
result = result_aligned
|
||||
|
||||
# delete model if low on GPU resources
|
||||
import gc; gc.collect(); torch.cuda.empty_cache(); del model_a
|
||||
import gc;
|
||||
gc.collect();
|
||||
torch.cuda.empty_cache();
|
||||
del model_a
|
||||
|
||||
return result
|
||||
|
||||
@@ -83,26 +92,26 @@ class WhisperX(Processor):
|
||||
if "end" in w.keys():
|
||||
last_end = w["end"]
|
||||
else:
|
||||
#TODO: rethink lower bound for confidence; place word centred instead of left aligned
|
||||
# TODO: rethink lower bound for confidence; place word centred instead of left aligned
|
||||
w["start"] = last_end
|
||||
last_end += 0.065
|
||||
w["end"] = last_end
|
||||
#w["score"] = 0.000
|
||||
# w["score"] = 0.000
|
||||
w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4])
|
||||
|
||||
|
||||
def _hmean(scores):
|
||||
if len(scores) > 0:
|
||||
prod = scores[0]
|
||||
for s in scores[1:]:
|
||||
prod *= s
|
||||
prod = prod**(1/len(scores))
|
||||
prod = prod ** (1 / len(scores))
|
||||
else:
|
||||
prod = 0
|
||||
return prod
|
||||
|
||||
|
||||
if (
|
||||
self.options["alignment_mode"] == "word"
|
||||
or self.options["alignment_mode"] == "segment"
|
||||
self.options["alignment_mode"] == "word"
|
||||
or self.options["alignment_mode"] == "segment"
|
||||
):
|
||||
_fix_missing_timestamps(data)
|
||||
|
||||
@@ -113,12 +122,13 @@ class WhisperX(Processor):
|
||||
]
|
||||
else:
|
||||
anno_data = [
|
||||
#(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
|
||||
(w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?)
|
||||
# (w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
|
||||
(w["start"], w["end"], w["text"], 1) for w in data["segments"]
|
||||
# alignment 'raw' no longer contains a score(?)
|
||||
]
|
||||
|
||||
# convert to milliseconds
|
||||
anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data]
|
||||
anno_data = [(x[0] * 1000, x[1] * 1000, x[2], x[3]) for x in anno_data]
|
||||
out = self.session_manager.output_data_templates[self.output.io_id]
|
||||
out.data = anno_data
|
||||
return self.session_manager.output_data_templates
|
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="Transcript" description="Transcribes audio" is_iterable="False">
|
||||
<io type="input" id="audio" data="stream:Audio" default_value="audio"/>
|
||||
<io type="output" id="transcript" data="annotation:Free" default_value="transcript"/>
|
||||
</meta>
|
||||
<model create="WhisperX" script="whisperx_transcript.py"
|
||||
optstr="{model:LIST:base,tiny,small,medium,large-v1,large-v2,tiny.en,base.en,small.en,medium.en};{alignment_mode:LIST:segment,word,raw};{language:LIST:None,en,de,ar,cs,da,el,es,fa,fi,fr,he,hu,it,ja,ko,nl,pl,pt,ru,te,tr,uk,ur,vi,zh};{batch_size:STRING:16};{compute_type:LIST:float16,float32,int8}"/>
|
||||
</trainer>
|
@@ -4,9 +4,10 @@ import json
|
||||
import os
|
||||
import time
|
||||
import zipfile
|
||||
|
||||
import PIL.Image as Image
|
||||
import pandas as pd
|
||||
import requests
|
||||
import PIL.Image as Image
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
from nostr_dvm.utils.output_utils import upload_media_to_hoster
|
||||
@@ -24,6 +25,7 @@ in the module that is calling the server
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def send_request_to_server(request_form, address):
|
||||
print("Sending job to Server")
|
||||
url = ('http://' + address + '/process')
|
||||
@@ -46,6 +48,7 @@ def send_file_to_server(filepath, address):
|
||||
|
||||
return result
|
||||
|
||||
|
||||
"""
|
||||
check_n_server_status(request_form, address)
|
||||
Function that requests the status of the current process with the jobID (we use the Nostr event as jobID).
|
||||
@@ -76,7 +79,6 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
|
||||
# WAITING = 0, RUNNING = 1, FINISHED = 2, ERROR = 3
|
||||
time.sleep(1.0)
|
||||
|
||||
|
||||
if status == 2:
|
||||
try:
|
||||
url_fetch = 'http://' + address + '/fetch_result'
|
||||
@@ -93,7 +95,7 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
|
||||
return result
|
||||
elif content_type == 'video/mp4':
|
||||
with open('./outputs/video.mp4', 'wb') as f:
|
||||
f.write(response.content)
|
||||
f.write(response.content)
|
||||
f.close()
|
||||
clip = VideoFileClip("./outputs/video.mp4")
|
||||
clip.write_videofile("./outputs/video2.mp4")
|
||||
@@ -121,4 +123,4 @@ def check_server_status(jobID, address) -> str | pd.DataFrame:
|
||||
print("Couldn't fetch result: " + str(e))
|
||||
|
||||
elif status == 3:
|
||||
return "error"
|
||||
return "error"
|
@@ -42,14 +42,14 @@ class StableDiffusion:
|
||||
self.tokenizer = load_tokenizer(model)
|
||||
|
||||
def generate_latents(
|
||||
self,
|
||||
text: str,
|
||||
n_images: int = 1,
|
||||
num_steps: int = 50,
|
||||
cfg_weight: float = 7.5,
|
||||
negative_text: str = "",
|
||||
latent_size: Tuple[int] = (64, 64),
|
||||
seed=None,
|
||||
self,
|
||||
text: str,
|
||||
n_images: int = 1,
|
||||
num_steps: int = 50,
|
||||
cfg_weight: float = 7.5,
|
||||
negative_text: str = "",
|
||||
latent_size: Tuple[int] = (64, 64),
|
||||
seed=None,
|
||||
):
|
||||
# Set the PRNG state
|
||||
seed = seed or int(time.time())
|
||||
@@ -94,4 +94,4 @@ class StableDiffusion:
|
||||
def decode(self, x_t):
|
||||
x = self.autoencoder.decode(x_t / self.autoencoder.scaling_factor)
|
||||
x = mx.minimum(1, mx.maximum(0, x / 2 + 0.5))
|
||||
return x
|
||||
return x
|
||||
|
@@ -1,7 +1,7 @@
|
||||
# Copyright © 2023 Apple Inc.
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple
|
||||
from typing import Tuple
|
||||
|
||||
|
||||
@dataclass
|
||||
|
@@ -1,14 +1,12 @@
|
||||
# Copyright © 2023 Apple Inc.
|
||||
|
||||
import json
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
from huggingface_hub import hf_hub_download
|
||||
from safetensors import safe_open as safetensor_open
|
||||
|
||||
import mlx.core as mx
|
||||
import numpy as np
|
||||
from huggingface_hub import hf_hub_download
|
||||
from mlx.utils import tree_unflatten
|
||||
from safetensors import safe_open as safetensor_open
|
||||
|
||||
from .clip import CLIPTextModel
|
||||
from .config import UNetConfig, CLIPTextModelConfig, AutoencoderConfig, DiffusionConfig
|
||||
@@ -16,7 +14,6 @@ from .tokenizer import Tokenizer
|
||||
from .unet import UNetModel
|
||||
from .vae import Autoencoder
|
||||
|
||||
|
||||
_DEFAULT_MODEL = "stabilityai/stable-diffusion-2-1-base"
|
||||
_MODELS = {
|
||||
# See https://huggingface.co/stabilityai/stable-diffusion-2-1-base for the model details and license
|
||||
@@ -285,7 +282,7 @@ def load_tokenizer(key: str = _DEFAULT_MODEL):
|
||||
|
||||
merges_file = hf_hub_download(key, _MODELS[key]["tokenizer_merges"])
|
||||
with open(merges_file, encoding="utf-8") as f:
|
||||
bpe_merges = f.read().strip().split("\n")[1 : 49152 - 256 - 2 + 1]
|
||||
bpe_merges = f.read().strip().split("\n")[1: 49152 - 256 - 2 + 1]
|
||||
bpe_merges = [tuple(m.split()) for m in bpe_merges]
|
||||
bpe_ranks = dict(map(reversed, enumerate(bpe_merges)))
|
||||
|
||||
|
@@ -1,9 +1,9 @@
|
||||
# Copyright © 2023 Apple Inc.
|
||||
|
||||
from .config import DiffusionConfig
|
||||
|
||||
import mlx.core as mx
|
||||
|
||||
from .config import DiffusionConfig
|
||||
|
||||
|
||||
def _linspace(a, b, num):
|
||||
x = mx.arange(0, num) / (num - 1)
|
||||
@@ -37,7 +37,7 @@ class SimpleEulerSampler:
|
||||
)
|
||||
elif config.beta_schedule == "scaled_linear":
|
||||
betas = _linspace(
|
||||
config.beta_start**0.5, config.beta_end**0.5, config.num_train_steps
|
||||
config.beta_start ** 0.5, config.beta_end ** 0.5, config.num_train_steps
|
||||
).square()
|
||||
else:
|
||||
raise NotImplementedError(f"{config.beta_schedule} is not implemented.")
|
||||
@@ -52,7 +52,7 @@ class SimpleEulerSampler:
|
||||
def sample_prior(self, shape, dtype=mx.float32, key=None):
|
||||
noise = mx.random.normal(shape, key=key)
|
||||
return (
|
||||
noise * self._sigmas[-1] * (self._sigmas[-1].square() + 1).rsqrt()
|
||||
noise * self._sigmas[-1] * (self._sigmas[-1].square() + 1).rsqrt()
|
||||
).astype(dtype)
|
||||
|
||||
def sigmas(self, t):
|
||||
|
@@ -34,11 +34,11 @@ class TimestepEmbedding(nn.Module):
|
||||
|
||||
class TransformerBlock(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
model_dims: int,
|
||||
num_heads: int,
|
||||
hidden_dims: Optional[int] = None,
|
||||
memory_dims: Optional[int] = None,
|
||||
self,
|
||||
model_dims: int,
|
||||
num_heads: int,
|
||||
hidden_dims: Optional[int] = None,
|
||||
memory_dims: Optional[int] = None,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -85,13 +85,13 @@ class Transformer2D(nn.Module):
|
||||
"""A transformer model for inputs with 2 spatial dimensions."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
model_dims: int,
|
||||
encoder_dims: int,
|
||||
num_heads: int,
|
||||
num_layers: int = 1,
|
||||
norm_num_groups: int = 32,
|
||||
self,
|
||||
in_channels: int,
|
||||
model_dims: int,
|
||||
encoder_dims: int,
|
||||
num_heads: int,
|
||||
num_layers: int = 1,
|
||||
norm_num_groups: int = 32,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -125,11 +125,11 @@ class Transformer2D(nn.Module):
|
||||
|
||||
class ResnetBlock2D(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: Optional[int] = None,
|
||||
groups: int = 32,
|
||||
temb_channels: Optional[int] = None,
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: Optional[int] = None,
|
||||
groups: int = 32,
|
||||
temb_channels: Optional[int] = None,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -169,19 +169,19 @@ class ResnetBlock2D(nn.Module):
|
||||
|
||||
class UNetBlock2D(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
temb_channels: int,
|
||||
prev_out_channels: Optional[int] = None,
|
||||
num_layers: int = 1,
|
||||
transformer_layers_per_block: int = 1,
|
||||
num_attention_heads: int = 8,
|
||||
cross_attention_dim=1280,
|
||||
resnet_groups: int = 32,
|
||||
add_downsample=True,
|
||||
add_upsample=True,
|
||||
add_cross_attention=True,
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
temb_channels: int,
|
||||
prev_out_channels: Optional[int] = None,
|
||||
num_layers: int = 1,
|
||||
transformer_layers_per_block: int = 1,
|
||||
num_attention_heads: int = 8,
|
||||
cross_attention_dim=1280,
|
||||
resnet_groups: int = 32,
|
||||
add_downsample=True,
|
||||
add_upsample=True,
|
||||
add_cross_attention=True,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -232,13 +232,13 @@ class UNetBlock2D(nn.Module):
|
||||
)
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
x,
|
||||
encoder_x=None,
|
||||
temb=None,
|
||||
attn_mask=None,
|
||||
encoder_attn_mask=None,
|
||||
residual_hidden_states=None,
|
||||
self,
|
||||
x,
|
||||
encoder_x=None,
|
||||
temb=None,
|
||||
attn_mask=None,
|
||||
encoder_attn_mask=None,
|
||||
residual_hidden_states=None,
|
||||
):
|
||||
output_states = []
|
||||
|
||||
@@ -340,9 +340,9 @@ class UNetModel(nn.Module):
|
||||
|
||||
# Make the upsampling blocks
|
||||
block_channels = (
|
||||
[config.block_out_channels[0]]
|
||||
+ list(config.block_out_channels)
|
||||
+ [config.block_out_channels[-1]]
|
||||
[config.block_out_channels[0]]
|
||||
+ list(config.block_out_channels)
|
||||
+ [config.block_out_channels[-1]]
|
||||
)
|
||||
self.up_blocks = [
|
||||
UNetBlock2D(
|
||||
|
@@ -44,13 +44,13 @@ class Attention(nn.Module):
|
||||
|
||||
class EncoderDecoderBlock2D(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
num_layers: int = 1,
|
||||
resnet_groups: int = 32,
|
||||
add_downsample=True,
|
||||
add_upsample=True,
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
num_layers: int = 1,
|
||||
resnet_groups: int = 32,
|
||||
add_downsample=True,
|
||||
add_upsample=True,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -93,12 +93,12 @@ class Encoder(nn.Module):
|
||||
"""Implements the encoder side of the Autoencoder."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
block_out_channels: List[int] = [64],
|
||||
layers_per_block: int = 2,
|
||||
resnet_groups: int = 32,
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
block_out_channels: List[int] = [64],
|
||||
layers_per_block: int = 2,
|
||||
resnet_groups: int = 32,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -159,12 +159,12 @@ class Decoder(nn.Module):
|
||||
"""Implements the decoder side of the Autoencoder."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
block_out_channels: List[int] = [64],
|
||||
layers_per_block: int = 2,
|
||||
resnet_groups: int = 32,
|
||||
self,
|
||||
in_channels: int,
|
||||
out_channels: int,
|
||||
block_out_channels: List[int] = [64],
|
||||
layers_per_block: int = 2,
|
||||
resnet_groups: int = 32,
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
|
@@ -1,10 +0,0 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration" description="Generates Prompt from Image" is_iterable="False">
|
||||
<io type="input" id="input_image" data="image" default_value=""/>
|
||||
<io type="output" id="output" data="text" default_value=""/>
|
||||
|
||||
</meta>
|
||||
<model create="ImageInterrogator" script="image_interrogator.py" optstr="{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"/>
|
||||
</trainer>
|
@@ -1,9 +0,0 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration" description="Upscales an Image" is_iterable="False">
|
||||
<io type="input" id="input_image" data="image" default_value=""/>
|
||||
<io type="output" id="output_image" data="image" default_value=""/>
|
||||
</meta>
|
||||
<model create="RealESRGan" script="image_upscale_realesrgan.py" optstr="{model:LIST:RealESRGAN_x4plus,RealESRNet_x4plus,RealESRGAN_x4plus_anime_6B,RealESRGAN_x2plus,realesr-animevideov3,realesr-general-x4v3};{outscale:STRING:4};{denoise_strength:STRING:0.5};{tile:STRING:0};{tile_pad:STRING:10};{pre_pad:STRING:0};{compute_type:STRING:fp32};{face_enhance:BOOL:False}"/>
|
||||
</trainer>
|
@@ -1,26 +0,0 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from existing image based on a prompt" is_iterable="False">
|
||||
<io type="input" id="input_image" data="Image" default_value=""/>
|
||||
<io type="input" id="input_prompt" data="prompt" default_value=""/>
|
||||
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
|
||||
<io type="output" id="output_image" data="image" default_value=""/>
|
||||
|
||||
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
|
||||
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
|
||||
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
|
||||
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
|
||||
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
|
||||
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
|
||||
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
|
||||
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
|
||||
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
|
||||
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
|
||||
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
|
||||
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
|
||||
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
|
||||
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
|
||||
</meta>
|
||||
<model create="StableDiffusionXL" script="stablediffusionxl-img2img.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-refiner-1.0,timbrooks/instruct-pix2pix};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure};{strength:STRING:0.8};{guidance_scale:STRING:11.0};{n_steps:STRING:30}"/>
|
||||
</trainer>
|
@@ -1,41 +0,0 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from prompt" is_iterable="False">
|
||||
<io type="input" id="input_prompt" data="prompt" default_value=""/>
|
||||
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
|
||||
<io type="output" id="output_image" data="image" default_value="sd.jpg"/>
|
||||
|
||||
<uri id="juggernaut" url="https://civitai.com/api/download/models/198530" hash='1FE6C7EC54C786040CDABC7B4E89720069D97096922E20D01F13E7764412B47F'/>
|
||||
<uri id="dynavision" url="https://civitai.com/api/download/models/198962" hash='FD9CDC26C3B6D1F30BACBC435E455E925E35622E4873CCFC55FD1C88E980585E'/>
|
||||
<uri id="colossus" url="https://civitai.com/api/download/models/213982" hash='5A7E9DD581B3A9EDF2ED0D9FB2036C389325CD7BA13A754CE19BEEDBB69CEB73'/>
|
||||
<uri id="newreality" url="https://civitai.com/api/download/models/232565" hash='06A85616411135F8CAF161F71CB0948F79E85750E4AF36A885C75485A9B68E2F'/>
|
||||
<uri id="unstable" url="https://civitai.com/api/download/models/209647" hash='05C9E2274A74AE6957B986C92E5699FDFACFFD7EE24CED0D33CB696DE1A6C98B'/>
|
||||
<uri id="fantastic" url="https://civitai.com/api/download/models/143722" hash='B0C590726969EF93BC4136C167D339A277946787223BFAD7B1DC9A68A4F183FC'/>
|
||||
<uri id="mohawk" url="https://civitai.com/api/download/models/207419" hash='0248CA08AA5D5B342355173677C77ADD42E41ECEC3B6B6E52E9C9C471C30C508'/>
|
||||
<uri id="dreamshaper" url="https://civitai.com/api/download/models/126688" hash='0F1B80CFE81B9C3BDE7FDCBF6898897B2811B27BE1DF684583C3D85CBC9B1FA4'/>
|
||||
<uri id="timeless" url="https://civitai.com/api/download/models/198246" hash='A771B2B5E8D2A3C23A3A65F9A51E675F253F101C34BE7DC06FD18D534579D8F8'/>
|
||||
<uri id="crystalclear" url="https://civitai.com/api/download/models/133832" hash='0B76532E03A1BAC388CBF559AF00384ABCBD2B5B3F8834158AE4B1B9146A3843'/>
|
||||
<uri id="chroma" url="https://civitai.com/api/download/models/169740" hash='D2B9E5240C4BC74BB98063CEE16671FDC08D5B7BF197074A0C896E5DBB25BD24'/>
|
||||
<uri id="bluepencil" url="https://civitai.com/api/download/models/212090" hash='C4D7E01814F0EED57A7120629D3017AC018AD7CDECB48F7FBE6B12F9C9C4D6B9'/>
|
||||
|
||||
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
|
||||
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
|
||||
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
|
||||
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
|
||||
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
|
||||
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
|
||||
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
|
||||
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
|
||||
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
|
||||
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
|
||||
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
|
||||
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
|
||||
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
|
||||
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
|
||||
|
||||
|
||||
|
||||
</meta>
|
||||
<model create="StableDiffusionXL" script="stablediffusionxl.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-base-1.0,juggernaut,dynavision,colossus,newreality,unstable,fantastic,mohawk,dreamshaper,timeless,crystalclear,chroma,bluepencil};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure,fireelement};{width:STRING:1024};{height:STRING:1024};{high_noise_frac:STRING:0.8};{n_steps:STRING:35}"/>
|
||||
</trainer>
|
@@ -1,9 +0,0 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="VideoGeneration" description="Generates Video from Image/prompt" is_iterable="False">
|
||||
<io type="input" id="input_image" data="Image" default_value=""/>
|
||||
<io type="output" id="output_video" data="stream:Video" default_value="sd_generated.mp4"/>
|
||||
</meta>
|
||||
<model create="StableVideoDiffusion" script="stablevideodiffusion.py" optstr="{model:LIST:stabilityai/stable-video-diffusion-img2vid-xt,stabilityai/stable-video-diffusion-img2vid};{fps:STRING:7};{seed:STRING: }"/>
|
||||
</trainer>
|
@@ -1,9 +0,0 @@
|
||||
<?xml version="1.0" ?>
|
||||
<trainer ssi-v="5">
|
||||
<info trained="true" seed="1234"/>
|
||||
<meta backend="nova-server" category="Transcript" description="Transcribes audio" is_iterable="False">
|
||||
<io type="input" id="audio" data="stream:Audio" default_value="audio"/>
|
||||
<io type="output" id="transcript" data="annotation:Free" default_value="transcript"/>
|
||||
</meta>
|
||||
<model create="WhisperX" script="whisperx_transcript.py" optstr="{model:LIST:base,tiny,small,medium,large-v1,large-v2,tiny.en,base.en,small.en,medium.en};{alignment_mode:LIST:segment,word,raw};{language:LIST:None,en,de,ar,cs,da,el,es,fa,fi,fr,he,hu,it,ja,ko,nl,pl,pt,ru,te,tr,uk,ur,vi,zh};{batch_size:STRING:16};{compute_type:LIST:float16,float32,int8}"/>
|
||||
</trainer>
|
Reference in New Issue
Block a user