Merge pull request #14 from believethehype/backends-nserver

Backends nserver
This commit is contained in:
believethehype
2023-12-20 18:52:04 +01:00
committed by GitHub
66 changed files with 2399 additions and 104 deletions

View File

@@ -1,12 +1,13 @@
import os
from pathlib import Path
import dotenv
from sys import platform
from nostr_dvm.bot import Bot
from nostr_dvm.tasks import videogeneration_replicate_svd, imagegeneration_replicate_sdxl, textgeneration_llmlite, \
trending_notes_nostrband, discovery_inactive_follows, translation_google, textextraction_pdf, \
translation_libretranslate, textextraction_google, convert_media, imagegeneration_openai_dalle, texttospeech, \
imagegeneration_mlx, advanced_search, textextraction_whisper_mlx
imagegeneration_sd21_mlx, advanced_search
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
@@ -139,10 +140,10 @@ def playground():
bot_config.SUPPORTED_DVMS.append(tts)
tts.run()
from sys import platform
if platform == "darwin":
# Test with MLX for OSX M1/M2/M3 chips
mlx = imagegeneration_mlx.build_example("SD with MLX", "mlx_sd", admin_config)
mlx = imagegeneration_sd21_mlx.build_example("SD with MLX", "mlx_sd", admin_config)
bot_config.SUPPORTED_DVMS.append(mlx)
mlx.run()

View File

@@ -0,0 +1,129 @@
"""StableDiffusionXL Module
"""
import gc
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from nova_utils.interfaces.server_module import Processor
# Setting defaults
_default_options = {"kind": "prompt", "mode": "fast" }
# TODO: add log info
class ImageInterrogator(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
from PIL import Image as PILImage
import torch
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
kind = self.options['kind']  # "prompt" or "analysis"
mode = self.options['mode']
#url = self.current_session.input_data['input_image_url'].data[0]
#print(url)
input_image = self.current_session.input_data['input_image'].data
init_image = PILImage.fromarray(input_image)
mwidth = 256
mheight = 256
w = mwidth
h = mheight
if init_image.width > init_image.height:
scale = float(init_image.height / init_image.width)
w = mwidth
h = int(mheight * scale)
elif init_image.width < init_image.height:
scale = float(init_image.width / init_image.height)
w = int(mwidth * scale)
h = mheight
else:
w = mwidth
h = mheight
init_image = init_image.resize((w, h))
from clip_interrogator import Config, Interrogator
config = Config(clip_model_name="ViT-L-14/openai", device=self.device)
if kind == "analysis":
ci = Interrogator(config)
image_features = ci.image_to_features(init_image)
top_mediums = ci.mediums.rank(image_features, 5)
top_artists = ci.artists.rank(image_features, 5)
top_movements = ci.movements.rank(image_features, 5)
top_trendings = ci.trendings.rank(image_features, 5)
top_flavors = ci.flavors.rank(image_features, 5)
medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
movement_ranks = {movement: sim for movement, sim in
zip(top_movements, ci.similarities(image_features, top_movements))}
trending_ranks = {trending: sim for trending, sim in
zip(top_trendings, ci.similarities(image_features, top_trendings))}
flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}
result = "Medium Ranks:\n" + str(medium_ranks) + "\nArtist Ranks: " + str(artist_ranks) + "\nMovement Ranks:\n" + str(movement_ranks) + "\nTrending Ranks:\n" + str(trending_ranks) + "\nFlavor Ranks:\n" + str(flavor_ranks)
print(result)
return result
else:
ci = Interrogator(config)
ci.config.blip_num_beams = 64
ci.config.chunk_size = 2024
ci.config.clip_offload = True
ci.config.apply_low_vram_defaults()
#MODELS = ['ViT-L (best for Stable Diffusion 1.*)']
ci.config.flavor_intermediate_count = 2024 #if clip_model_name == MODELS[0] else 1024
image = init_image
if mode == 'best':
prompt = ci.interrogate(image)
elif mode == 'classic':
prompt = ci.interrogate_classic(image)
elif mode == 'fast':
prompt = ci.interrogate_fast(image)
elif mode == 'negative':
prompt = ci.interrogate_negative(image)
#print(str(prompt))
return prompt
# config = Config(clip_model_name=os.environ['TRANSFORMERS_CACHE'] + "ViT-L-14/openai", device="cuda")
# ci = Interrogator(config)
# "ViT-L-14/openai"))
# "ViT-g-14/laion2B-s34B-b88K"))
def to_output(self, data: dict):
import numpy as np
self.current_session.output_data_templates['output'].data = np.array([data])
return self.current_session.output_data_templates

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Prompt from Image" is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output" data="text" default_value=""/>
</meta>
<model create="ImageInterrogator" script="image_interrogator.py" optstr="{kind:LIST:prompt,analysis};{mode:LIST:best,classic,fast,negative}"/>
</trainer>

View File

@@ -0,0 +1,11 @@
# CLIP Interrogator
This module provides prompt generation based on images
* https://huggingface.co/spaces/pharmapsychotic/CLIP-Interrogator
## Options
- `kind`: string, identifier of the kind of processing
- `prompt`: Generates a prompt from an image
- `analysis`: Generates a categorical analysis (mediums, artists, movements, trends and flavors)
- `mode`: string, prompt generation mode: `best`, `classic`, `fast` or `negative`
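## Example payload
A hypothetical request, following the pattern of the other module READMEs; the trainer path, server port and endpoint are assumptions and may differ in your setup:
```python
import requests

inputfile = 'path/to/my/image.jpg'    # hypothetical input image
outputfile = 'path/to/my/prompt.txt'  # hypothetical output text file

payload = {
    'trainerFilePath': 'modules\\image_interrogator\\image_interrogator.trainer',  # assumed path
    'server': '127.0.0.1',
    'data': '[{"id":"input_image","type":"input","src":"file:image","uri":"' + inputfile + '","active":"True"},'
            '{"id":"output","type":"output","src":"file:text","uri":"' + outputfile + '","active":"True"}]',
    'optStr': 'kind=prompt;mode=fast'
}

url = 'http://127.0.0.1:53770/predict'  # assumed host/port, cf. the Stable Diffusion XL module README
headers = {'Content-type': 'application/x-www-form-urlencoded'}
requests.post(url, headers=headers, data=payload)
```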

View File

@@ -0,0 +1,5 @@
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.1
clip_interrogator
git+https://github.com/huggingface/diffusers.git

View File

@@ -0,0 +1,12 @@
""" Clip Interrorgator
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '0'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,152 @@
"""RealESRGan Module
"""
import os
import glob
import sys
from nova_utils.interfaces.server_module import Processor
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
import numpy as np
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
import cv2
from PIL import Image as PILImage
# Setting defaults
_default_options = {"model": "RealESRGAN_x4plus", "outscale": 4, "denoise_strength": 0.5, "tile": 0,"tile_pad": 10,"pre_pad": 0, "compute_type": "fp32", "face_enhance": False }
# TODO: add log info
class RealESRGan(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
self.model_path = None #Maybe need this later for manual path
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
input_image = self.current_session.input_data['input_image'].data
try:
model, netscale, file_url = self.manageModel(str(self.options['model']))
if self.model_path is not None:
model_path = self.model_path
else:
model_path = os.path.join('weights', self.options['model'] + '.pth')
if not os.path.isfile(model_path):
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
for url in file_url:
# model_path will be updated
model_path = load_file_from_url(
url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
# use dni to control the denoise strength
dni_weight = None
if self.options['model'] == 'realesr-general-x4v3' and float(self.options['denoise_strength']) != 1:
wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
model_path = [model_path, wdn_model_path]
dni_weight = [float(self.options['denoise_strength']), 1 - float(self.options['denoise_strength'])]
half = True
if self.options["compute_type"] == "fp32":
half=False
upsampler = RealESRGANer(
scale=netscale,
model_path=model_path,
dni_weight=dni_weight,
model=model,
tile= int(self.options['tile']),
tile_pad=int(self.options['tile_pad']),
pre_pad=int(self.options['pre_pad']),
half=half,
gpu_id=None) #Can be set if multiple gpus are available
if bool(self.options['face_enhance']): # Use GFPGAN for face enhancement
from gfpgan import GFPGANer
face_enhancer = GFPGANer(
model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
upscale=int(self.options['outscale']),
arch='clean',
channel_multiplier=2,
bg_upsampler=upsampler)
pilimage = PILImage.fromarray(input_image)
img = cv2.cvtColor(np.array(pilimage), cv2.COLOR_RGB2BGR)
try:
if bool(self.options['face_enhance']):
_, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
else:
output, _ = upsampler.enhance(img, outscale=int(self.options['outscale']))
except RuntimeError as error:
print('Error', error)
print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
return output
except Exception as e:
print(e)
sys.stdout.flush()
return "Error"
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates
def manageModel(self, model_name):
if model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
elif model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
elif model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
elif model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
netscale = 2
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
elif model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
elif model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
netscale = 4
file_url = [
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
]
return model, netscale, file_url

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Upscales an Image" is_iterable="False">
<io type="input" id="input_image" data="image" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
</meta>
<model create="RealESRGan" script="image_upscale_realesrgan.py" optstr="{model:LIST:RealESRGAN_x4plus,RealESRNet_x4plus,RealESRGAN_x4plus_anime_6B,RealESRGAN_x2plus,realesr-animevideov3,realesr-general-x4v3};{outscale:STRING:4};{denoise_strength:STRING:0.5};{tile:STRING:0};{tile_pad:STRING:10};{pre_pad:STRING:0};{compute_type:STRING:fp32};{face_enhance:BOOL:False}"/>
</trainer>

View File

@@ -0,0 +1,166 @@
import argparse
import cv2
import glob
import os
from basicsr.archs.rrdbnet_arch import RRDBNet
from basicsr.utils.download_util import load_file_from_url
from realesrgan import RealESRGANer
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
def main():
"""Inference demo for Real-ESRGAN.
"""
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
parser.add_argument(
'-n',
'--model_name',
type=str,
default='RealESRGAN_x4plus',
help=('Model names: RealESRGAN_x4plus | RealESRNet_x4plus | RealESRGAN_x4plus_anime_6B | RealESRGAN_x2plus | '
'realesr-animevideov3 | realesr-general-x4v3'))
parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
parser.add_argument(
'-dn',
'--denoise_strength',
type=float,
default=0.5,
help=('Denoise strength. 0 for weak denoise (keep noise), 1 for strong denoise ability. '
'Only used for the realesr-general-x4v3 model'))
parser.add_argument('-s', '--outscale', type=float, default=4, help='The final upsampling scale of the image')
parser.add_argument(
'--model_path', type=str, default=None, help='[Option] Model path. Usually, you do not need to specify it')
parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
parser.add_argument('-t', '--tile', type=int, default=0, help='Tile size, 0 for no tile during testing')
parser.add_argument('--tile_pad', type=int, default=10, help='Tile padding')
parser.add_argument('--pre_pad', type=int, default=0, help='Pre padding size at each border')
parser.add_argument('--face_enhance', action='store_true', help='Use GFPGAN to enhance face')
parser.add_argument(
'--fp32', action='store_true', help='Use fp32 precision during inference. Default: fp16 (half precision).')
parser.add_argument(
'--alpha_upsampler',
type=str,
default='realesrgan',
help='The upsampler for the alpha channels. Options: realesrgan | bicubic')
parser.add_argument(
'--ext',
type=str,
default='auto',
help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
parser.add_argument(
'-g', '--gpu-id', type=int, default=None, help='gpu device to use (default=None) can be 0,1,2 for multi-gpu')
args = parser.parse_args()
# determine models according to model names
args.model_name = args.model_name.split('.')[0]
if args.model_name == 'RealESRGAN_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
elif args.model_name == 'RealESRNet_x4plus': # x4 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
elif args.model_name == 'RealESRGAN_x4plus_anime_6B': # x4 RRDBNet model with 6 blocks
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
elif args.model_name == 'RealESRGAN_x2plus': # x2 RRDBNet model
model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
netscale = 2
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
elif args.model_name == 'realesr-animevideov3': # x4 VGG-style model (XS size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu')
netscale = 4
file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth']
elif args.model_name == 'realesr-general-x4v3': # x4 VGG-style model (S size)
model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
netscale = 4
file_url = [
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth'
]
# determine model paths
if args.model_path is not None:
model_path = args.model_path
else:
model_path = os.path.join('weights', args.model_name + '.pth')
if not os.path.isfile(model_path):
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
for url in file_url:
# model_path will be updated
model_path = load_file_from_url(
url=url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
# use dni to control the denoise strength
dni_weight = None
if args.model_name == 'realesr-general-x4v3' and args.denoise_strength != 1:
wdn_model_path = model_path.replace('realesr-general-x4v3', 'realesr-general-wdn-x4v3')
model_path = [model_path, wdn_model_path]
dni_weight = [args.denoise_strength, 1 - args.denoise_strength]
# restorer
upsampler = RealESRGANer(
scale=netscale,
model_path=model_path,
dni_weight=dni_weight,
model=model,
tile=args.tile,
tile_pad=args.tile_pad,
pre_pad=args.pre_pad,
half=not args.fp32,
gpu_id=args.gpu_id)
if args.face_enhance: # Use GFPGAN for face enhancement
from gfpgan import GFPGANer
face_enhancer = GFPGANer(
model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
upscale=args.outscale,
arch='clean',
channel_multiplier=2,
bg_upsampler=upsampler)
os.makedirs(args.output, exist_ok=True)
if os.path.isfile(args.input):
paths = [args.input]
else:
paths = sorted(glob.glob(os.path.join(args.input, '*')))
for idx, path in enumerate(paths):
imgname, extension = os.path.splitext(os.path.basename(path))
print('Testing', idx, imgname)
img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
if len(img.shape) == 3 and img.shape[2] == 4:
img_mode = 'RGBA'
else:
img_mode = None
try:
if args.face_enhance:
_, _, output = face_enhancer.enhance(img, has_aligned=False, only_center_face=False, paste_back=True)
else:
output, _ = upsampler.enhance(img, outscale=args.outscale)
except RuntimeError as error:
print('Error', error)
print('If you encounter CUDA out of memory, try to set --tile with a smaller number.')
else:
if args.ext == 'auto':
extension = extension[1:]
else:
extension = args.ext
if img_mode == 'RGBA': # RGBA images should be saved in png format
extension = 'png'
if args.suffix == '':
save_path = os.path.join(args.output, f'{imgname}.{extension}')
else:
save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
cv2.imwrite(save_path, output)
if __name__ == '__main__':
main()
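# Example invocations (hypothetical file names; the flags correspond to the argparse options defined above):
#   python inference_realesrgan.py -n RealESRGAN_x4plus -i inputs -o results -s 4
#   python inference_realesrgan.py -n realesr-general-x4v3 -i my_photo.png --face_enhance --fp32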

View File

@@ -0,0 +1,13 @@
realesrgan @git+https://github.com/xinntao/Real-ESRGAN.git
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.0
torchvision
basicsr>=1.4.2
facexlib>=0.2.5
gfpgan>=1.3.5
numpy
opencv-python
Pillow
tqdm
git+https://github.com/huggingface/diffusers.git

View File

@@ -0,0 +1,12 @@
""" RealESRGan
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '0'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,100 @@
def build_lora_xl(lora, prompt, lora_weight):
existing_lora = False
if lora == "3drenderstyle":
if lora_weight == "":
lora_weight = "1"
prompt = "3d style, 3d render, " + prompt + " <lora:3d_render_style_xl:"+lora_weight+">"
existing_lora = True
if lora == "psychedelicnoir":
if lora_weight == "":
lora_weight = "1"
prompt = prompt + " <lora:Psychedelic_Noir__sdxl:"+lora_weight+">>"
existing_lora = True
if lora == "wojak":
if lora_weight == "":
lora_weight = "1"
prompt = "<lora:wojak_big:"+lora_weight+">, " + prompt + ", wojak"
existing_lora = True
if lora == "dreamarts":
if lora_weight == "":
lora_weight = "1"
prompt = "<lora:DreamARTSDXL:"+lora_weight+">, " + prompt
existing_lora = True
if lora == "voxel":
if lora_weight == "":
lora_weight = "1"
prompt = "voxel style, " + prompt + " <lora:last:"+lora_weight+">"
existing_lora = True
if lora == "kru3ger":
if lora_weight == "":
lora_weight = "1"
prompt = "kru3ger_style, " + prompt + "<lora:sebastiankrueger-kru3ger_style-000007:"+lora_weight+">"
existing_lora = True
if lora == "inkpunk":
if lora_weight == "":
lora_weight = "0.5"
prompt = "inkpunk style, " + prompt + " <lora:IPXL_v2:"+lora_weight+">"
existing_lora = True
if lora == "inkscenery":
if lora_weight == "":
lora_weight = "1"
prompt = " ink scenery, " + prompt + " <lora:ink_scenery_xl:"+lora_weight+">"
existing_lora = True
if lora == "inkpainting":
if lora_weight == "":
lora_weight = "0.7"
prompt = "painting style, " + prompt + " <lora:Ink_Painting-000006::"+lora_weight+">,"
existing_lora = True
if lora == "timburton":
if lora_weight == "":
lora_weight = "1.27"
pencil_weight = "1.15"
prompt = prompt + " (hand drawn with pencil"+pencil_weight+"), (tim burton style:"+lora_weight+")"
existing_lora = True
if lora == "pixelart":
if lora_weight == "":
lora_weight = "1"
prompt = prompt + " (flat shading:1.2), (minimalist:1.4), <lora:pixelbuildings128-v2:"+lora_weight+"> "
existing_lora = True
if lora == "pepe":
if lora_weight == "":
lora_weight = "0.8"
prompt = prompt + " ,<lora:DD-pepe-v2:"+lora_weight+"> pepe"
existing_lora = True
if lora == "bettertext":
if lora_weight == "":
lora_weight = "1"
prompt = prompt + " ,<lora:BetterTextRedmond:"+lora_weight+">"
existing_lora = True
if lora == "mspaint":
if lora_weight == "":
lora_weight = "1"
prompt = "MSPaint drawing " + prompt +">"
existing_lora = True
if lora == "woodfigure":
if lora_weight == "":
lora_weight = "0.7"
prompt = prompt + ",woodfigurez,artistic style <lora:woodfigurez-sdxl:"+lora_weight+">"
existing_lora = True
if lora == "fireelement":
prompt = prompt + ",composed of fire elements, fire element"
existing_lora = True
return lora, prompt, existing_lora
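# Minimal usage sketch (illustrative values, not part of the module itself):
# build_lora_xl returns the lora id, the prompt augmented with the matching <lora:...> tag
# and trigger words, and a flag indicating whether the id was recognized, so the caller
# knows to download and load the corresponding safetensors weights.
#
#   lora, prompt, existing_lora = build_lora_xl("voxel", "a castle on a hill", "")
#   # prompt        -> "voxel style, a castle on a hill <lora:last:1>"
#   # existing_lora -> True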

View File

@@ -0,0 +1,35 @@
# Stable Diffusion XL
This module provides image generation based on prompts
* https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
## Options
- `model`: string, identifier of the model to choose
- `stabilityai/stable-diffusion-xl-base-1.0`: Default Stable Diffusion XL model
- `ratio`: Ratio of the output image
- `1-1`, `4-3`, `16-9`, `16-10`, `3-4`, `9-16`, `10-16`
- `high_noise_frac`: Denoising factor
- `n_steps`: Number of inference steps to perform
## Example payload
```python
payload = {
'trainerFilePath': 'modules\\stablediffusionxl\\stablediffusionxl.trainer',
'server': '127.0.0.1',
'data': '[{"id":"input_prompt","type":"input","src":"user:text","prompt":"' + prompt +'","active":"True"},{"id":"negative_prompt","type":"input","src":"user:text","prompt":"' + negative_prompt +'","active":"True"},{"id":"output_image","type":"output","src":"file:image","uri":"' + outputfile+'","active":"True"}]',
'optStr': 'model=stabilityai/stable-diffusion-xl-base-1.0;ratio=4-3'
}
import requests
url = 'http://127.0.0.1:53770/predict'
headers = {'Content-type': 'application/x-www-form-urlencoded'}
requests.post(url, headers=headers, data=payload)
```

View File

@@ -0,0 +1,9 @@
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.0
compel~=2.0.2
git+https://github.com/huggingface/diffusers.git
transformers
accelerate
numpy
omegaconf

View File

@@ -0,0 +1,176 @@
"""StableDiffusionXL Module
"""
import gc
import sys
import os
# Add local dir to path for relative imports
sys.path.insert(0, os.path.dirname(__file__))
from nova_utils.interfaces.server_module import Processor
from nova_utils.utils.cache_utils import get_file
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
from diffusers.utils import load_image
import numpy as np
from PIL import Image as PILImage
from lora import build_lora_xl
# Setting defaults
_default_options = {"model": "stabilityai/stable-diffusion-xl-refiner-1.0", "strength" : "0.58", "guidance_scale" : "11.0", "n_steps" : "30", "lora": "","lora_weight": "0.5" }
# TODO: add log info
class StableDiffusionXL(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
import torch
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
#input_image_url = self.current_session.input_data['input_image_url'].data
#input_image_url = ' '.join(input_image_url)
input_image = self.current_session.input_data['input_image'].data
input_prompt = self.current_session.input_data['input_prompt'].data
input_prompt = ' '.join(input_prompt)
negative_prompt = self.current_session.input_data['negative_prompt'].data
negative_prompt = ' '.join(negative_prompt)
# print("Input Image: " + input_image_url)
print("Input prompt: " + input_prompt)
print("Negative prompt: " + negative_prompt)
try:
model = self.options['model']
lora = self.options['lora']
#init_image = load_image(input_image_url).convert("RGB")
init_image = PILImage.fromarray(input_image)
mwidth = 1024
mheight = 1024
w = mwidth
h = mheight
if init_image.width > init_image.height:
scale = float(init_image.height / init_image.width)
w = mwidth
h = int(mheight * scale)
elif init_image.width < init_image.height:
scale = float(init_image.width / init_image.height)
w = int(mwidth * scale)
h = mheight
else:
w = mwidth
h = mheight
init_image = init_image.resize((w, h))
if lora != "" and lora != "None":
print("Loading lora...")
lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "" )
from diffusers import AutoPipelineForImage2Image
import torch
#init_image = init_image.resize((int(w/2), int(h/2)))
pipe = AutoPipelineForImage2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
torch_dtype=torch.float16).to("cuda")
if existing_lora:
lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0]
if str(lora_uri) == "":
return "Lora not found"
lora_path = get_file(
fname=str(lora_uri.uri_id) + ".safetensors",
origin=lora_uri.uri_url,
file_hash=lora_uri.uri_hash,
cache_dir=os.getenv("CACHE_DIR"),
tmp_dir=os.getenv("TMP_DIR"),
)
pipe.load_lora_weights(str(lora_path))
print("Loaded Lora: " + str(lora_path))
seed = 20000
generator = torch.manual_seed(seed)
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
image = pipe(
prompt=input_prompt,
negative_prompt=negative_prompt,
image=init_image,
generator=generator,
num_inference_steps=int(self.options['n_steps']),
image_guidance_scale=float(self.options['guidance_scale']),
strength=float(str(self.options['strength']))).images[0]
elif model == "stabilityai/stable-diffusion-xl-refiner-1.0":
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
model, torch_dtype=torch.float16, variant="fp16",
use_safetensors=True
)
n_steps = int(self.options['n_steps'])
transformation_strength = float(self.options['strength'])
cfg_scale = float(self.options['guidance_scale'])
pipe = pipe.to(self.device)
image = pipe(input_prompt, image=init_image,
negative_prompt=negative_prompt, num_inference_steps=n_steps, strength=transformation_strength, guidance_scale=cfg_scale).images[0]
elif model == "timbrooks/instruct-pix2pix":
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model, torch_dtype=torch.float16,
safety_checker=None)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to(self.device)
n_steps = int(self.options['n_steps'])
cfg_scale = float(self.options['guidance_scale'])
image = pipe(input_prompt, negative_prompt=negative_prompt, image=init_image, num_inference_steps=n_steps, image_guidance_scale=cfg_scale).images[0]
if torch.cuda.is_available():
del pipe
gc.collect()
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
numpy_array = np.array(image)
return numpy_array
except Exception as e:
print(e)
sys.stdout.flush()
return "Error"
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates

View File

@@ -0,0 +1,26 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from existing image based on a prompt" is_iterable="False">
<io type="input" id="input_image" data="Image" default_value=""/>
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value=""/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl-img2img.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-refiner-1.0,timbrooks/instruct-pix2pix};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure};{strength:STRING:0.8};{guidance_scale:STRING:11.0};{n_steps:STRING:30}"/>
</trainer>

View File

@@ -0,0 +1,242 @@
"""StableDiffusionXL Module
"""
import gc
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from nova_utils.interfaces.server_module import Processor
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, logging
from compel import Compel, ReturnedEmbeddingsType
from nova_utils.utils.cache_utils import get_file
import numpy as np
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"  # must be set before torch is imported
import torch
from PIL import Image
from lora import build_lora_xl
logging.disable_progress_bar()
logging.enable_explicit_format()
#logging.set_verbosity_info()
# Setting defaults
_default_options = {"model": "stabilityai/stable-diffusion-xl-base-1.0", "ratio": "1-1", "width": "", "height":"", "high_noise_frac" : "0.8", "n_steps" : "35", "lora" : "" }
# TODO: add log info
class StableDiffusionXL(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.current_session = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_iter) -> dict:
self._device = ("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_built() else "cpu"))
self.variant = "fp16"
self.torch_d_type = torch.float16
self.ds_iter = ds_iter
current_session_name = self.ds_iter.session_names[0]
self.current_session = self.ds_iter.sessions[current_session_name]['manager']
input_prompt = self.current_session.input_data['input_prompt'].data
input_prompt = ' '.join(input_prompt)
negative_prompt = self.current_session.input_data['negative_prompt'].data
negative_prompt = ' '.join(negative_prompt)
new_width = 0
new_height = 0
print("Input prompt: " + input_prompt)
print("Negative prompt: " + negative_prompt)
try:
if self.options['width'] != "" and self.options['height'] != "":
new_width = int(self.options['width'])
new_height = int(self.options['height'])
ratiow, ratioh = self.calculate_aspect(new_width, new_height)
print("Ratio:" + str(ratiow) + ":" + str(ratioh))
else:
ratiow = str(self.options['ratio']).split('-')[0]
ratioh =str(self.options['ratio']).split('-')[1]
model = self.options["model"]
lora = self.options["lora"]
mwidth = 1024
mheight = 1024
height = mheight
width = mwidth
ratiown = int(ratiow)
ratiohn= int(ratioh)
if ratiown > ratiohn:
height = int((ratiohn/ratiown) * float(width))
elif ratiown < ratiohn:
width = int((ratiown/ratiohn) * float(height))
elif ratiown == ratiohn:
width = height
print("Processing Output width: " + str(width) + " Output height: " + str(height))
if model == "stabilityai/stable-diffusion-xl-base-1.0":
base = StableDiffusionXLPipeline.from_pretrained(model, torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
print("Loaded model: " + model)
else:
model_uri = [ x for x in self.trainer.meta_uri if x.uri_id == model][0]
if str(model_uri) == "":
return "Model not found"
model_path = get_file(
fname=str(model_uri.uri_id) + ".safetensors",
origin=model_uri.uri_url,
file_hash=model_uri.uri_hash,
cache_dir=os.getenv("CACHE_DIR"),
tmp_dir=os.getenv("TMP_DIR"),
)
print(str(model_path))
base = StableDiffusionXLPipeline.from_single_file(str(model_path), torch_dtype=self.torch_d_type, variant=self.variant, use_safetensors=True).to(self.device)
print("Loaded model: " + model)
if lora != "" and lora != "None":
print("Loading lora...")
lora, input_prompt, existing_lora = build_lora_xl(lora, input_prompt, "")
if existing_lora:
lora_uri = [ x for x in self.trainer.meta_uri if x.uri_id == lora][0]
if str(lora_uri) == "":
return "Lora not found"
lora_path = get_file(
fname=str(lora_uri.uri_id) + ".safetensors",
origin=lora_uri.uri_url,
file_hash=lora_uri.uri_hash,
cache_dir=os.getenv("CACHE_DIR"),
tmp_dir=os.getenv("TMP_DIR"),
)
base.load_lora_weights(str(lora_path))
print("Loaded Lora: " + str(lora_path))
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-refiner-1.0",
text_encoder_2=base.text_encoder_2,
vae=base.vae,
torch_dtype=self.torch_d_type,
use_safetensors=True,
variant=self.variant,
)
compel_base = Compel(
tokenizer=[base.tokenizer, base.tokenizer_2],
text_encoder=[base.text_encoder, base.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True],
)
compel_refiner = Compel(
tokenizer=[refiner.tokenizer_2],
text_encoder=[refiner.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[True])
conditioning, pooled = compel_base(input_prompt)
negative_conditioning, negative_pooled = compel_base(negative_prompt)
conditioning_refiner, pooled_refiner = compel_refiner(input_prompt)
negative_conditioning_refiner, negative_pooled_refiner = compel_refiner(
negative_prompt)
n_steps = int(self.options['n_steps'])
high_noise_frac = float(self.options['high_noise_frac'])
#base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)
img = base(
prompt_embeds=conditioning,
pooled_prompt_embeds=pooled,
negative_prompt_embeds=negative_conditioning,
negative_pooled_prompt_embeds=negative_pooled,
width=width,
height=height,
num_inference_steps=n_steps,
denoising_end=high_noise_frac,
output_type="latent",
).images
if torch.cuda.is_available():
del base
gc.collect()
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
refiner.to(self.device)
# refiner.enable_model_cpu_offload()
image = refiner(
prompt_embeds=conditioning_refiner,
pooled_prompt_embeds=pooled_refiner,
negative_prompt_embeds=negative_conditioning_refiner,
negative_pooled_prompt_embeds=negative_pooled_refiner,
num_inference_steps=n_steps,
denoising_start=high_noise_frac,
num_images_per_prompt=1,
image=img,
).images[0]
if torch.cuda.is_available():
del refiner
gc.collect()
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
if (new_height != 0 or new_width != 0) and (new_width != mwidth or new_height != mheight):
print("Resizing to width: " + str(new_width) + " height: " + str(new_height))
image = image.resize((new_width, new_height), Image.LANCZOS)
numpy_array = np.array(image)
return numpy_array
except Exception as e:
print(e)
sys.stdout.flush()
return "Error"
def calculate_aspect(self, width: int, height: int):
def gcd(a, b):
"""The GCD (greatest common divisor) is the highest number that evenly divides both width and height."""
return a if b == 0 else gcd(b, a % b)
r = gcd(width, height)
x = int(width / r)
y = int(height / r)
return x, y
def to_output(self, data: dict):
self.current_session.output_data_templates['output_image'].data = data
return self.current_session.output_data_templates

View File

@@ -0,0 +1,41 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="ImageGeneration" description="Generates Image from prompt" is_iterable="False">
<io type="input" id="input_prompt" data="prompt" default_value=""/>
<io type="input" id="negative_prompt" data="prompt" default_value=""/>
<io type="output" id="output_image" data="image" default_value="sd.jpg"/>
<uri id="juggernaut" url="https://civitai.com/api/download/models/198530" hash='1FE6C7EC54C786040CDABC7B4E89720069D97096922E20D01F13E7764412B47F'/>
<uri id="dynavision" url="https://civitai.com/api/download/models/198962" hash='FD9CDC26C3B6D1F30BACBC435E455E925E35622E4873CCFC55FD1C88E980585E'/>
<uri id="colossus" url="https://civitai.com/api/download/models/213982" hash='5A7E9DD581B3A9EDF2ED0D9FB2036C389325CD7BA13A754CE19BEEDBB69CEB73'/>
<uri id="newreality" url="https://civitai.com/api/download/models/232565" hash='06A85616411135F8CAF161F71CB0948F79E85750E4AF36A885C75485A9B68E2F'/>
<uri id="unstable" url="https://civitai.com/api/download/models/209647" hash='05C9E2274A74AE6957B986C92E5699FDFACFFD7EE24CED0D33CB696DE1A6C98B'/>
<uri id="fantastic" url="https://civitai.com/api/download/models/143722" hash='B0C590726969EF93BC4136C167D339A277946787223BFAD7B1DC9A68A4F183FC'/>
<uri id="mohawk" url="https://civitai.com/api/download/models/207419" hash='0248CA08AA5D5B342355173677C77ADD42E41ECEC3B6B6E52E9C9C471C30C508'/>
<uri id="dreamshaper" url="https://civitai.com/api/download/models/126688" hash='0F1B80CFE81B9C3BDE7FDCBF6898897B2811B27BE1DF684583C3D85CBC9B1FA4'/>
<uri id="timeless" url="https://civitai.com/api/download/models/198246" hash='A771B2B5E8D2A3C23A3A65F9A51E675F253F101C34BE7DC06FD18D534579D8F8'/>
<uri id="crystalclear" url="https://civitai.com/api/download/models/133832" hash='0B76532E03A1BAC388CBF559AF00384ABCBD2B5B3F8834158AE4B1B9146A3843'/>
<uri id="chroma" url="https://civitai.com/api/download/models/169740" hash='D2B9E5240C4BC74BB98063CEE16671FDC08D5B7BF197074A0C896E5DBB25BD24'/>
<uri id="bluepencil" url="https://civitai.com/api/download/models/212090" hash='C4D7E01814F0EED57A7120629D3017AC018AD7CDECB48F7FBE6B12F9C9C4D6B9'/>
<uri id="voxel" url="https://civitai.com/api/download/models/128609" hash='7D9A5F11E1B38D97F75D2B84BFB5BB3BF95CD0E5F2500B002D13374EB4F88B5C'/>
<uri id="inkpunk" url="https://civitai.com/api/download/models/201552" hash='6BD1A90A93AE288D959B6A90738EB2DB79EC26936F460750D8379C78554A8D53'/>
<uri id="3drenderstyle" url="https://civitai.com/api/download/models/218206" hash='C4AD16F1B116F10BBB4070D3ABD0249F799B609DAD8BC8CF92A0AC94A8DE8133'/>
<uri id="psychedelicnoir" url="https://civitai.com/api/download/models/140194" hash='896B6B4B6DDC4A28C1CB69359944F04AEBF5954B7A5909FD9629E5549FFC2BDF'/>
<uri id="dreamarts" url="https://civitai.com/api/download/models/137124" hash='6A8A5968FB31FB6D83E8E0FE390CF2F3693A35FC4CF247A794B0B261E166B19B'/>
<uri id="wojak" url="https://civitai.com/api/download/models/140160" hash='0BD68F0199197CD9D8377A30E9F288479721D1838228A4484272EFF09A479209'/>
<uri id="kru3ger" url="https://civitai.com/api/download/models/142129" hash='AE92E349446A74D44ABDB1441AF648B2078E4FBB8F46C7158AD18120553DDC3D'/>
<uri id="timburton" url="https://civitai.com/api/download/models/207862" hash='62C229B13622B19928B2D5B9FA5988E612C6DC3060D3AACFE720F43D034D9870'/>
<uri id="pixelart" url="https://civitai.com/api/download/models/135931" hash='BBF3D8DEFBFB3FB71331545225C0CF50C74A748D2525F7C19EBB8F74445DE274'/>
<uri id="pepe" url="https://civitai.com/api/download/models/181917" hash='CBE1E1C746301801613CB331F2051AD16FF724DDA764A54135AA89D909067B97'/>
<uri id="bettertext" url="https://civitai.com/api/download/models/163224" hash='AB1EE501387633DFBFD05970D7BBC0921D23CA804FFC0E717828A8796E8D63CF'/>
<uri id="mspaint" url="https://civitai.com/api/download/models/205793" hash='C9503F84E12F2B016FFB8BA689220BA38BBC511573C64AC9BD0ADC853780DA5D'/>
<uri id="woodfigure" url="https://civitai.com/api/download/models/207919" hash='9E8D768E0D707867717EBF0CB93EBF65431CC5A105982FA5FFD162D78E20B8C1'/>
<uri id="fireelement" url="https://civitai.com/api/download/models/175257" hash='CB04B04F2D90B0A168AFFB26CC7C6F76834FEB8C2F0F30ABE35784084D1FFFBE'/>
</meta>
<model create="StableDiffusionXL" script="stablediffusionxl.py" optstr="{model:LIST:stabilityai/stable-diffusion-xl-base-1.0,juggernaut,dynavision,colossus,newreality,unstable,fantastic,mohawk,dreamshaper,timeless,crystalclear,chroma,bluepencil};{lora:LIST:None,voxel,inkpunk,3drenderstyle,psychedelicnoir,dreamarts,kru3ger,wojak,timburton,pixelart,pepe,bettertext,mspaint,woodfigure,fireelement};{width:STRING:1024};{height:STRING:1024};{high_noise_frac:STRING:0.8};{n_steps:STRING:35}"/>
</trainer>

View File

@@ -0,0 +1,12 @@
""" Stable Diffusion XL
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '0'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,52 @@
# WhisperX
This module provides fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and
speaker diarization.
* https://github.com/m-bain/whisperX
## Options
- `model`: string, identifier of the model to choose, sorted ascending in required (V)RAM:
- `tiny`, `tiny.en`
- `base`, `base.en`
- `small`, `small.en`
- `medium`, `medium.en`
- `large-v1`
- `large-v2`
- `alignment_mode`: string, alignment method to use
- `raw` Segments as identified by Whisper
- `segment` Improved segmentation using separate alignment model. Roughly equivalent to sentence alignment.
- `word` Improved segmentation using separate alignment model. Equivalent to word alignment.
- `language`: language code for transcription and alignment models. Supported languages:
- `ar`, `cs`, `da`, `de`, `el`, `en`, `es`, `fa`, `fi`, `fr`, `he`, `hu`, `it`, `ja`, `ko`, `nl`, `pl`, `pt`, `ru`, `te`, `tr`, `uk`, `ur`, `vi`, `zh`
- `None`: auto-detect language from first 30 seconds of audio
- `batch_size`: how many samples to process at once, increases speed but also (V)RAM consumption
## Examples
### Request
```python
import requests
import json
payload = {
"jobID" : "whisper_transcript",
"data": json.dumps([
{"src":"file:stream:audio", "type":"input", "id":"audio", "uri":"path/to/my/file.wav"},
{"src":"file:annotation:free", "type":"output", "id":"transcript", "uri":"path/to/my/transcript.annotation"}
]),
"trainerFilePath": "modules\\whisperx\\whisperx_transcript.trainer",
}
url = 'http://127.0.0.1:8080/process'
headers = {'Content-type': 'application/x-www-form-urlencoded'}
x = requests.post(url, headers=headers, data=payload)
print(x.text)
```

View File

@@ -0,0 +1,7 @@
hcai-nova-utils>=1.5.5
--extra-index-url https://download.pytorch.org/whl/cu118
torch==2.1.0+cu118
torchvision>= 0.15.1+cu118
torchaudio >= 2.0.0+cu118
pyannote-audio @ git+https://github.com/shelm/pyannote-audio.git@d7b4de3
whisperx @ git+https://github.com/m-bain/whisperx.git@49e0130

View File

@@ -0,0 +1,12 @@
""" WhisperX
"""
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '1'
_MINOR_VERSION = '0'
_PATCH_VERSION = '1'
__version__ = '.'.join([
_MAJOR_VERSION,
_MINOR_VERSION,
_PATCH_VERSION,
])

View File

@@ -0,0 +1,124 @@
"""WhisperX Module
"""
from nova_utils.interfaces.server_module import Processor
import sys
# Setting defaults
_default_options = {"model": "tiny", "alignment_mode": "segment", "batch_size": "16", 'language': None, 'compute_type': 'float16'}
# supported language codes, cf. whisperx/alignment.py
# DEFAULT_ALIGN_MODELS_TORCH.keys() | DEFAULT_ALIGN_MODELS_HF.keys() | {None}
# {'vi', 'uk', 'pl', 'ur', 'ru', 'ko', 'en', 'zh', 'es', 'it', 'el', 'te', 'da', 'he', 'fa', 'pt', 'de',
# 'fr', 'tr', 'nl', 'cs', 'hu', 'fi', 'ar', 'ja', None}
class WhisperX(Processor):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.options = _default_options | self.options
self.device = None
self.ds_iter = None
self.session_manager = None
# IO shortcuts
self.input = [x for x in self.model_io if x.io_type == "input"]
self.output = [x for x in self.model_io if x.io_type == "output"]
assert len(self.input) == 1 and len(self.output) == 1
self.input = self.input[0]
self.output = self.output[0]
def process_data(self, ds_manager) -> dict:
import whisperx
import torch
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.session_manager = self.get_session_manager(ds_manager)
input_audio = self.session_manager.input_data['audio']
# sliding window will be applied by WhisperX
audio = whisperx.load_audio(input_audio.meta_data.file_path)
# transcribe with original whisper
try:
model = whisperx.load_model(self.options["model"], self.device, compute_type=self.options['compute_type'],
language=self.options['language'])
except ValueError:
print(f'Your hardware does not support {self.options["compute_type"]} - fallback to float32')
sys.stdout.flush()
model = whisperx.load_model(self.options["model"], self.device, compute_type='float32',
language=self.options['language'])
result = model.transcribe(audio, batch_size=int(self.options["batch_size"]))
# delete model if low on GPU resources
import gc; gc.collect(); torch.cuda.empty_cache(); del model
if not self.options["alignment_mode"] == "raw":
# load alignment model and metadata
model_a, metadata = whisperx.load_align_model(
language_code=result["language"], device=self.device
)
# align whisper output
result_aligned = whisperx.align(
result["segments"], model_a, metadata, audio, self.device
)
result = result_aligned
# delete model if low on GPU resources
import gc; gc.collect(); torch.cuda.empty_cache(); del model_a
return result
def to_output(self, data: dict):
def _fix_missing_timestamps(data):
"""
https://github.com/m-bain/whisperX/issues/253
Some characters might be missing timestamps and recognition scores. This function adds estimated timestamps, assuming a fixed duration of 65 ms per character.
Confidence for each added timestamp will be 0.
Args:
data (dictionary): output dictionary as returned by process_data
"""
last_end = 0
for s in data["segments"]:
for w in s["words"]:
if "end" in w.keys():
last_end = w["end"]
else:
#TODO: rethink lower bound for confidence; place word centred instead of left aligned
w["start"] = last_end
last_end += 0.065
w["end"] = last_end
#w["score"] = 0.000
w['score'] = _hmean([x['score'] for x in s['words'] if len(x) == 4])
def _hmean(scores):
if len(scores) > 0:
prod = scores[0]
for s in scores[1:]:
prod *= s
prod = prod**(1/len(scores))
else:
prod = 0
return prod
if (
self.options["alignment_mode"] == "word"
or self.options["alignment_mode"] == "segment"
):
_fix_missing_timestamps(data)
if self.options["alignment_mode"] == "word":
anno_data = [
(w["start"], w["end"], w["word"], w["score"])
for w in data["word_segments"]
]
else:
anno_data = [
#(w["start"], w["end"], w["text"], _hmean([x['score'] for x in w['words']])) for w in data["segments"]
(w["start"], w["end"], w["text"], 1) for w in data["segments"] # alignment 'raw' no longer contains a score(?)
]
# convert to milliseconds
anno_data = [(x[0]*1000, x[1]*1000, x[2], x[3]) for x in anno_data]
out = self.session_manager.output_data_templates[self.output.io_id]
out.data = anno_data
return self.session_manager.output_data_templates

View File

@@ -0,0 +1,9 @@
<?xml version="1.0" ?>
<trainer ssi-v="5">
<info trained="true" seed="1234"/>
<meta backend="nova-server" category="Transcript" description="Transcribes audio" is_iterable="False">
<io type="input" id="audio" data="stream:Audio" default_value="audio"/>
<io type="output" id="transcript" data="annotation:Free" default_value="transcript"/>
</meta>
<model create="WhisperX" script="whisperx_transcript.py" optstr="{model:LIST:base,tiny,small,medium,large-v1,large-v2,tiny.en,base.en,small.en,medium.en};{alignment_mode:LIST:segment,word,raw};{language:LIST:None,en,de,ar,cs,da,el,es,fa,fi,fr,he,hu,it,ja,ko,nl,pl,pt,ru,te,tr,uk,ur,vi,zh};{batch_size:STRING:16};{compute_type:LIST:float16,float32,int8}"/>
</trainer>

View File

@@ -0,0 +1,2 @@
call venv/Scripts/activate
nova-server

View File

@@ -0,0 +1,3 @@
python -m venv venv
call venv/Scripts/activate
pip install hcai-nova-server

View File

@@ -11,7 +11,7 @@ from nostr_dvm.utils.output_utils import upload_media_to_hoster
"""
This file contains basic calling functions for ML tasks that are outsourced to nova server. It is an Open-Source backend
that enables running models locally based on preefined modules, by accepting a request form.
that enables running models locally based on predefined modules, by accepting a request.
Modules are deployed in separate virtual environments so dependencies won't conflict.
"""

View File

@@ -1,9 +1,8 @@
import importlib
import json
import os
import subprocess
from datetime import timedelta
from pathlib import Path
from sys import platform
from nostr_sdk import PublicKey, Keys, Client, Tag, Event, EventBuilder, Filter, HandleNotification, Timestamp, \
init_logger, LogLevel, Options, nip04_encrypt
@@ -475,8 +474,11 @@ class DVM:
request_form = dvm.create_request_from_nostr_event(job_event, self.client, self.dvm_config)
if dvm_config.USE_OWN_VENV:
python_bin = (r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0]
+ "/bin/python")
python_location = "/bin/python"
if platform == "win32":
python_location = "/Scripts/python"
python_bin = ( r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0]
+ python_location)
retcode = subprocess.call([python_bin, dvm_config.SCRIPT,
'--request', json.dumps(request_form),
'--identifier', dvm_config.IDENTIFIER,

View File

@@ -3,6 +3,7 @@ import os
import subprocess
from subprocess import run
import sys
from sys import platform
from threading import Thread
from venv import create
from nostr_sdk import Keys
@@ -59,15 +60,18 @@ class DVMTaskInterface:
def install_dependencies(self, dvm_config):
if dvm_config.SCRIPT != "":
if self.dvm_config.USE_OWN_VENV:
dir = r'cache/venvs/' + os.path.basename(dvm_config.SCRIPT).split(".py")[0]
pip_location = 'bin/pip'
if platform == "win32":
pip_location = dir + '/Scripts/pip'
if not os.path.isdir(dir):
print(dir)
print("Creating Venv: " + dir)
create(dir, with_pip=True, upgrade_deps=True)
self.dependencies.append(("nostr-dvm", "nostr-dvm"))
for (module, package) in self.dependencies:
print("Installing Venv Module: " + module)
run(["bin/pip", "install", "--force-reinstall", package], cwd=dir)
run([pip_location, "install", "--upgrade", package], cwd=dir)
else:
for module, package in self.dependencies:
if module != "nostr-dvm":

View File

@@ -6,12 +6,27 @@ Reusable backend functions can be defined in backends (e.g. API calls)
Current List of Tasks:
| Module | Kind | Description | Backend |
|-------------------------|------|------------------------------------------------|-------------|
| TextExtractionPDF | 5000 | Extracts Text from a PDF file | local |
| SpeechToTextGoogle | 5000 | Extracts Speech from Media files via Google | googleAPI |
| TranslationGoogle | 5002 | Translates Inputs to another language | googleAPI |
| TranslationLibre | 5002 | Translates Inputs to another language | libreAPI |
| ImageGenerationDALLE | 5100 | Generates an Image with Dall-E | openAI |
| MediaConverter | 5200 | Converts a link of a media file and uploads it | openAI |
| DiscoverInactiveFollows | 5301 | Find inactive Nostr users | local |
| Module | Kind | Description | Backend |
|------------------------------|--------------|------------------------------------------------------------|------------------|
| TextExtractionPDF | 5000 | Extracts Text from a PDF file | local |
| SpeechToTextGoogle | 5000 | Extracts Speech from Media files via Google Services | googleAPI |
| SpeechToTextWhisperX | 5000 | Extracts Speech from Media files via local WhisperX | nserver |
| ImageInterrogator | 5000 | Extracts Prompts from Images | nserver |
| TranslationGoogle | 5002 | Translates Inputs to another language | googleAPI |
| TranslationLibre | 5002 | Translates Inputs to another language | libreAPI |
| TextGenerationLLMLite | 5050 | Chat with LLM backends like Ollama, ChatGPT etc | local/api/openai |
| ImageGenerationSDXL | 5100 | Generates an Image from Prompt with Stable Diffusion XL | nserver |
| ImageGenerationSDXLIMG2IMG | 5100 | Generates an Image from an Image with Stable Diffusion XL | nserver |
| ImageGenerationReplicateSDXL | 5100 | Generates an Image from Prompt with Stable Diffusion XL | replicate |
| ImageGenerationMLX | 5100 | Generates an Image with Stable Diffusion 2.1 on M1/2/3 Mac | mlx |
| ImageGenerationDALLE | 5100 | Generates an Image with OpenAI's Dall-E | openAI |
| ImageUpscale | 5100 | Upscales an Image | nserver |
| MediaConverter | 5200 | Converts a link of a media file and uploads it | openAI |
| VideoGenerationReplicateSVD | 5202 (inoff) | Generates a Video from an Image | replicate |
| TextToSpeech | 5250 (inoff) | Generate Audio from a prompt | local |
| TrendingNotesNostrBand | 5300 | Show trending notes on nostr.band | nostr.band api |
| DiscoverInactiveFollows | 5301 | Find inactive Nostr users | local |
| AdvancedSearch | 5302 (inoff) | Search Content on nostr.band | local |
Kinds marked (inoff) are suggestions that are not merged yet and might change in the future.
Backends might require adding an API key to the .env file or running an external server/framework that the DVM communicates with.

View File

@@ -47,8 +47,8 @@ class AdvancedSearch(DVMTaskInterface):
# default values
user = ""
since_days = 800 #days ago
until_days = 0 #days ago
since_days = 800 # days ago
until_days = 0 # days ago
search = ""
max_results = 20
@@ -98,11 +98,14 @@ class AdvancedSearch(DVMTaskInterface):
search_until = Timestamp.from_secs(dif)
if options["user"] == "":
notes_filter = Filter().kind(1).search(options["search"]).since(search_since).until(search_until).limit(options["max_results"])
notes_filter = Filter().kind(1).search(options["search"]).since(search_since).until(search_until).limit(
options["max_results"])
elif options["search"] == "":
notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).since(search_since).until(search_until).limit(options["max_results"])
notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).since(search_since).until(
search_until).limit(options["max_results"])
else:
notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).search(options["search"]).since(search_since).until(search_until).limit(options["max_results"])
notes_filter = Filter().kind(1).author(PublicKey.from_hex(options["user"])).search(options["search"]).since(
search_since).until(search_until).limit(options["max_results"])
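# e.g. with options["user"] == "" and options["search"] == "bitcoin", this builds a kind-1
# full-text filter limited to options["max_results"] notes between search_since and search_until.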
events = cli.get_events_of([notes_filter], timedelta(seconds=5))
@@ -116,8 +119,6 @@ class AdvancedSearch(DVMTaskInterface):
return json.dumps(result_list)
def post_process(self, result, event):
"""Overwrite the interface function to return a social client readable format, if requested"""
for tag in event.tags():
@@ -170,9 +171,9 @@ def build_example(name, identifier, admin_config):
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
nip89config.CONTENT = json.dumps(nip89info)
return AdvancedSearch(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
admin_config=admin_config)
def process_venv():
@@ -182,5 +183,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -8,7 +8,6 @@ from nostr_dvm.utils.nip89_utils import NIP89Config
from nostr_dvm.utils.mediasource_utils import organize_input_media_data
from nostr_dvm.utils.output_utils import upload_media_to_hoster
"""
This File contains a Module to convert media locally
@@ -98,10 +97,10 @@ def build_example(name, identifier, admin_config):
nip89config = NIP89Config()
return MediaConverter(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -111,4 +110,4 @@ def process_venv():
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -163,6 +163,7 @@ class DiscoverInactiveFollows(DVMTaskInterface):
# if not text/plain, don't post-process
return result
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
@@ -196,6 +197,7 @@ def build_example(name, identifier, admin_config):
return DiscoverInactiveFollows(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -203,5 +205,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -107,7 +107,6 @@ class ImageGenerationDALLE(DVMTaskInterface):
n=int(options['number']),
)
image_url = response.data[0].url
# rehost the result instead of relying on the openai link
response = requests.get(image_url)
@@ -162,8 +161,8 @@ def process_venv():
result = dvm.process(json.loads(args.request))
time.sleep(10)
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -6,7 +6,6 @@ from PIL import Image
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -153,5 +152,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -12,7 +12,7 @@ from nostr_dvm.utils.output_utils import upload_media_to_hoster
from nostr_dvm.utils.zap_utils import get_price_per_sat
"""
This File contains a Module to generate an Image on replicate and receive results back.
This File contains a Module to generate an Image on Macs with M1/M2/M3 chips and receive results back.
Accepted Inputs: Prompt (text)
Outputs: A URL to an Image
@@ -95,7 +95,7 @@ class ImageGenerationMLX(DVMTaskInterface):
def process(self, request_form):
try:
import mlx.core as mx
from backends.mlx.stable_diffusion import StableDiffusion
from nostr_dvm.backends.mlx.modules.stable_diffusion import StableDiffusion
options = DVMTaskInterface.set_options(request_form)
sd = StableDiffusion()

View File

@@ -0,0 +1,206 @@
import json
from multiprocessing.pool import ThreadPool
from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions
"""
This File contains a Module to generate an Image from a text prompt on n-server and receive results back.
Accepted Inputs: Prompt (text)
Outputs: A URL to an Image
Params: -model # models: juggernaut, dynavision, colossusProject, newreality, unstable
-lora # loras (weights applied on top of models), e.g. voxel
"""
class ImageGenerationSDXL(DVMTaskInterface):
KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
TASK: str = "text-to-image"
FIX_COST: float = 70
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
super().__init__(name, dvm_config, nip89config, admin_config, options)
def is_input_supported(self, tags):
for tag in tags:
if tag.as_vec()[0] == 'i':
input_value = tag.as_vec()[1]
input_type = tag.as_vec()[2]
if input_type != "text":
return False
elif tag.as_vec()[0] == 'output':
output = tag.as_vec()[1]
if output not in ("image/png", "image/jpg",
"image/png;format=url", "image/jpg;format=url"):
print("Output format not supported, skipping..")
return False
return True
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl.trainer'
prompt = ""
negative_prompt = ""
if self.options.get("default_model") and self.options.get("default_model") != "":
model = self.options['default_model']
else:
model = "stabilityai/stable-diffusion-xl-base-1.0"
ratio_width = "1"
ratio_height = "1"
width = ""
height = ""
if self.options.get("default_lora") and self.options.get("default_lora") != "":
lora = self.options['default_lora']
else:
lora = ""
lora_weight = ""
strength = ""
guidance_scale = ""
for tag in event.tags():
if tag.as_vec()[0] == 'i':
input_type = tag.as_vec()[2]
if input_type == "text":
prompt = tag.as_vec()[1]
elif tag.as_vec()[0] == 'param':
print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
if tag.as_vec()[1] == "negative_prompt":
negative_prompt = tag.as_vec()[2]
elif tag.as_vec()[1] == "lora":
lora = tag.as_vec()[2]
elif tag.as_vec()[1] == "lora_weight":
lora_weight = tag.as_vec()[2]
elif tag.as_vec()[1] == "strength":
strength = float(tag.as_vec()[2])
elif tag.as_vec()[1] == "guidance_scale":
guidance_scale = float(tag.as_vec()[2])
elif tag.as_vec()[1] == "ratio":
if len(tag.as_vec()) > 3:
ratio_width = (tag.as_vec()[2])
ratio_height = (tag.as_vec()[3])
elif len(tag.as_vec()) == 3:
split = tag.as_vec()[2].split(":")
ratio_width = split[0]
ratio_height = split[1]
# if size is set it will overwrite ratio.
elif tag.as_vec()[1] == "size":
if len(tag.as_vec()) > 3:
width = (tag.as_vec()[2])
height = (tag.as_vec()[3])
elif len(tag.as_vec()) == 3:
split = tag.as_vec()[2].split("x")
if len(split) > 1:
width = split[0]
height = split[1]
elif tag.as_vec()[1] == "model":
model = tag.as_vec()[2]
io_input = {
"id": "input_prompt",
"type": "input",
"src": "request:text",
"data": prompt
}
io_negative = {
"id": "negative_prompt",
"type": "input",
"src": "request:text",
"data": negative_prompt
}
io_output = {
"id": "output_image",
"type": "output",
"src": "request:image"
}
request_form['data'] = json.dumps([io_input, io_negative, io_output])
options = {
"model": model,
"ratio": ratio_width + '-' + ratio_height,
"width": width,
"height": height,
"strength": strength,
"guidance_scale": guidance_scale,
"lora": lora,
"lora_weight": lora_weight
}
request_form['options'] = json.dumps(options)
return request_form
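# Illustrative note (placeholder values): for an event carrying ["i", "an astronaut riding a horse", "text"]
# and ["param", "ratio", "16:9"], the form returned above roughly contains
#   jobID:           "<event id hex>_<DVM name without spaces>"
#   trainerFilePath: modules\stablediffusionxl\stablediffusionxl.trainer
#   data:            JSON list with the input_prompt, negative_prompt and output_image entries
#   options:         JSON with model, ratio "16-9", width/height, strength, guidance_scale, lora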
def process(self, request_form):
try:
# Call the process route of n-server with our request form.
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
except Exception as e:
raise Exception(e)
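# Note on the flow above: send_request_to_server() only queues the job on the n-server;
# check_server_status() runs in the worker thread, waits until the server reports the job
# as finished, and thread.get() then returns the job's result to the caller.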
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
def build_example(name, identifier, admin_config, server_address, default_model="stabilityai/stable-diffusion-xl"
"-base-1.0", default_lora=""):
dvm_config = build_default_config(identifier)
dvm_config.USE_OWN_VENV = False
admin_config.LUD16 = dvm_config.LN_ADDRESS
# A module might have options it can be initialized with, here we set a default model, and the server
# address it should use. These parameters can be freely defined in the task component
options = {'default_model': default_model, 'default_lora': default_lora, 'server': server_address}
nip89info = {
"name": name,
"image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
"about": "I draw images based on a prompt with a Model called unstable diffusion",
"encryptionSupported": True,
"cashuAccepted": True,
"nip90Params": {
"negative_prompt": {
"required": False,
"values": []
},
"ratio": {
"required": False,
"values": ["1:1", "4:3", "16:9", "3:4", "9:16", "10:16"]
}
}
}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
nip89config.CONTENT = json.dumps(nip89info)
return ImageGenerationSDXL(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
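# Hedged usage sketch (server address, names and model are placeholders, adjust to your setup):
#   admin_config = AdminConfig()
#   dvm = build_example("Unstable Diffusion", "unstable_diffusion", admin_config,
#                       "http://127.0.0.1:37318", default_model="unstable")
#   dvm.run()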
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
dvm = ImageGenerationSDXL(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()

View File

@@ -0,0 +1,235 @@
import json
from multiprocessing.pool import ThreadPool
from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions
"""
This File contains a Module to generate an Image from an input Image (and an optional text prompt) on n-server and receive results back.
Accepted Inputs: Image (url), optional Prompt (text)
Outputs: A URL to an Image
Params: -model # models: juggernaut, dynavision, colossusProject, newreality, unstable
-lora # loras (weights applied on top of models), e.g. voxel
"""
class ImageGenerationSDXLIMG2IMG(DVMTaskInterface):
KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
TASK: str = "image-to-image"
FIX_COST: float = 70
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
super().__init__(name, dvm_config, nip89config, admin_config, options)
def is_input_supported(self, tags):
hasurl = False
hasprompt = False
for tag in tags:
if tag.as_vec()[0] == 'i':
input_value = tag.as_vec()[1]
input_type = tag.as_vec()[2]
if input_type == "url":
hasurl = True
elif input_type == "text":
hasprompt = True  # prompt is optional when a lora is set
elif tag.as_vec()[0] == 'output':
output = tag.as_vec()[1]
if output not in ("image/png", "image/jpg",
"image/png;format=url", "image/jpg;format=url"):
print("Output format not supported, skipping..")
return False
if not hasurl:
return False
return True
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
request_form["trainerFilePath"] = r'modules\stablediffusionxl\stablediffusionxl-img2img.trainer'
prompt = ""
negative_prompt = ""
url = ""
if self.options.get("default_model"):
model = self.options['default_model']
else:
model = "stabilityai/stable-diffusion-xl-refiner-1.0"
ratio_width = "1"
ratio_height = "1"
width = ""
height = ""
if self.options.get("default_lora") and self.options.get("default_lora") != "":
lora = self.options['default_lora']
else:
lora = ""
lora_weight = ""
if self.options.get("strength"):
strength = float(self.options['strength'])
else:
strength = 0.8
if self.options.get("guidance_scale"):
guidance_scale = float(self.options['guidance_scale'])
else:
guidance_scale = 11.0
for tag in event.tags():
if tag.as_vec()[0] == 'i':
input_type = tag.as_vec()[2]
if input_type == "text":
prompt = tag.as_vec()[1]
elif input_type == "url":
url = tag.as_vec()[1]
elif tag.as_vec()[0] == 'param':
print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
if tag.as_vec()[1] == "negative_prompt":
negative_prompt = tag.as_vec()[2]
elif tag.as_vec()[1] == "lora":
lora = tag.as_vec()[2]
elif tag.as_vec()[1] == "lora_weight":
lora_weight = tag.as_vec()[2]
elif tag.as_vec()[1] == "strength":
strength = float(tag.as_vec()[2])
elif tag.as_vec()[1] == "guidance_scale":
guidance_scale = float(tag.as_vec()[2])
elif tag.as_vec()[1] == "ratio":
if len(tag.as_vec()) > 3:
ratio_width = (tag.as_vec()[2])
ratio_height = (tag.as_vec()[3])
elif len(tag.as_vec()) == 3:
split = tag.as_vec()[2].split(":")
ratio_width = split[0]
ratio_height = split[1]
# if size is set it will overwrite ratio.
elif tag.as_vec()[1] == "size":
if len(tag.as_vec()) > 3:
width = (tag.as_vec()[2])
height = (tag.as_vec()[3])
elif len(tag.as_vec()) == 3:
split = tag.as_vec()[2].split("x")
if len(split) > 1:
width = split[0]
height = split[1]
elif tag.as_vec()[1] == "model":
model = tag.as_vec()[2]
io_input_image = {
"id": "input_image",
"type": "input",
"src": "url:Image",
"uri": url
}
io_input = {
"id": "input_prompt",
"type": "input",
"src": "request:text",
"data": prompt
}
io_negative = {
"id": "negative_prompt",
"type": "input",
"src": "request:text",
"data": negative_prompt
}
io_output = {
"id": "output_image",
"type": "output",
"src": "request:image"
}
request_form['data'] = json.dumps([io_input_image, io_input, io_negative, io_output])
options = {
"model": model,
"ratio": ratio_width + '-' + ratio_height,
"width": width,
"height": height,
"strength": strength,
"guidance_scale": guidance_scale,
"lora": lora,
"lora_weight": lora_weight,
"n_steps": 30
}
request_form['options'] = json.dumps(options)
return request_form
def process(self, request_form):
try:
# Call the process route of NOVA-Server with our request form.
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
except Exception as e:
raise Exception(e)
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
def build_example(name, identifier, admin_config, server_address, default_lora="", strength=0.6):
dvm_config = build_default_config(identifier)
dvm_config.USE_OWN_VENV = False
admin_config.LUD16 = dvm_config.LN_ADDRESS
nip89info = {
"name": name,
"image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
"about": "I convert an image to another image, kinda random for now. ",
"encryptionSupported": True,
"cashuAccepted": True,
"nip90Params": {
"negative_prompt": {
"required": False,
"values": []
},
"lora": {
"required": False,
"values": ["inkpunk", "timburton", "voxel"]
},
"strength": {
"required": False,
"values": []
}
}
}
# A module might have options it can be initialized with, here we set a default model, lora and the server
options = {'default_lora': default_lora, 'strength': strength, 'server': server_address}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
nip89config.CONTENT = json.dumps(nip89info)
return ImageGenerationSDXLIMG2IMG(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
dvm = ImageGenerationSDXLIMG2IMG(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()

View File

@@ -0,0 +1,149 @@
import json
from multiprocessing.pool import ThreadPool
from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions
"""
This File contains a Module to extract a prompt from an image given by a URL.
Accepted Inputs: Link to an image (url)
Outputs: A textual description of the image
"""
class ImageInterrogator(DVMTaskInterface):
KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
TASK: str = "image-to-text"
FIX_COST: float = 80
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
super().__init__(name, dvm_config, nip89config, admin_config, options)
def is_input_supported(self, tags):
hasurl = False
for tag in tags:
if tag.as_vec()[0] == 'i':
input_value = tag.as_vec()[1]
input_type = tag.as_vec()[2]
if input_type == "url":
hasurl = True
if not hasurl:
return False
return True
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
request_form["trainerFilePath"] = r'modules\image_interrogator\image_interrogator.trainer'
url = ""
method = "prompt"
mode = "best"
for tag in event.tags():
if tag.as_vec()[0] == 'i':
input_type = tag.as_vec()[2]
if input_type == "url":
url = tag.as_vec()[1]
elif tag.as_vec()[0] == 'param':
print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
if tag.as_vec()[1] == "method":
method = tag.as_vec()[2]
elif tag.as_vec()[1] == "mode":
mode = tag.as_vec()[2]
io_input_image = {
"id": "input_image",
"type": "input",
"src": "url:Image",
"uri": url
}
io_output = {
"id": "output",
"type": "output",
"src": "request:text"
}
request_form['data'] = json.dumps([io_input_image, io_output])
options = {
"kind": method,
"mode": mode
}
request_form['options'] = json.dumps(options)
return request_form
def process(self, request_form):
try:
# Call the process route of NOVA-Server with our request form.
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
except Exception as e:
raise Exception(e)
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
def build_example(name, identifier, admin_config, server_address):
dvm_config = build_default_config(identifier)
dvm_config.USE_OWN_VENV = False
admin_config.LUD16 = dvm_config.LN_ADDRESS
nip89info = {
"name": name,
"image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
"about": "I analyse Images an return a prompt or a prompt analysis",
"encryptionSupported": True,
"cashuAccepted": True,
"nip90Params": {
"method": {
"required": False,
"values": ["prompt", "analysis"]
},
"mode": {
"required": False,
"values": ["best", "classic", "fast", "negative"]
}
}
}
# A module might have options it can be initialized with, here we set a default model, lora and the server
options = {'server': server_address}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
nip89config.CONTENT = json.dumps(nip89info)
return ImageInterrogator(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
dvm = ImageInterrogator(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()

View File

@@ -0,0 +1,141 @@
import json
from multiprocessing.pool import ThreadPool
from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions
"""
This File contains a Module to upscale an image from a URL by a factor of 2-4.
Accepted Inputs: Link to an image (url)
Outputs: A URL to an Image
Params: -upscale 2,3,4
"""
class ImageUpscale(DVMTaskInterface):
KIND: int = EventDefinitions.KIND_NIP90_GENERATE_IMAGE
TASK: str = "image-to-image"
FIX_COST: float = 20
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
super().__init__(name, dvm_config, nip89config, admin_config, options)
def is_input_supported(self, tags):
hasurl = False
for tag in tags:
if tag.as_vec()[0] == 'i':
input_value = tag.as_vec()[1]
input_type = tag.as_vec()[2]
if input_type == "url":
hasurl = True
if not hasurl:
return False
return True
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", "")}
request_form["trainerFilePath"] = r'modules\image_upscale\image_upscale_realesrgan.trainer'
url = ""
out_scale = 4
for tag in event.tags():
if tag.as_vec()[0] == 'i':
input_type = tag.as_vec()[2]
if input_type == "url":
url = tag.as_vec()[1]
elif tag.as_vec()[0] == 'param':
print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
if tag.as_vec()[1] == "upscale":
out_scale = tag.as_vec()[2]
io_input_image = {
"id": "input_image",
"type": "input",
"src": "url:Image",
"uri": url
}
io_output = {
"id": "output_image",
"type": "output",
"src": "request:image"
}
request_form['data'] = json.dumps([io_input_image, io_output])
options = {
"outscale": out_scale,
}
request_form['options'] = json.dumps(options)
return request_form
def process(self, request_form):
try:
# Call the process route of NOVA-Server with our request form.
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
except Exception as e:
raise Exception(e)
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
def build_example(name, identifier, admin_config, server_address):
dvm_config = build_default_config(identifier)
dvm_config.USE_OWN_VENV = False
admin_config.LUD16 = dvm_config.LN_ADDRESS
# A module might have options it can be initialized with, here we set a default model, lora and the server
options = {'server': server_address}
nip89info = {
"name": name,
"image": "https://image.nostr.build/229c14e440895da30de77b3ca145d66d4b04efb4027ba3c44ca147eecde891f1.jpg",
"about": "I upscale an image using realESRGan up to factor 4 (default is factor 4)",
"encryptionSupported": True,
"cashuAccepted": True,
"nip90Params": {
"upscale": {
"required": False,
"values": ["2", "3", "4"]
}
}
}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
nip89config.CONTENT = json.dumps(nip89info)
return ImageUpscale(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
dvm = ImageUpscale(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()

View File

@@ -1,20 +1,16 @@
import json
import os
import time
from pathlib import Path
import dotenv
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.mediasource_utils import organize_input_media_data
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions
"""
This File contains a Module to transform a media file input on Google Cloud
This File contains a Module to extract text from a media file input on Google Cloud
Accepted Inputs: Url to media file (url)
Outputs: Transcribed text
@@ -30,7 +26,6 @@ class SpeechToTextGoogle(DVMTaskInterface):
dependencies = [("nostr-dvm", "nostr-dvm"),
("speech_recognition", "SpeechRecognition==3.10.0")]
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
dvm_config.SCRIPT = os.path.abspath(__file__)
@@ -129,6 +124,7 @@ class SpeechToTextGoogle(DVMTaskInterface):
return result
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
@@ -158,6 +154,8 @@ def build_example(name, identifier, admin_config):
return SpeechToTextGoogle(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -167,4 +165,4 @@ def process_venv():
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -1,13 +1,9 @@
import json
import os
import re
from pathlib import Path
import dotenv
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -29,14 +25,11 @@ class TextExtractionPDF(DVMTaskInterface):
dependencies = [("nostr-dvm", "nostr-dvm"),
("pypdf", "pypdf==3.17.1")]
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
dvm_config.SCRIPT = os.path.abspath(__file__)
super().__init__(name, dvm_config, nip89config, admin_config, options)
def is_input_supported(self, tags):
for tag in tags:
if tag.as_vec()[0] == 'i':
@@ -118,6 +111,7 @@ def build_example(name, identifier, admin_config):
return TextExtractionPDF(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -127,4 +121,4 @@ def process_venv():
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -0,0 +1,193 @@
import json
import os
import time
from multiprocessing.pool import ThreadPool
from nostr_dvm.backends.nova_server.utils import check_server_status, send_request_to_server, send_file_to_server
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.mediasource_utils import organize_input_media_data
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.definitions import EventDefinitions
"""
This File contains a Module to transcribe a media file input on n-server with WhisperX and receive results back.
Accepted Inputs: URL to a media file (url)
Outputs: Transcribed text
"""
class SpeechToTextWhisperX(DVMTaskInterface):
KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
TASK: str = "speech-to-text"
FIX_COST: float = 10
PER_UNIT_COST: float = 0.1
def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
admin_config: AdminConfig = None, options=None):
super().__init__(name, dvm_config, nip89config, admin_config, options)
def is_input_supported(self, tags):
for tag in tags:
if tag.as_vec()[0] == 'i':
input_value = tag.as_vec()[1]
input_type = tag.as_vec()[2]
if input_type != "url":
return False
elif tag.as_vec()[0] == 'output':
output = tag.as_vec()[1]
if output != "text/plain":
print("Output format not supported, skipping..")
return False
return True
def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", ""),
"trainerFilePath": r'modules\whisperx\whisperx_transcript.trainer'}
if self.options.get("default_model"):
model = self.options['default_model']
else:
model = "base"
if self.options.get("alignment"):
alignment = self.options['alignment']
else:
alignment = "raw"
url = ""
input_type = "url"
start_time = 0
end_time = 0
media_format = "audio/mp3"
for tag in event.tags():
if tag.as_vec()[0] == 'i':
input_type = tag.as_vec()[2]
if input_type == "url":
url = tag.as_vec()[1]
elif tag.as_vec()[0] == 'param':
print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
if tag.as_vec()[1] == "alignment":
alignment = tag.as_vec()[2]
elif tag.as_vec()[1] == "model":
model = tag.as_vec()[2]
elif tag.as_vec()[1] == "range":
try:
t = time.strptime(tag.as_vec()[2], "%H:%M:%S")
seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
start_time = float(seconds)
except:
try:
t = time.strptime(tag.as_vec()[2], "%M:%S")
seconds = t.tm_min * 60 + t.tm_sec
start_time = float(seconds)
except:
start_time = float(tag.as_vec()[2])
try:
t = time.strptime(tag.as_vec()[3], "%H:%M:%S")
seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
end_time = float(seconds)
except:
try:
t = time.strptime(tag.as_vec()[3], "%M:%S")
seconds = t.tm_min * 60 + t.tm_sec
end_time = float(seconds)
except:
end_time = float(tag.as_vec()[3])
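# Example (illustrative): a tag ["param", "range", "01:30", "05:00"] parses to
# start_time = 90.0 and end_time = 300.0 seconds, which organize_input_media_data() below
# uses to trim the media before it is uploaded to the server.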
filepath = organize_input_media_data(url, input_type, start_time, end_time, dvm_config, client, True,
media_format)
path_on_server = send_file_to_server(os.path.realpath(filepath), self.options['server'])
io_input = {
"id": "audio",
"type": "input",
"src": "file:stream",
"uri": path_on_server
}
io_output = {
"id": "transcript",
"type": "output",
"src": "request:annotation:free"
}
request_form['data'] = json.dumps([io_input, io_output])
options = {
"model": model,
"alignment_mode": alignment,
}
request_form['options'] = json.dumps(options)
return request_form
def process(self, request_form):
try:
# Call the process route of NOVA-Server with our request form.
response = send_request_to_server(request_form, self.options['server'])
if bool(json.loads(response)['success']):
print("Job " + request_form['jobID'] + " sent to server")
pool = ThreadPool(processes=1)
thread = pool.apply_async(check_server_status, (request_form['jobID'], self.options['server']))
print("Wait for results of server...")
result = thread.get()
return result
except Exception as e:
raise Exception(e)
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
def build_example(name, identifier, admin_config, server_address):
dvm_config = build_default_config(identifier)
dvm_config.USE_OWN_VENV = False
admin_config.LUD16 = dvm_config.LN_ADDRESS
# A module might have options it can be initialized with, here we set a default model, and the server
# address it should use. These parameters can be freely defined in the task component
options = {'default_model': "base", 'server': server_address}
nip89info = {
"name": name,
"image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
"about": "I extract text from media files with WhisperX",
"encryptionSupported": True,
"cashuAccepted": True,
"nip90Params": {
"model": {
"required": False,
"values": ["base", "tiny", "small", "medium", "large-v1", "large-v2", "tiny.en", "base.en", "small.en",
"medium.en"]
},
"alignment": {
"required": False,
"values": ["raw", "segment", "word"]
}
}
}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["image"])
nip89config.CONTENT = json.dumps(nip89info)
return SpeechToTextWhisperX(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
dvm = SpeechToTextWhisperX(name="", dvm_config=dvm_config, nip89config=NIP89Config(), admin_config=None)
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()

View File

@@ -1,13 +1,8 @@
import json
import os
from pathlib import Path
import dotenv
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag

View File

@@ -1,5 +1,6 @@
import json
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
from pathlib import Path
import urllib.request
@@ -79,7 +80,7 @@ class TextToSpeech(DVMTaskInterface):
from TTS.api import TTS
options = DVMTaskInterface.set_options(request_form)
device = "cuda" if torch.cuda.is_available() else "cpu"
#else "mps" if torch.backends.mps.is_available() \
# else "mps" if torch.backends.mps.is_available() \
print(TTS().list_models())
try:
@@ -102,7 +103,7 @@ def build_example(name, identifier, admin_config):
dvm_config = build_default_config(identifier)
admin_config.LUD16 = dvm_config.LN_ADDRESS
#use an alternative local wav file you want to use for cloning
# use an alternative local wav file you want to use for cloning
options = {'input_file': ""}
nip89info = {
@@ -134,5 +135,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -1,20 +1,14 @@
import json
import os
from pathlib import Path
import dotenv
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
from nostr_dvm.utils.nostr_utils import get_referenced_event_by_id, get_event_by_id
"""
This File contains a Module to call Google Translate Services locally on the DVM Machine
This File contains a Module to call Google Translate Services on the DVM Machine
Accepted Inputs: Text, Events, Jobs (Text Extraction, Summary, Translation)
Outputs: Text containing the TranslationGoogle in the desired language.
@@ -111,6 +105,7 @@ class TranslationGoogle(DVMTaskInterface):
return translated_text
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
@@ -128,12 +123,18 @@ def build_example(name, identifier, admin_config):
"nip90Params": {
"language": {
"required": False,
"values": ["en", "az", "be", "bg", "bn", "bs", "ca", "ceb", "co", "cs", "cy", "da", "de", "el", "eo", "es",
"et", "eu", "fa", "fi", "fr", "fy", "ga", "gd", "gl", "gu", "ha", "haw", "hi", "hmn", "hr", "ht",
"hu", "hy", "id", "ig", "is", "it", "he", "ja", "jv", "ka", "kk", "km", "kn", "ko", "ku", "ky",
"la", "lb", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne", "nl",
"no", "ny", "or", "pa", "pl", "ps", "pt", "ro", "ru", "sd", "si", "sk", "sl", "sm", "sn", "so",
"sq", "sr", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "tl", "tr", "ug", "uk", "ur", "uz",
"values": ["en", "az", "be", "bg", "bn", "bs", "ca", "ceb", "co", "cs", "cy", "da", "de", "el", "eo",
"es",
"et", "eu", "fa", "fi", "fr", "fy", "ga", "gd", "gl", "gu", "ha", "haw", "hi", "hmn", "hr",
"ht",
"hu", "hy", "id", "ig", "is", "it", "he", "ja", "jv", "ka", "kk", "km", "kn", "ko", "ku",
"ky",
"la", "lb", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne",
"nl",
"no", "ny", "or", "pa", "pl", "ps", "pt", "ro", "ru", "sd", "si", "sk", "sl", "sm", "sn",
"so",
"sq", "sr", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "tl", "tr", "ug", "uk", "ur",
"uz",
"vi", "xh", "yi", "yo", "zh", "zu"]
}
}
@@ -145,7 +146,6 @@ def build_example(name, identifier, admin_config):
return TranslationGoogle(name=name, dvm_config=dvm_config, nip89config=nip89config, admin_config=admin_config)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -155,4 +155,4 @@ def process_venv():
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -1,13 +1,9 @@
import json
import os
from pathlib import Path
import dotenv
import requests
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -156,5 +152,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -1,13 +1,9 @@
import json
import os
from pathlib import Path
import dotenv
from nostr_sdk import Tag
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -118,6 +114,7 @@ def build_example(name, identifier, admin_config):
return TrendingNotesNostrBand(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -125,5 +122,6 @@ def process_venv():
result = dvm.process(json.loads(args.request))
DVMTaskInterface.write_output(result, args.output)
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -1,17 +1,12 @@
import json
import os
import subprocess
from io import BytesIO
from pathlib import Path
import dotenv
import requests
import urllib.request
from PIL import Image
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.backend_utils import keep_alive
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag
@@ -39,9 +34,6 @@ class VideoGenerationReplicateSVD(DVMTaskInterface):
dvm_config.SCRIPT = os.path.abspath(__file__)
super().__init__(name, dvm_config, nip89config, admin_config, options)
def is_input_supported(self, tags):
for tag in tags:
if tag.as_vec()[0] == 'i':
@@ -140,6 +132,7 @@ def build_example(name, identifier, admin_config):
return VideoGenerationReplicateSVD(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config)
def process_venv():
args = DVMTaskInterface.process_args()
dvm_config = build_default_config(args.identifier)
@@ -149,4 +142,4 @@ def process_venv():
if __name__ == '__main__':
process_venv()
process_venv()

View File

@@ -2,6 +2,8 @@ import os
from dataclasses import dataclass
from nostr_sdk import Event
class EventDefinitions:
KIND_DM = 4
KIND_ZAP = 9735
@@ -14,8 +16,6 @@ class EventDefinitions:
KIND_NIP90_RESULT_SUMMARIZE_TEXT = KIND_NIP90_SUMMARIZE_TEXT + 1000
KIND_NIP90_TRANSLATE_TEXT = 5002
KIND_NIP90_RESULT_TRANSLATE_TEXT = KIND_NIP90_TRANSLATE_TEXT + 1000
KIND_NIP90_TEXT_TO_SPEECH = 5005
KIND_NIP90_RESULT_TEXT_TO_SPEECH = KIND_NIP90_TEXT_TO_SPEECH + 1000
KIND_NIP90_GENERATE_TEXT = 5050
KIND_NIP90_RESULT_GENERATE_TEXT = KIND_NIP90_GENERATE_TEXT + 1000
KIND_NIP90_GENERATE_IMAGE = 5100
@@ -23,6 +23,8 @@ class EventDefinitions:
KIND_NIP90_CONVERT_VIDEO = 5200
KIND_NIP90_RESULT_CONVERT_VIDEO = KIND_NIP90_CONVERT_VIDEO + 1000
KIND_NIP90_GENERATE_VIDEO = 5202
KIND_NIP90_TEXT_TO_SPEECH = 5250
KIND_NIP90_RESULT_TEXT_TO_SPEECH = KIND_NIP90_TEXT_TO_SPEECH + 1000
KIND_NIP90_RESULT_GENERATE_VIDEO = KIND_NIP90_GENERATE_VIDEO + 1000
KIND_NIP90_CONTENT_DISCOVERY = 5300
KIND_NIP90_RESULT_CONTENT_DISCOVERY = KIND_NIP90_CONTENT_DISCOVERY + 1000
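# Convention visible above: each NIP-90 result kind is its request kind + 1000, e.g. the
# relocated kind 5250 text-to-speech request is answered with a kind 6250 result event.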
@@ -59,6 +61,7 @@ class JobToWatch:
payment_hash: str
expires: int
@dataclass
class RequiredJobToWatch:
event: Event

View File

@@ -13,10 +13,7 @@ setup(
author_email="believethehypeonnostr@proton.me",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
packages=find_packages(include=['nostr_dvm', 'nostr_dvm.interfaces', 'nostr_dvm.tasks',
'nostr_dvm.utils', 'nostr_dvm.utils.scrapper',
'nostr_dvm.backends', 'nostr_dvm.backends.mlx',
'nostr_dvm.backends.mlx.stablediffusion']),
packages=find_packages(include=['nostr_dvm', 'nostr_dvm.*']),
install_requires=["nostr-sdk==0.0.5",
"bech32==1.2.0",
"pycryptodome==3.19.0",