added whisperx (mediasources need some fixes)

2025-11-24 08:46:33 +01:00 · 2023-11-28 16:20:56 +01:00
parent a2433aa70c
commit 261bed427d
22 changed files with 1451 additions and 218 deletions
--- a/playground.py
+++ b/playground.py
@@ -6,6 +6,7 @@ from nostr_sdk import PublicKey, Keys
 from interfaces.dvmtaskinterface import DVMTaskInterface
 from tasks.imagegeneration_openai_dalle import ImageGenerationDALLE
 from tasks.imagegeneration_sdxl import ImageGenerationSDXL
+from tasks.textextraction_whisperx import SpeechToTextWhisperX
 from tasks.textextractionpdf import TextExtractionPDF
 from tasks.translation import Translation
 from utils.admin_utils import AdminConfig
@@ -125,6 +126,38 @@ def build_unstable_diffusion(name):
    return ImageGenerationSDXL(name=name, dvm_config=dvm_config, nip89config=nip89config,
                               admin_config=admin_config, options=options)

+def build_whisperx(name):  # Build a SpeechToTextWhisperX task called `name`, configured from environment variables
+    dvm_config = DVMConfig()
+    dvm_config.PRIVATE_KEY = os.getenv("NOSTR_PRIVATE_KEY4")
+    dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
+    dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")
+
+    # Options the task is initialized with: the default model and the address of the nova-server it should use
+    # (read from NOVA_SERVER). Which option keys exist can be freely defined in the task component.
+    options = {'default_model': "base", 'nova_server': os.getenv("NOVA_SERVER")}
+
+    nip90params = {  # embedded below under "nip90Params"; both entries are marked as not required
+        "model": {
+            "required": False,
+            "values": ["base","tiny","small","medium","large-v1","large-v2","tiny.en","base.en","small.en","medium.en"]
+        },
+        "alignment": {
+            "required": False,
+            "values": ["raw", "segment","word"]
+        }
+    }
+    nip89info = {
+        "name": name,
+        "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
+        "about": "I am a test dvm to extract text from media files (very beta)",
+        "nip90Params": nip90params
+    }
+    nip89config = NIP89Config()
+    nip89config.DTAG = os.getenv("TASK_SPEECH_TO_TEXT_NIP89")
+    nip89config.CONTENT = json.dumps(nip89info)  # info dict serialized to a JSON string
+    return SpeechToTextWhisperX(name=name, dvm_config=dvm_config, nip89config=nip89config,
+                               admin_config=admin_config, options=options)  # NOTE(review): admin_config is not assigned in this function; presumably a module-level name - confirm
+

 def build_sketcher(name):
    dvm_config = DVMConfig()