mirror of https://github.com/believethehype/nostrdvm.git
synced 2025-03-17 21:31:52 +01:00

added whisperx (mediasources need some fixes)

This commit is contained in:
parent a2433aa70c
commit 261bed427d

BIN .\outputs\audio.mp3 (Normal file) Binary file not shown.
BIN .\outputs\file.mp4 (Normal file) Binary file not shown.
backends/nova_server.py

@@ -3,6 +3,8 @@ import json
import os
import time
import zipfile
from pathlib import Path

import pandas as pd
import requests
import PIL.Image as Image
@@ -36,6 +38,18 @@ def send_request_to_nova_server(request_form, address):
    return response.text


def send_file_to_nova_server(filepath, address):
    print("Sending file to NOVA-Server")
    url = ('http://' + address + '/upload')
    fp = open(filepath, 'rb')
    response = requests.post(url, files={'file': fp})
    result = response.content.decode('utf-8')
    print(result)
    return result

# headers = {'Content-type': 'application/x-www-form-urlencoded'}


"""
check_nova_server_status(request_form, address)
Function that requests the status of the current process with the jobID (we use the Nostr event as jobID).
@@ -44,7 +58,7 @@ We throw an exception on error
"""


def check_nova_server_status(jobID, address):
def check_nova_server_status(jobID, address) -> str | pd.DataFrame:
    headers = {'Content-type': 'application/x-www-form-urlencoded'}
    url_status = 'http://' + address + '/job_status'
    url_log = 'http://' + address + '/log'
@@ -68,7 +82,6 @@ def check_nova_server_status(jobID, address):

    if status == 2:
        try:
            result = ""
            url_fetch = 'http://' + address + '/fetch_result'
            print("Fetching Results from NOVA-Server...")
            data = {"jobID": jobID, "delete_after_download": True}
@@ -79,10 +92,12 @@ def check_nova_server_status(jobID, address):
                image = Image.open(io.BytesIO(response.content))
                image.save("./outputs/image.jpg")
                result = upload_media_to_hoster("./outputs/image.jpg")
                return result
                os.remove("./outputs/image.jpg")
            elif content_type == 'text/plain; charset=utf-8':
                result = response.content.decode('utf-8')
            elif content_type == "zip":
                return result
            elif content_type == "application/x-zip-compressed":
                zf = zipfile.ZipFile(io.BytesIO(response.content), "r")

                for fileinfo in zf.infolist():
@@ -92,14 +107,15 @@ def check_nova_server_status(jobID, address):
                    columns = ['from', 'to', 'name', 'conf']
                    result = pd.DataFrame([row.split(';') for row in anno_string.split('\n')],
                                          columns=columns)
                    print(result)
                with open("response.zip", "wb") as f:
                    f.write(response.content)
                    #print(str(result))
                    return result
                #with open("response.zip", "wb") as f:
                #    f.write(response.content)
            except Exception as e:
                #zf.extractall()
                print(e)

            return result

        except Exception as e:
            print("Couldn't fetch result: " + str(e))
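Taken together, a minimal usage sketch of the upload/request/poll cycle these helpers implement (the server address and file path are illustrative placeholders; the ThreadPool pattern mirrors the task modules later in this commit):

    import json
    from multiprocessing.pool import ThreadPool
    from backends.nova_server import send_file_to_nova_server, send_request_to_nova_server, check_nova_server_status

    address = "127.0.0.1:8080"  # hypothetical NOVA-Server host:port
    path_on_server = send_file_to_nova_server("outputs/audio.mp3", address)  # upload, returns the server-side path
    request_form = {"jobID": "example_job",
                    "trainerFilePath": "modules\\whisperx\\whisperx_transcript.trainer",
                    "data": json.dumps([{"id": "audio", "type": "input", "src": "file:stream", "uri": path_on_server}])}
    if bool(json.loads(send_request_to_nova_server(request_form, address))["success"]):
        pool = ThreadPool(processes=1)
        result = pool.apply_async(check_nova_server_status, ("example_job", address)).get()  # blocks until the job finishes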
15  bot.py

@@ -100,18 +100,21 @@ class Bot:

            elif user.balance >= required_amount or required_amount == 0:
                command = decrypted_text.replace(decrypted_text.split(' ')[0] + " ", "")
                input = command.split("-")[0].rstrip()
                input = command.split(" -")[0].rstrip()
                input_type = "text"
                if input.startswith("http"):
                    input_type = "url"

                i_tag = Tag.parse(["i", input, "text"])
                bid = str(self.dvm_config.SUPPORTED_DVMS[index].COST * 1000)
                bid_tag = Tag.parse(['bid', bid, bid])
                i_tag = Tag.parse(["i", input, input_type])
                #bid = str(self.dvm_config.SUPPORTED_DVMS[index].COST * 1000)
                #bid_tag = Tag.parse(['bid', bid, bid])
                relays_tag = Tag.parse(["relays", json.dumps(self.dvm_config.RELAY_LIST)])
                alt_tag = Tag.parse(["alt", self.dvm_config.SUPPORTED_DVMS[index].TASK])

                tags = [i_tag.as_vec(), bid_tag.as_vec(), relays_tag.as_vec(), alt_tag.as_vec()]
                tags = [i_tag.as_vec(), relays_tag.as_vec(), alt_tag.as_vec()]

                remaining_text = command.replace(input, "")
                params = remaining_text.rstrip().split("-")
                params = remaining_text.split(" -")

                for i in params:
                    if i != " ":
26  dvm.py

@@ -1,7 +1,7 @@
import json
import typing
from datetime import timedelta

import pandas as pd
from nostr_sdk import PublicKey, Keys, Client, Tag, Event, EventBuilder, Filter, HandleNotification, Timestamp, \
    init_logger, LogLevel, Options, nip04_encrypt

@@ -12,6 +12,7 @@ from utils.dvmconfig import DVMConfig
from utils.admin_utils import admin_make_database_updates, AdminConfig
from utils.backend_utils import get_amount_per_task, check_task_is_supported, get_task
from utils.database_utils import create_sql_table, get_or_add_user, update_user_balance, update_sql_table
from utils.mediasource_utils import input_data_file_duration
from utils.nostr_utils import get_event_by_id, get_referenced_event_by_id, send_event, check_and_decrypt_tags
from utils.output_utils import post_process_result, build_status_reaction
from utils.zap_utils import check_bolt11_ln_bits_is_paid, create_bolt11_ln_bits, parse_zap_event_tags, redeem_cashu
@@ -91,9 +92,9 @@ class DVM:
            elif tag.as_vec()[0] == "p":
                p_tag_str = tag.as_vec()[1]

        task_supported, task, duration = check_task_is_supported(nip90_event, client=self.client,
                                                                 get_duration=(not user.iswhitelisted),
                                                                 config=self.dvm_config)
        task_supported, task = check_task_is_supported(nip90_event, client=self.client,
                                                       config=self.dvm_config)


        if user.isblacklisted:
            send_job_status_reaction(nip90_event, "error", client=self.client, dvm_config=self.dvm_config)
@@ -101,6 +102,8 @@ class DVM:

        elif task_supported:
            print("[" + self.dvm_config.NIP89.name + "] Received new Request: " + task + " from " + user.name)
            duration = input_data_file_duration(nip90_event, dvm_config=self.dvm_config, client=self.client)
            print("File Duration: " + str(duration))
            amount = get_amount_per_task(task, self.dvm_config, duration)
            if amount is None:
                return
@@ -169,8 +172,8 @@ class DVM:
                send_job_status_reaction(nip90_event, "payment-required",
                                         False, amount, client=self.client, dvm_config=self.dvm_config)

        #else:
            #print("[" + self.dvm_config.NIP89.name + "] Task " + task + " not supported on this DVM, skipping..")
        # else:
        #     print("[" + self.dvm_config.NIP89.name + "] Task " + task + " not supported on this DVM, skipping..")

        def handle_zap(zap_event):
            try:
@@ -180,7 +183,6 @@ class DVM:
                                             self.client, self.dvm_config)
                user = get_or_add_user(db=self.dvm_config.DB, npub=sender, client=self.client, config=self.dvm_config)


                if zapped_event is not None:
                    if zapped_event.kind() == EventDefinitions.KIND_FEEDBACK:

@@ -201,10 +203,8 @@ class DVM:

                        # if a reaction by us got zapped

                        task_supported, task, duration = check_task_is_supported(job_event,
                                                                                 client=self.client,
                                                                                 get_duration=False,
                                                                                 config=self.dvm_config)
                        task_supported, task = check_task_is_supported(job_event, client=self.client,
                                                                       config=self.dvm_config)
                        if job_event is not None and task_supported:
                            print("Zap received for NIP90 task: " + str(invoice_amount) + " Sats from " + str(
                                user.name))
@@ -257,8 +257,7 @@ class DVM:
                print("[" + self.dvm_config.NIP89.name + "] Error during content decryption: " + str(e))

        def check_event_has_not_unfinished_job_input(nevent, append, client, dvmconfig):
            task_supported, task, duration = check_task_is_supported(nevent, client, False,
                                                                     config=dvmconfig)
            task_supported, task = check_task_is_supported(nevent, client, config=dvmconfig)
            if not task_supported:
                return False

@@ -312,6 +311,7 @@ class DVM:
                    break

            try:

                post_processed_content = post_process_result(data, original_event)
                send_nostr_reply_event(post_processed_content, original_event_str)
            except Exception as e:
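Taken together, the dvm.py changes move duration handling out of check_task_is_supported: support is now a plain (bool, task) check, and the billable duration is computed separately from the actual media file. A condensed sketch of the new flow (names as in the diff):

    task_supported, task = check_task_is_supported(nip90_event, client=client, config=dvm_config)
    if task_supported:
        duration = input_data_file_duration(nip90_event, dvm_config=dvm_config, client=client)
        amount = get_amount_per_task(task, dvm_config, duration)  # COST * int(duration)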
interfaces/dvmtaskinterface.py

@@ -75,5 +75,4 @@ class DVMTaskInterface:
        if request_form.get("options"):
            opts = json.loads(request_form["options"])
            print(opts)

            return dict(opts)
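For context, the options round-trip this helper performs: the builder serializes a dict into request_form["options"] (see build_whisperx below) and the task interface deserializes it back. Illustrative values:

    request_form = {"options": json.dumps({"model": "base", "alignment_mode": "raw"})}
    opts = json.loads(request_form["options"])   # -> {'model': 'base', 'alignment_mode': 'raw'}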
10  main.py

@@ -9,7 +9,8 @@ import dotenv
from nostr_sdk import Keys

from bot import Bot
from playground import build_pdf_extractor, build_translator, build_unstable_diffusion, build_sketcher, build_dalle
from playground import build_pdf_extractor, build_translator, build_unstable_diffusion, build_sketcher, build_dalle, \
    build_whisperx
from utils.dvmconfig import DVMConfig


@@ -48,6 +49,13 @@ def run_nostr_dvm_with_local_config():
    bot_config.SUPPORTED_DVMS.append(sketcher)  # We also add Sketcher to the bot
    sketcher.run()

    if os.getenv("NOVA_SERVER") is not None and os.getenv("NOVA_SERVER") != "":
        whisperer = build_whisperx("Whisperer")
        bot_config.SUPPORTED_DVMS.append(whisperer)  # We also add the Whisperer to the bot
        whisperer.run()


    # Spawn DVM5, this one requires an OPENAI API Key and balance with OpenAI, you will move the task to them and pay
    # per call. Make sure you have enough balance and the DVM's cost is set higher than what you pay yourself, except, you know,
    # you're being generous.
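The new DVM only starts when NOVA_SERVER is set. The environment variables this path reads (all taken from the diff; the values are placeholders) can be sketched as comments:

    # NOVA_SERVER=127.0.0.1:8080            # host:port of a running NOVA-Server, hypothetical value
    # NOSTR_PRIVATE_KEY4=<hex private key>  # key for the Whisperer DVM
    # LNBITS_INVOICE_KEY=<invoice key>
    # LNBITS_HOST=<lnbits url>
    # TASK_SPEECH_TO_TEXT_NIP89=<d tag for the NIP-89 announcement>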
BIN outputs/image.jpg (Normal file) Binary file not shown. After: 62 KiB
BIN outputs/test.mp4 (Normal file) Binary file not shown.
playground.py

@@ -6,6 +6,7 @@ from nostr_sdk import PublicKey, Keys
from interfaces.dvmtaskinterface import DVMTaskInterface
from tasks.imagegeneration_openai_dalle import ImageGenerationDALLE
from tasks.imagegeneration_sdxl import ImageGenerationSDXL
from tasks.textextraction_whisperx import SpeechToTextWhisperX
from tasks.textextractionpdf import TextExtractionPDF
from tasks.translation import Translation
from utils.admin_utils import AdminConfig
@@ -125,6 +126,38 @@ def build_unstable_diffusion(name):
    return ImageGenerationSDXL(name=name, dvm_config=dvm_config, nip89config=nip89config,
                               admin_config=admin_config, options=options)

def build_whisperx(name):
    dvm_config = DVMConfig()
    dvm_config.PRIVATE_KEY = os.getenv("NOSTR_PRIVATE_KEY4")
    dvm_config.LNBITS_INVOICE_KEY = os.getenv("LNBITS_INVOICE_KEY")
    dvm_config.LNBITS_URL = os.getenv("LNBITS_HOST")

    # A module might have options it can be initialized with, here we set a default model, and the nova-server
    # address it should use. These parameters can be freely defined in the task component
    options = {'default_model': "base", 'nova_server': os.getenv("NOVA_SERVER")}

    nip90params = {
        "model": {
            "required": False,
            "values": ["base", "tiny", "small", "medium", "large-v1", "large-v2", "tiny.en", "base.en", "small.en", "medium.en"]
        },
        "alignment": {
            "required": False,
            "values": ["raw", "segment", "word"]
        }
    }
    nip89info = {
        "name": name,
        "image": "https://image.nostr.build/c33ca6fc4cc038ca4adb46fdfdfda34951656f87ee364ef59095bae1495ce669.jpg",
        "about": "I am a test dvm to extract text from media files (very beta)",
        "nip90Params": nip90params
    }
    nip89config = NIP89Config()
    nip89config.DTAG = os.getenv("TASK_SPEECH_TO_TEXT_NIP89")
    nip89config.CONTENT = json.dumps(nip89info)
    return SpeechToTextWhisperX(name=name, dvm_config=dvm_config, nip89config=nip89config,
                                admin_config=admin_config, options=options)


def build_sketcher(name):
    dvm_config = DVMConfig()
requirements.txt

@@ -1,35 +1,58 @@
anyio==3.7.1
beautifulsoup4==4.12.2
bech32==1.2.0
bitarray==2.8.3
bitstring==4.1.3
blessed==1.20.0
cassidy==0.1.4
certifi==2023.7.22
charset-normalizer==3.3.2
click==8.1.7
distro==1.8.0
emoji==2.8.0
enumb==0.1.5
eva-decord==0.6.1
exceptiongroup==1.2.0
expo==0.1.2
ffmpegio==0.8.5
ffmpegio-core==0.8.5
h11==0.14.0
httpcore==1.0.2
httpx==0.25.1
idna==3.4
inquirer==3.1.3
install==1.3.5
instaloader==4.10.1
lnurl==0.4.1
mediatype==0.1.6
nostr-sdk==0.0.5
numpy==1.26.2
openai==1.3.5
packaging==23.2
pandas==2.1.3
Pillow==10.1.0
pluggy==1.3.0
pycryptodome==3.19.0
pydantic==1.10.13
pydantic_core==2.14.5
pypdf==3.17.1
python-dateutil==2.8.2
python-dotenv==1.0.0
python-editor==1.0.4
pytube==15.0.0
pytz==2023.3.post1
PyUpload~=0.1.4
PyUpload==0.1.4
pyuseragents==1.0.5
readchar==4.0.5
requests==2.31.0
requests-toolbelt==1.0.0
safeIO==1.2
six==1.16.0
sniffio==1.3.0
soupsieve==2.5
tqdm==4.66.1
translatepy==2.3
typing_extensions==4.8.0
tzdata==2023.3
urllib3==2.1.0
wcwidth==0.2.10
tasks/imagegeneration_openai_dalle.py

@@ -31,9 +31,22 @@ class ImageGenerationDALLE(DVMTaskInterface):
                 admin_config: AdminConfig = None, options=None):
        super().__init__(name, dvm_config, nip89config, admin_config, options)

    def is_input_supported(self, input_type, input_content):
        if input_type != "text":
            return False
    def is_input_supported(self, tags):
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                input_value = tag.as_vec()[1]
                input_type = tag.as_vec()[2]
                if input_type != "text":
                    return False

            elif tag.as_vec()[0] == 'output':
                output = tag.as_vec()[1]
                if (output == "" or
                        not (output == "image/png" or "image/jpg"
                             or output == "image/png;format=url" or output == "image/jpg;format=url")):
                    print("Output format not supported, skipping..")
                    return False

        return True

    def create_request_form_from_nostr_event(self, event, client=None, dvm_config=None):
tasks/imagegeneration_sdxl.py

@@ -30,15 +30,19 @@ class ImageGenerationSDXL(DVMTaskInterface):
    def is_input_supported(self, tags):
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                if len(tag.as_vec()) < 3:
                    print("Job Event missing/malformed i tag, skipping..")
                input_value = tag.as_vec()[1]
                input_type = tag.as_vec()[2]
                if input_type != "text":
                    return False

            elif tag.as_vec()[0] == 'output':
                output = tag.as_vec()[1]
                if (output == "" or
                        not (output == "image/png" or "image/jpg"
                             or output == "image/png;format=url" or output == "image/jpg;format=url")):
                    print("Output format not supported, skipping..")
                    return False
                else:
                    input_value = tag.as_vec()[1]
                    input_type = tag.as_vec()[2]

                if input_type != "text":
                    return False
        return True

    def create_request_form_from_nostr_event(self, event, client=None, dvm_config=None):
@@ -134,11 +138,6 @@ class ImageGenerationSDXL(DVMTaskInterface):
        }
        request_form['options'] = json.dumps(options)

        # old format, deprecated, will remove
        request_form["optStr"] = ('model=' + model + ';ratio=' + str(ratio_width) + '-' + str(ratio_height) + ';size=' +
                                  str(width) + '-' + str(height) + ';strength=' + str(strength) + ';guidance_scale=' +
                                  str(guidance_scale) + ';lora=' + lora + ';lora_weight=' + lora_weight)

        return request_form

    def process(self, request_form):
@@ -152,7 +151,7 @@ class ImageGenerationSDXL(DVMTaskInterface):
            thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
            print("Wait for results of NOVA-Server...")
            result = thread.get()
            return str(result)
            return result

        except Exception as e:
            raise Exception(e)
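The refactored is_input_supported now receives the raw event tags instead of a pre-extracted (type, content) pair, so each task can also reject malformed 'i' tags and unsupported 'output' tags itself. Worth noting: in the output check, the bare "image/jpg" term is always truthy in Python, so that particular comparison is effectively a no-op. A hedged sketch of inputs against the new SDXL check (Tag as used in bot.py; values illustrative):

    good = [Tag.parse(["i", "a lighthouse at dawn", "text"]), Tag.parse(["output", "image/png"])]
    bad_type = [Tag.parse(["i", "https://example.com/x.mp3", "url"])]   # rejected: input_type != "text"
    malformed = [Tag.parse(["i", "a prompt"])]                           # caught by the len(tag.as_vec()) < 3 guard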
143  tasks/textextraction_whisperx.py (Normal file)

@@ -0,0 +1,143 @@
import json
import os
import time
from multiprocessing.pool import ThreadPool
from pathlib import Path

from backends.nova_server import check_nova_server_status, send_request_to_nova_server, send_file_to_nova_server
from interfaces.dvmtaskinterface import DVMTaskInterface
from utils.admin_utils import AdminConfig
from utils.dvmconfig import DVMConfig
from utils.mediasource_utils import organize_input_data
from utils.nip89_utils import NIP89Config
from utils.definitions import EventDefinitions

"""
This File contains a Module to transcribe media files via WhisperX on NOVA-Server and receive results back.

Accepted Inputs: An url to a media file (audio or video)
Outputs: Transcribed text
Params: -model      # whisper model, e.g. base, large-v2
        -alignment  # raw, segment or word
        -range      # start and end time of the segment to transcribe
"""


class SpeechToTextWhisperX(DVMTaskInterface):
    KIND: int = EventDefinitions.KIND_NIP90_EXTRACT_TEXT
    TASK: str = "speech-to-text"
    COST: int = 1

    def __init__(self, name, dvm_config: DVMConfig, nip89config: NIP89Config,
                 admin_config: AdminConfig = None, options=None):
        super().__init__(name, dvm_config, nip89config, admin_config, options)

    def is_input_supported(self, tags):
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                input_value = tag.as_vec()[1]
                input_type = tag.as_vec()[2]
                if input_type != "url":
                    return False

            elif tag.as_vec()[0] == 'output':
                output = tag.as_vec()[1]
                if (output == "" or not (output == "text/plain")):
                    print("Output format not supported, skipping..")
                    return False

        return True

    def create_request_form_from_nostr_event(self, event, client=None, dvm_config=None):
        request_form = {"jobID": event.id().to_hex() + "_" + self.NAME.replace(" ", ""),
                        "trainerFilePath": 'modules\\whisperx\\whisperx_transcript.trainer'}

        if self.options.get("default_model"):
            model = self.options['default_model']
        else:
            model = "base"
        if self.options.get("alignment"):
            alignment = self.options['alignment']
        else:
            alignment = "raw"

        url = ""
        input_type = "url"
        start_time = 0
        end_time = 0

        for tag in event.tags():
            if tag.as_vec()[0] == 'i':
                input_type = tag.as_vec()[2]
                if input_type == "url":
                    url = tag.as_vec()[1]

            elif tag.as_vec()[0] == 'param':
                print("Param: " + tag.as_vec()[1] + ": " + tag.as_vec()[2])
                if tag.as_vec()[1] == "alignment":
                    alignment = tag.as_vec()[2]
                elif tag.as_vec()[1] == "model":
                    model = tag.as_vec()[2]
                elif tag.as_vec()[1] == "range":
                    try:
                        t = time.strptime(tag.as_vec()[2], "%H:%M:%S")
                        seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
                        start_time = float(seconds)
                    except:
                        try:
                            t = time.strptime(tag.as_vec()[2], "%M:%S")
                            seconds = t.tm_min * 60 + t.tm_sec
                            start_time = float(seconds)
                        except:
                            start_time = tag.as_vec()[2]
                    try:
                        t = time.strptime(tag.as_vec()[3], "%H:%M:%S")
                        seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
                        end_time = float(seconds)
                    except:
                        try:
                            t = time.strptime(tag.as_vec()[3], "%M:%S")
                            seconds = t.tm_min * 60 + t.tm_sec
                            end_time = float(seconds)
                        except:
                            end_time = float(tag.as_vec()[3])

        filepath = organize_input_data(url, input_type, start_time, end_time, dvm_config, client)
        pathonserver = send_file_to_nova_server(filepath, self.options['nova_server'])

        io_input = {
            "id": "audio",
            "type": "input",
            "src": "file:stream",
            "uri": pathonserver
        }

        io_output = {
            "id": "transcript",
            "type": "output",
            "src": "request:annotation:free"
        }

        request_form['data'] = json.dumps([io_input, io_output])

        options = {
            "model": model,
            "alignment_mode": alignment,
        }
        request_form['options'] = json.dumps(options)
        return request_form

    def process(self, request_form):
        try:
            # Call the process route of NOVA-Server with our request form.
            response = send_request_to_nova_server(request_form, self.options['nova_server'])
            if bool(json.loads(response)['success']):
                print("Job " + request_form['jobID'] + " sent to NOVA-server")

                pool = ThreadPool(processes=1)
                thread = pool.apply_async(check_nova_server_status, (request_form['jobID'], self.options['nova_server']))
                print("Wait for results of NOVA-Server...")
                result = thread.get()
                return result

        except Exception as e:
            raise Exception(e)
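A hedged example of the NIP-90 job request this module consumes (the tag layout follows the parsing above; the media URL is illustrative):

    i_tag = ["i", "https://example.com/talk.mp3", "url"]
    model = ["param", "model", "large-v2"]
    align = ["param", "alignment", "word"]
    rng   = ["param", "range", "00:01:00", "00:02:30"]   # transcribe from 1:00 to 2:30
    out   = ["output", "text/plain"]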
tasks/textextractionpdf.py

@@ -29,10 +29,13 @@ class TextExtractionPDF(DVMTaskInterface):
                 admin_config: AdminConfig = None, options=None):
        super().__init__(name, dvm_config, nip89config, admin_config, options)


    def is_input_supported(self, input_type, input_content):
        if input_type != "url" and input_type != "event":
            return False
    def is_input_supported(self, tags):
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                input_value = tag.as_vec()[1]
                input_type = tag.as_vec()[2]
                if input_type != "url" and input_type != "event":
                    return False
        return True

    def create_request_form_from_nostr_event(self, event, client=None, dvm_config=None):
tasks/translation.py

@@ -27,11 +27,15 @@ class Translation(DVMTaskInterface):
                 admin_config: AdminConfig = None, options=None):
        super().__init__(name, dvm_config, nip89config, admin_config, options)

    def is_input_supported(self, input_type, input_content):
        if input_type != "event" and input_type != "job" and input_type != "text":
            return False
        if input_type != "text" and len(input_content) > 4999:
            return False
    def is_input_supported(self, tags):
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                input_value = tag.as_vec()[1]
                input_type = tag.as_vec()[2]
                if input_type != "event" and input_type != "job" and input_type != "text":
                    return False
                if input_type != "text" and len(input_value) > 4999:
                    return False
        return True

    def create_request_form_from_nostr_event(self, event, client=None, dvm_config=None):
utils/backend_utils.py

@@ -4,114 +4,109 @@ import requests
from nostr_sdk import Event, Tag

from utils.definitions import EventDefinitions
from utils.mediasource_utils import check_source_type, media_source
from utils.nostr_utils import get_event_by_id


def get_task(event, client, dvm_config):
    if event.kind() == EventDefinitions.KIND_NIP90_GENERIC:  # use this for events that have no id yet, including the j tag
        for tag in event.tags():
            if tag.as_vec()[0] == 'j':
                return tag.as_vec()[1]
            else:
                return "unknown job: " + event.as_json()
    elif event.kind() == EventDefinitions.KIND_DM:  # dm
        for tag in event.tags():
            if tag.as_vec()[0] == 'j':
                return tag.as_vec()[1]
            else:
                return "unknown job: " + event.as_json()
    try:
        if event.kind() == EventDefinitions.KIND_NIP90_GENERIC:  # use this for events that have no id yet, including the j tag
            for tag in event.tags():
                if tag.as_vec()[0] == 'j':
                    return tag.as_vec()[1]
                else:
                    return "unknown job: " + event.as_json()
        elif event.kind() == EventDefinitions.KIND_DM:  # dm
            for tag in event.tags():
                if tag.as_vec()[0] == 'j':
                    return tag.as_vec()[1]
                else:
                    return "unknown job: " + event.as_json()

    # This looks a bit more complicated, but we do several tasks for text-extraction in the future
    elif event.kind() == EventDefinitions.KIND_NIP90_EXTRACT_TEXT:
        for tag in event.tags:
            if tag.as_vec()[0] == "i":
                if tag.as_vec()[2] == "url":
                    file_type = check_url_is_readable(tag.as_vec()[1])
                    if file_type == "pdf":
                        return "pdf-to-text"
        # This looks a bit more complicated, but we do several tasks for text-extraction in the future
        elif event.kind() == EventDefinitions.KIND_NIP90_EXTRACT_TEXT:
            for tag in event.tags():
                if tag.as_vec()[0] == "i":
                    if tag.as_vec()[2] == "url":
                        file_type = check_url_is_readable(tag.as_vec()[1])
                        print(file_type)
                        if file_type == "pdf":
                            return "pdf-to-text"
                        elif file_type == "audio" or file_type == "video":
                            return "speech-to-text"
                        else:
                            return "unknown job"
                    elif tag.as_vec()[2] == "event":
                        evt = get_event_by_id(tag.as_vec()[1], client=client, config=dvm_config)
                        if evt is not None:
                            if evt.kind() == 1063:
                                for tg in evt.tags():
                                    if tg.as_vec()[0] == 'url':
                                        file_type = check_url_is_readable(tg.as_vec()[1])
                                        if file_type == "pdf":
                                            return "pdf-to-text"
                                        elif file_type == "audio" or file_type == "video":
                                            return "speech-to-text"
                                        else:
                                            return "unknown job"
                            else:
                                return "unknown type"
                    else:
                        return "unknown job"
                elif tag.as_vec()[2] == "event":
                    evt = get_event_by_id(tag.as_vec()[1], client=client, config=dvm_config)
                    if evt is not None:
                        if evt.kind() == 1063:
                            for tg in evt.tags():
                                if tg.as_vec()[0] == 'url':
                                    file_type = check_url_is_readable(tg.as_vec()[1])
                                    if file_type == "pdf":
                                        return "pdf-to-text"
                        else:
                            return "unknown job"
                    else:
                        return "unknown type"
    # TODO if a task can consist of multiple inputs add them here
    # else if kind is supported, simply return task
    else:
        for dvm in dvm_config.SUPPORTED_DVMS:
            if dvm.KIND == event.kind():
                return dvm.TASK
        return "unknown type"

        # TODO if a task can consist of multiple inputs add them here
        # else if kind is supported, simply return task
        else:

            for dvm in dvm_config.SUPPORTED_DVMS:
                if dvm.KIND == event.kind():
                    return dvm.TASK
    except Exception as e:
        print("Get task: " + str(e))

    return "unknown type"


def is_input_supported__generic(tags, client, dvm_config) -> bool:
    for tag in tags:
def is_input_supported_generic(tags, client, dvm_config) -> bool:
    try:
        for tag in tags:
            if tag.as_vec()[0] == 'i':
                if len(tag.as_vec()) < 3:
                    print("Job Event missing/malformed i tag, skipping..")
                    return False
                else:
                    input_value = tag.as_vec()[1]
                    input_type = tag.as_vec()[2]
            else:
                input_value = tag.as_vec()[1]
                input_type = tag.as_vec()[2]
                if input_type == "event":
                    evt = get_event_by_id(input_value, client=client, config=dvm_config)
                    if evt is None:
                        print("Event not found")
                        return False
                # TODO check_url_is_readable might be more relevant per task in the future
                # if input_type == 'url' and check_url_is_readable(input_value) is None:
                #     print("Url not readable / supported")
                #     return False

            if input_type == "event":
                evt = get_event_by_id(input_value, client=client, config=dvm_config)
                if evt is None:
                    print("Event not found")

        return True
    return True
    except Exception as e:
        print("Generic input check: " + str(e))


def check_task_is_supported(event: Event, client, get_duration=False, config=None):
def check_task_is_supported(event: Event, client, config=None):
    try:
        dvm_config = config
        input_value = ""
        input_type = ""
        duration = 1
        task = get_task(event, client=client, dvm_config=dvm_config)

        if not is_input_supported__generic(event.tags(), client, dvm_config):
            return False, "", 0


        for tag in event.tags():
            if tag.as_vec()[0] == 'i':
                input_value = tag.as_vec()[1]
                input_type = tag.as_vec()[2]
                if input_type == 'url' and check_url_is_readable(input_value) is None:
                    print("Url not readable / supported")
                    return False, task, duration  #

            elif tag.as_vec()[0] == 'output':
                # TODO move this to individual modules
                output = tag.as_vec()[1]
                if not (output == "text/plain"
                        or output == "text/json" or output == "json"
                        or output == "image/png" or "image/jpg"
                        or output == "image/png;format=url" or output == "image/jpg;format=url"
                        or output == ""):
                    print("Output format not supported, skipping..")
                    return False, "", 0

        if task not in (x.TASK for x in dvm_config.SUPPORTED_DVMS):
            return False, task, duration
            return False, task

        if not is_input_supported_generic(event.tags(), client, dvm_config):
            return False, ""
        for dvm in dvm_config.SUPPORTED_DVMS:
            if dvm.TASK == task:
                if not dvm.is_input_supported(event.tags()):
                    return False, task, duration
                    return False, task

        return True, task, duration
        return True, task


    except Exception as e:
@@ -121,30 +116,40 @@ def check_task_is_supported(event: Event, client, get_duration=False, config=None):
def check_url_is_readable(url):
    if not str(url).startswith("http"):
        return None
    # If link is compatible with one of these file formats, move on.
    req = requests.get(url)
    content_type = req.headers['content-type']
    if content_type == 'audio/x-wav' or str(url).endswith(".wav") or content_type == 'audio/mpeg' or str(url).endswith(
            ".mp3") or content_type == 'audio/ogg' or str(url).endswith(".ogg"):
        return "audio"
    elif (content_type == 'image/png' or str(url).endswith(".png") or content_type == 'image/jpg' or str(url).endswith(
            ".jpg") or content_type == 'image/jpeg' or str(url).endswith(".jpeg") or content_type == 'image/png' or
          str(url).endswith(".png")):
        return "image"
    elif content_type == 'video/mp4' or str(url).endswith(".mp4") or content_type == 'video/avi' or str(url).endswith(
            ".avi") or content_type == 'video/mov' or str(url).endswith(".mov"):
        return "video"
    elif (str(url)).endswith(".pdf"):
        return "pdf"

    source = check_source_type(url)
    type = media_source(source)

    if type == "url":
        # If link is compatible with one of these file formats, move on.
        req = requests.get(url)
        content_type = req.headers['content-type']
        if content_type == 'audio/x-wav' or str(url).endswith(".wav") or content_type == 'audio/mpeg' or str(url).endswith(
                ".mp3") or content_type == 'audio/ogg' or str(url).endswith(".ogg"):
            return "audio"
        elif (content_type == 'image/png' or str(url).endswith(".png") or content_type == 'image/jpg' or str(url).endswith(
                ".jpg") or content_type == 'image/jpeg' or str(url).endswith(".jpeg") or content_type == 'image/png' or
              str(url).endswith(".png")):
            return "image"
        elif content_type == 'video/mp4' or str(url).endswith(".mp4") or content_type == 'video/avi' or str(url).endswith(
                ".avi") or content_type == 'video/mov' or str(url).endswith(".mov"):
            return "video"
        elif (str(url)).endswith(".pdf"):
            return "pdf"
        else:
            return type

    # Otherwise we will not offer to do the job.
    return None


def get_amount_per_task(task, dvm_config, duration=1):
    # duration is either static 1 (for images etc) or in seconds
    if duration == 0:
        duration = 1
    for dvm in dvm_config.SUPPORTED_DVMS:  # this is currently just one
        if dvm.TASK == task:
            amount = dvm.COST * duration
            amount = dvm.COST * int(duration)
            return amount
        else:
            print("[" + dvm_config.SUPPORTED_DVMS[
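The pricing change (COST * int(duration)) matters now that duration can be a float derived from the media file. A quick worked example using the COST values from this commit:

    # speech-to-text: COST = 1 per second -> a 95.5 s clip is billed as 1 * int(95.5) = 95 Sats
    # image tasks pass duration = 1, so the amount is simply the task's COST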
330  utils/mediasource_utils.py (Normal file)

@@ -0,0 +1,330 @@
import os
import urllib
from datetime import time
from urllib.parse import urlparse
import ffmpegio
from decord import AudioReader, cpu
import requests
from utils.nostr_utils import get_event_by_id


def input_data_file_duration(event, dvm_config, client, start=0, end=0):
    input_value = ""
    input_type = "url"
    for tag in event.tags():
        if tag.as_vec()[0] == 'i':
            input_value = tag.as_vec()[1]
            input_type = tag.as_vec()[2]

    if input_type == "event":  # NIP94 event
        evt = get_event_by_id(input_value, client=client, config=dvm_config)
        if evt is not None:
            input_value, input_type = check_nip94_event_for_media(evt, input_value, input_type)

    if input_type == "url":
        source_type = check_source_type(input_value)

        filename, start, end, type = get_file_start_end_type(input_value, source_type, start, end)
        if type != "audio" and type != "video":
            return 1
        if filename == "" or filename is None:
            return 0
        try:
            file_reader = AudioReader(filename, ctx=cpu(0), mono=False)
            duration = float(file_reader.duration())
        except Exception as e:
            print(e)
            return 0
        print("Original Duration of the Media file: " + str(duration))
        start_time, end_time, new_duration = (
            convert_media_length(start, end, duration))
        print("New Duration of the Media file: " + str(new_duration))
        return new_duration

    return 1


def organize_input_data(input_value, input_type, start, end, dvm_config, client, process=True) -> str:
    if input_type == "event":  # NIP94 event
        evt = get_event_by_id(input_value, client=client, config=dvm_config)
        if evt is not None:
            input_value, input_type = check_nip94_event_for_media(evt, input_value, input_type)

    if input_type == "url":
        source_type = check_source_type(input_value)
        filename, start, end, type = get_file_start_end_type(input_value, source_type, start, end)
        if filename == "" or filename is None:
            return ""
        try:
            file_reader = AudioReader(filename, ctx=cpu(0), mono=False)
            duration = float(file_reader.duration())
        except Exception as e:
            print(e)
            return ""

        print("Original Duration of the Media file: " + str(duration))
        start_time, end_time, new_duration = (
            convert_media_length(start, end, duration))
        print("New Duration of the Media file: " + str(new_duration))

        # TODO if already in a working format and time is 0 0, dont convert
        print("Converting from " + str(start_time) + " until " + str(end_time))
        # for now, we cut and convert all files to mp3
        final_filename = '.\\outputs\\audio.mp3'
        print(final_filename)
        fs, x = ffmpegio.audio.read(filename, ss=start_time, to=end_time, sample_fmt='dbl', ac=1)
        ffmpegio.audio.write(final_filename, fs, x, overwrite=True)
        return final_filename


def check_nip94_event_for_media(evt, input_value, input_type):
    # Parse NIP94 event for url, if found, use it.
    if evt.kind() == 1063:
        for tag in evt.tags():
            if tag.as_vec()[0] == 'url':
                input_type = "url"
                input_value = tag.as_vec()[1]
                return input_value, input_type

    return input_value, input_type


def convert_media_length(start: float, end: float, duration: float):
    if end == 0.0:
        end_time = duration
    elif end > duration:
        end_time = duration
    else:
        end_time = end
    if start <= 0.0 or start > end_time:
        start_time = 0.0
    else:
        start_time = start
    dur = end_time - start_time
    return start_time, end_time, dur


def get_file_start_end_type(url, source_type, start, end) -> (str, str):
    # Overcast
    if source_type == "overcast":
        name, start, end = get_overcast(url, start, end)
        return name, start, end, "audio"
    # Youtube
    elif source_type == "youtube":
        audio_only = True

        name, start, end = get_youtube(url, start, end, audio_only)

        return name, start, end, "audio"
    # Xitter
    elif source_type == "xitter":
        name, start, end = get_Twitter(url, start, end)
        return name, start, end, "video"
    # Tiktok
    elif source_type == "tiktok":
        name, start, end = get_TikTok(url, start, end)
        return name, start, end, "video"
    # Instagram
    elif source_type == "instagram":
        name, start, end = get_Instagram(url, start, end)
        if name.endswith("jpg"):
            type = "image"
        else:
            type = "video"
        return name, start, end, type
    # A file link
    else:
        filename, filetype = get_media_link(url)
        return filename, start, end, filetype


def media_source(source_type):
    if source_type == "overcast":
        return "audio"
    elif source_type == "youtube":
        return "audio"
    elif source_type == "xitter":
        return "video"
    elif source_type == "tiktok":
        return "video"
    elif source_type == "instagram":
        return "video"
    else:
        return "url"


def check_source_type(url):
    if str(url).startswith("https://overcast.fm/"):
        return "overcast"
    elif str(url).replace("http://", "").replace("https://", "").replace(
            "www.", "").replace("youtu.be/", "youtube.com?v=")[0:11] == "youtube.com":
        return "youtube"
    elif str(url).startswith("https://x.com") or str(url).startswith("https://twitter.com"):
        return "xitter"
    elif str(url).startswith("https://vm.tiktok.com") or str(url).startswith(
            "https://www.tiktok.com") or str(url).startswith("https://m.tiktok.com"):
        return "tiktok"
    elif str(url).startswith("https://www.instagram.com") or str(url).startswith(
            "https://instagram.com"):
        return "instagram"
    else:
        return "url"


def get_overcast(input_value, start, end):
    filename = '.\\outputs\\' + ".originalaudio.mp3"
    print("Found overcast.fm Link.. downloading")
    start_time = start
    end_time = end
    downloadOvercast(input_value, filename)
    finaltag = str(input_value).replace("https://overcast.fm/", "").split('/')
    if start == 0.0:
        if len(finaltag) > 1:
            t = time.strptime(finaltag[1], "%H:%M:%S")
            seconds = t.tm_hour * 60 * 60 + t.tm_min * 60 + t.tm_sec
            start_time = float(seconds)
            print("Setting start time automatically to " + str(start_time))
            if end > 0.0:
                end_time = float(seconds + end)
                print("Moving end time automatically to " + str(end_time))

    return filename, start_time, end_time


def get_TikTok(input_value, start, end):
    filepath = '.\\outputs\\'
    try:
        filename = downloadTikTok(input_value, filepath)
        print(filename)
    except Exception as e:
        print(e)
        return "", start, end
    return filename, start, end


def get_Instagram(input_value, start, end):
    filepath = '.\\outputs\\'
    try:
        filename = downloadInstagram(input_value, filepath)
        print(filename)
    except Exception as e:
        print(e)
        return "", start, end
    return filename, start, end


def get_Twitter(input_value, start, end):
    filepath = '.\\outputs\\'
    cleanlink = str(input_value).replace("twitter.com", "x.com")
    try:
        filename = downloadTwitter(cleanlink, filepath)
        print(filename)
    except Exception as e:
        print(e)
        return "", start, end
    return filename, start, end


def get_youtube(input_value, start, end, audioonly=True):
    filepath = '.\\outputs\\'
    filename = ""
    try:
        filename = downloadYouTube(input_value, filepath, audioonly)

    except Exception as e:
        print("Youtube" + str(e))
        return filename, start, end
    try:
        o = urlparse(input_value)
        q = urllib.parse.parse_qs(o.query)
        if start == 0.0:
            if o.query.find('?t=') != -1:
                start = q['t'][0]  # overwrite from link.. why not..
                print("Setting start time automatically to " + start)
                if end > 0.0:
                    end = float(q['t'][0]) + end
                    print("Moving end time automatically to " + str(end))

    except Exception as e:
        print(e)
        return filename, start, end

    return filename, start, end


def get_media_link(url) -> (str, str):
    req = requests.get(url)
    content_type = req.headers['content-type']
    print(content_type)
    if content_type == 'audio/x-wav' or str(url).lower().endswith(".wav"):
        ext = "wav"
        file_type = "audio"
        with open('.\\outputs\\file.' + ext, 'wb') as fd:
            fd.write(req.content)
        return '.\\outputs\\file.' + ext, file_type
    elif content_type == 'audio/mpeg' or str(url).lower().endswith(".mp3"):
        ext = "mp3"
        file_type = "audio"
        with open('.\\outputs\\file.' + ext, 'wb') as fd:
            fd.write(req.content)
        return '.\\outputs\\file.' + ext, file_type
    elif content_type == 'audio/ogg' or str(url).lower().endswith(".ogg"):
        ext = "ogg"
        file_type = "audio"
        with open('.\\outputs\\file.' + ext, 'wb') as fd:
            fd.write(req.content)
        return '.\\outputs\\file.' + ext, file_type
    elif content_type == 'video/mp4' or str(url).lower().endswith(".mp4"):
        ext = "mp4"
        file_type = "video"
        with open('.\\outputs\\file.' + ext, 'wb') as fd:
            fd.write(req.content)
        return '.\\outputs\\file.' + ext, file_type
    elif content_type == 'video/avi' or str(url).lower().endswith(".avi"):
        ext = "avi"
        file_type = "video"
        with open('.\\outputs\\file.' + ext, 'wb') as fd:
            fd.write(req.content)
        return '.\\outputs\\file.' + ext, file_type
    elif content_type == 'video/quicktime' or str(url).lower().endswith(".mov"):
        ext = "mov"
        file_type = "video"
        with open('.\\outputs\\file.' + ext, 'wb') as fd:
            fd.write(req.content)
        return '.\\outputs\\file.' + ext, file_type

    else:
        print(str(url).lower())
        return None, None


def downloadOvercast(source_url, target_location):
    from utils.scrapper.media_scrapper import OvercastDownload
    result = OvercastDownload(source_url, target_location)
    return result


def downloadTwitter(videourl, path):
    from utils.scrapper.media_scrapper import XitterDownload
    result = XitterDownload(videourl, path + "x.mp4")
    return result


def downloadTikTok(videourl, path):
    from utils.scrapper.media_scrapper import TiktokDownloadAll
    result = TiktokDownloadAll([videourl], path)
    return result


def downloadInstagram(videourl, path):
    from utils.scrapper.media_scrapper import InstagramDownload
    result = InstagramDownload(videourl, "insta", path)
    return result


def downloadYouTube(link, path, audioonly=True):
    from utils.scrapper.media_scrapper import YouTubeDownload
    result = YouTubeDownload(link, path, audio_only=True)
    return result
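convert_media_length is the small helper that clamps the requested range to the real file length; two worked examples:

    convert_media_length(start=30.0, end=0.0, duration=95.5)    # -> (30.0, 95.5, 65.5): end == 0 means "until the end"
    convert_media_length(start=0.0, end=120.0, duration=95.5)   # -> (0.0, 95.5, 95.5): end is clamped to the duration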
utils/output_utils.py

@@ -17,82 +17,87 @@ Post process results to either given output format or a Nostr readable plain text
def post_process_result(anno, original_event):
    print("Post-processing...")
    if isinstance(anno, pandas.DataFrame):  # if input is an anno we parse it to required output format
        for tag in original_event.tags:
        print("Pandas Dataframe...")
        has_output_tag = False
        output_format = "text/plain"

        for tag in original_event.tags():
            if tag.as_vec()[0] == "output":
                output_format = tag.as_vec()[1]
                print("requested output is " + str(tag.as_vec()[1]) + "...")
                try:
                    if output_format == "text/plain":
                        result = ""
                        for each_row in anno['name']:
                            if each_row is not None:
                                for i in str(each_row).split('\n'):
                                    result = result + i + "\n"
                        result = replace_broken_words(
                            str(result).replace("\"", "").replace('[', "").replace(']',
                                                                                   "").lstrip(None))
                        return result
                has_output_tag = True
                print("requested output is " + str(output_format) + "...")

                    elif output_format == "text/vtt":
                        print(str(anno))
                        result = "WEBVTT\n\n"
                        for element in anno:
                            name = element["name"]  # name
                            start = float(element["from"])
                            convertstart = str(datetime.timedelta(seconds=start))
                            end = float(element["to"])
                            convertend = str(datetime.timedelta(seconds=end))
                            print(str(convertstart) + " --> " + str(convertend))
                            cleared_name = str(name).lstrip("\'").rstrip("\'")
                            result = result + str(convertstart) + " --> " + str(
                                convertend) + "\n" + cleared_name + "\n\n"
                        result = replace_broken_words(
                            str(result).replace("\"", "").replace('[', "").replace(']',
                                                                                   "").lstrip(None))
                        return result

                    elif output_format == "text/json" or output_format == "json":
                        # result = json.dumps(json.loads(anno.data.to_json(orient="records")))
                        result = replace_broken_words(json.dumps(anno.data.tolist()))
                        return result
                    # TODO add more
                    else:
                        result = ""
                        for element in anno.data:
                            element["name"] = str(element["name"]).lstrip()
                            element["from"] = (format(float(element["from"]), '.2f')).lstrip()  # name
                            element["to"] = (format(float(element["to"]), '.2f')).lstrip()  # name
                            result = result + "(" + str(element["from"]) + "," + str(element["to"]) + ")" + " " + str(
                                element["name"]) + "\n"

                        print(result)
                        result = replace_broken_words(result)
                        return result

                except Exception as e:
                    print(e)
                    result = replace_broken_words(str(anno.data))
        if has_output_tag:
            print("Output Tag found: " + output_format)
            try:
                if output_format == "text/plain":
                    result = pandas_to_plaintext(anno)
                    result = replace_broken_words(
                        str(result).replace("\"", "").replace('[', "").replace(']',
                                                                               "").lstrip(None))
                    return result

                else:
                    result = ""
                    for element in anno.data:
                        element["name"] = str(element["name"]).lstrip()
                        element["from"] = (format(float(element["from"]), '.2f')).lstrip()  # name
                        element["to"] = (format(float(element["to"]), '.2f')).lstrip()  # name
                        result = result + "(" + str(element["from"]) + "," + str(element["to"]) + ")" + " " + str(
                            element["name"]) + "\n"
                elif output_format == "text/vtt":
                    print(str(anno))
                    result = "WEBVTT\n\n"
                    for element in anno:
                        name = element["name"]  # name
                        start = float(element["from"])
                        convertstart = str(datetime.timedelta(seconds=start))
                        end = float(element["to"])
                        convertend = str(datetime.timedelta(seconds=end))
                        print(str(convertstart) + " --> " + str(convertend))
                        cleared_name = str(name).lstrip("\'").rstrip("\'")
                        result = result + str(convertstart) + " --> " + str(
                            convertend) + "\n" + cleared_name + "\n\n"
                    result = replace_broken_words(
                        str(result).replace("\"", "").replace('[', "").replace(']',
                                                                               "").lstrip(None))
                    return result

                elif output_format == "text/json" or output_format == "json":
                    # result = json.dumps(json.loads(anno.data.to_json(orient="records")))
                    result = replace_broken_words(json.dumps(anno.data.tolist()))
                    return result
                # TODO add more
                else:
                    print("Pandas Dataframe but output tag not supported.. falling back to default..")
                    result = pandas_to_plaintext(anno)
                    print(result)
                    result = str(result).replace("\"", "").replace('[', "").replace(']',
                                                                                    "").lstrip(None)
                    return result

            except Exception as e:
                print(e)
                result = replace_broken_words(str(anno.data))
                return result

        else:
            print("Pandas Dataframe but no output tag set.. falling back to default..")
            result = pandas_to_plaintext(anno)
            print(result)
            result = replace_broken_words(result)
            result = str(result).replace("\"", "").replace('[', "").replace(']',
                                                                            "").lstrip(None)
            return result
    elif isinstance(anno, NoneType):
        return "An error occurred"
    else:
        print("Nonetype")
        result = replace_broken_words(anno)  # TODO
        return result


def pandas_to_plaintext(anno):
    result = ""
    for each_row in anno['name']:
        if each_row is not None:
            for i in str(each_row).split('\n'):
                result = result + i + "\n"
    return result


'''
Convenience function to replace words like Noster with Nostr
'''
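The intent of the three output formats, sketched for a single annotation row (from=0.0, to=2.5, name="hello world"; shapes follow the code above, values illustrative):

    # text/plain  ->  "hello world\n"                                     (via pandas_to_plaintext)
    # text/vtt    ->  "WEBVTT\n\n0:00:00 --> 0:00:02.500000\nhello world\n\n"
    # default     ->  "(0.00,2.50) hello world\n"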
599
utils/scrapper/media_scrapper.py
Normal file
599
utils/scrapper/media_scrapper.py
Normal file
@ -0,0 +1,599 @@
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.parse
|
||||
from typing import Any
|
||||
from urllib.request import urlopen, Request
|
||||
|
||||
import requests
|
||||
import instaloader
|
||||
from pytube import YouTube
|
||||
|
||||
|
||||
def XitterDownload(source_url, target_location):
|
||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
request_details_file = f"{script_dir}{os.sep}request_details.json"
|
||||
request_details = json.load(open(request_details_file, "r")) # test
|
||||
features, variables = request_details["features"], request_details["variables"]
|
||||
|
||||
def get_tokens(tweet_url):
|
||||
html = requests.get(tweet_url)
|
||||
|
||||
assert (
|
||||
html.status_code == 200
|
||||
), f"Failed to get tweet page. If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Status code: {html.status_code}. Tweet url: {tweet_url}"
|
||||
|
||||
mainjs_url = re.findall(
|
||||
r"https://abs.twimg.com/responsive-web/client-web-legacy/main.[^\.]+.js",
|
||||
html.text,
|
||||
)
|
||||
|
||||
assert (
|
||||
mainjs_url is not None and len(mainjs_url) > 0
|
||||
), f"Failed to find main.js file. If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Tweet url: {tweet_url}"
|
||||
|
||||
mainjs_url = mainjs_url[0]
|
||||
|
||||
mainjs = requests.get(mainjs_url)
|
||||
|
||||
assert (
|
||||
mainjs.status_code == 200
|
||||
), f"Failed to get main.js file. If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Status code: {mainjs.status_code}. Tweet url: {tweet_url}"
|
||||
|
||||
bearer_token = re.findall(r'AAAAAAAAA[^"]+', mainjs.text)
|
||||
|
||||
assert (
|
||||
bearer_token is not None and len(bearer_token) > 0
|
||||
), f"Failed to find bearer token. If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Tweet url: {tweet_url}, main.js url: {mainjs_url}"
|
||||
|
||||
bearer_token = bearer_token[0]
|
||||
|
||||
# get the guest token
|
||||
with requests.Session() as s:
|
||||
s.headers.update(
|
||||
{
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0",
|
||||
"accept": "*/*",
|
||||
"accept-language": "de,en-US;q=0.7,en;q=0.3",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"te": "trailers",
|
||||
}
|
||||
)
|
||||
|
||||
s.headers.update({"authorization": f"Bearer {bearer_token}"})
|
||||
|
||||
# activate bearer token and get guest token
|
||||
guest_token = s.post("https://api.twitter.com/1.1/guest/activate.json").json()[
|
||||
"guest_token"
|
||||
]
|
||||
|
||||
assert (
|
||||
guest_token is not None
|
||||
), f"Failed to find guest token. If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Tweet url: {tweet_url}, main.js url: {mainjs_url}"
|
||||
|
||||
return bearer_token, guest_token
|
||||
|
||||
def get_details_url(tweet_id, features, variables):
|
||||
# create a copy of variables - we don't want to modify the original
|
||||
variables = {**variables}
|
||||
variables["tweetId"] = tweet_id
|
||||
|
||||
return f"https://twitter.com/i/api/graphql/0hWvDhmW8YQ-S_ib3azIrw/TweetResultByRestId?variables={urllib.parse.quote(json.dumps(variables))}&features={urllib.parse.quote(json.dumps(features))}"
|
||||
|
||||
def get_tweet_details(tweet_url, guest_token, bearer_token):
    tweet_id = re.findall(r"(?<=status/)\d+", tweet_url)
    assert (
        tweet_id is not None and len(tweet_id) == 1
    ), f"Could not parse tweet id from your url. Make sure you are using the correct url. If you are, then file a GitHub issue and copy and paste this message. Tweet url: {tweet_url}"

    tweet_id = tweet_id[0]

    # the url needs a url encoded version of variables and features as a query string
    url = get_details_url(tweet_id, features, variables)

    details = requests.get(
        url,
        headers={
            "authorization": f"Bearer {bearer_token}",
            "x-guest-token": guest_token,
        },
    )

    max_retries = 10
    cur_retry = 0
    while details.status_code == 400 and cur_retry < max_retries:
        try:
            error_json = json.loads(details.text)
        except json.JSONDecodeError:
            assert (
                False
            ), f"Failed to parse json from details error. details text: {details.text} If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Status code: {details.status_code}. Tweet url: {tweet_url}"

        assert (
            "errors" in error_json
        ), f"Failed to find errors in details error json. If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Status code: {details.status_code}. Tweet url: {tweet_url}"

        needed_variable_pattern = re.compile(r"Variable '([^']+)'")
        needed_features_pattern = re.compile(
            r'The following features cannot be null: ([^"]+)'
        )

        for error in error_json["errors"]:
            needed_vars = needed_variable_pattern.findall(error["message"])
            for needed_var in needed_vars:
                variables[needed_var] = True

            needed_features = needed_features_pattern.findall(error["message"])
            for nf in needed_features:
                for feature in nf.split(","):
                    features[feature.strip()] = True

        url = get_details_url(tweet_id, features, variables)

        details = requests.get(
            url,
            headers={
                "authorization": f"Bearer {bearer_token}",
                "x-guest-token": guest_token,
            },
        )

        cur_retry += 1

        if details.status_code == 200:
            # save new variables
            request_details["variables"] = variables
            request_details["features"] = features

            with open(request_details_file, "w") as f:
                json.dump(request_details, f, indent=4)

    assert (
        details.status_code == 200
    ), f"Failed to get tweet details. If you are using the correct Twitter URL this suggests a bug in the script. Please open a GitHub issue and copy and paste this message. Status code: {details.status_code}. Tweet url: {tweet_url}"

    return details

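The retry loop above repairs its own request: on a 400 response it scrapes the missing GraphQL variables and features out of the API's error messages and switches them on. A self-contained sketch of that extraction step, using a made-up error message:

import re

message = "Variable 'withVoice' was not provided. The following features cannot be null: vibe_api_enabled, interactive_text_enabled"  # hypothetical error text

variables, features = {}, {}
for needed_var in re.findall(r"Variable '([^']+)'", message):
    variables[needed_var] = True
for nf in re.findall(r"The following features cannot be null: (.+)", message):
    for feature in nf.split(","):
        features[feature.strip()] = True

print(variables)  # {'withVoice': True}
print(features)   # {'vibe_api_enabled': True, 'interactive_text_enabled': True}
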
def get_tweet_status_id(tweet_url):
    sid_pattern = r"https://x\.com/[^/]+/status/(\d+)"
    if tweet_url[len(tweet_url) - 1] != "/":
        tweet_url = tweet_url + "/"

    match = re.findall(sid_pattern, tweet_url)
    if len(match) == 0:
        print("error, could not get status id from this tweet url:", tweet_url)
        exit()
    status_id = match[0]
    return status_id

def get_associated_media_id(j, tweet_url):
    sid = get_tweet_status_id(tweet_url)
    pattern = (
        r'"expanded_url"\s*:\s*"https://x\.com/[^/]+/status/'
        + sid
        + r'/[^"]+",\s*"id_str"\s*:\s*"\d+",'
    )
    matches = re.findall(pattern, j)
    if len(matches) > 0:
        target = matches[0]
        target = target[0: len(target) - 1]  # remove the comma at the end
        return json.loads("{" + target + "}")["id_str"]
    return None

def extract_mp4s(j, tweet_url, target_all_mp4s=False):
    # mp4 urls look like https://video.twimg.com/amplify_video/1638969830442237953/vid/1080x1920/lXSFa54mAVp7KHim.mp4?tag=16
    # or https://video.twimg.com/ext_tw_video/1451958820348080133/pu/vid/720x1280/GddnMJ7KszCQQFvA.mp4?tag=12
    amplitude_pattern = re.compile(
        r"(https://video.twimg.com/amplify_video/(\d+)/vid/(\d+x\d+)/[^.]+.mp4\?tag=\d+)"
    )
    ext_tw_pattern = re.compile(
        r"(https://video.twimg.com/ext_tw_video/(\d+)/pu/vid/(avc1/)?(\d+x\d+)/[^.]+.mp4\?tag=\d+)"
    )
    # format - https://video.twimg.com/tweet_video/Fvh6brqWAAQhU9p.mp4
    tweet_video_pattern = re.compile(r'https://video.twimg.com/tweet_video/[^"]+')

    # https://video.twimg.com/ext_tw_video/1451958820348080133/pu/pl/b-CiC-gZClIwXgDz.m3u8?tag=12&container=fmp4
    container_pattern = re.compile(r'https://video.twimg.com/[^"]*container=fmp4')
    media_id = get_associated_media_id(j, tweet_url)
    # find all the matches
    matches = amplitude_pattern.findall(j)
    matches += ext_tw_pattern.findall(j)
    container_matches = container_pattern.findall(j)

    tweet_video_matches = tweet_video_pattern.findall(j)

    if len(matches) == 0 and len(tweet_video_matches) > 0:
        return tweet_video_matches

    results = {}

    for match in matches:
        # the amplify pattern captures three groups, the ext_tw pattern four
        # (it has an optional avc1/ segment), so unpack accordingly
        if len(match) == 4:
            url, tweet_id, _, resolution = match
        else:
            url, tweet_id, resolution = match
        if tweet_id not in results:
            results[tweet_id] = {"resolution": resolution, "url": url}
        else:
            # if we already have a higher resolution video, then don't overwrite it
            my_dims = [int(x) for x in resolution.split("x")]
            their_dims = [int(x) for x in results[tweet_id]["resolution"].split("x")]

            if my_dims[0] * my_dims[1] > their_dims[0] * their_dims[1]:
                results[tweet_id] = {"resolution": resolution, "url": url}

    if media_id:
        all_urls = []
        for twid in results:
            all_urls.append(results[twid]["url"])
        all_urls += container_matches

        url_with_media_id = []
        for url in all_urls:
            if media_id in url:
                url_with_media_id.append(url)

        if len(url_with_media_id) > 0:
            return url_with_media_id

    if len(container_matches) > 0 and not target_all_mp4s:
        return container_matches

    if target_all_mp4s:
        urls = [x["url"] for x in results.values()]
        urls += container_matches
        return urls

    return [x["url"] for x in results.values()]

def download_parts(url, output_filename):
    resp = requests.get(url, stream=True)

    # container begins with / ends with fmp4 and has a resolution in it we want to capture
    pattern = re.compile(r"(/[^\n]*/(\d+x\d+)/[^\n]*container=fmp4)")

    matches = pattern.findall(resp.text)

    max_res = 0
    max_res_url = None

    for match in matches:
        # use a distinct name so the url parameter is not shadowed
        candidate_url, resolution = match
        width, height = resolution.split("x")
        res = int(width) * int(height)
        if res > max_res:
            max_res = res
            max_res_url = candidate_url

    assert (
        max_res_url is not None
    ), f"Could not find a url to download from. Make sure you are using the correct url. If you are, then file a GitHub issue and copy and paste this message. Tweet url: {url}"

    video_part_prefix = "https://video.twimg.com"

    resp = requests.get(video_part_prefix + max_res_url, stream=True)

    mp4_pattern = re.compile(r"(/[^\n]*\.mp4)")
    mp4_parts = mp4_pattern.findall(resp.text)

    assert (
        len(mp4_parts) == 1
    ), f"There should be exactly 1 mp4 container at this point. Instead, found {len(mp4_parts)}. Please open a GitHub issue and copy and paste this message into it. Tweet url: {url}"

    mp4_url = video_part_prefix + mp4_parts[0]

    m4s_part_pattern = re.compile(r"(/[^\n]*\.m4s)")
    m4s_parts = m4s_part_pattern.findall(resp.text)

    with open(output_filename, "wb") as f:
        r = requests.get(mp4_url, stream=True)
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                f.flush()

        for part in m4s_parts:
            part_url = video_part_prefix + part
            r = requests.get(part_url, stream=True)
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    f.flush()

    return True

def repost_check(j, exclude_replies=True):
    try:
        # extract the index of the first reply
        reply_index = j.index('"conversationthread-')
    except ValueError:
        # if there are no replies, use the entire response data length
        reply_index = len(j)
    # truncate the response data to exclude replies
    if exclude_replies:
        j = j[0:reply_index]

    # this regular expression extracts the source status
    source_status_pattern = r'"source_status_id_str"\s*:\s*"\d+"'
    matches = re.findall(source_status_pattern, j)

    if len(matches) > 0 and exclude_replies:
        # extract the source status id (ssid)
        ssid = json.loads("{" + matches[0] + "}")["source_status_id_str"]
        # plug it into this regular expression to find expanded_url (the original tweet url)
        expanded_url_pattern = (
            r'"expanded_url"\s*:\s*"https://x\.com/[^/]+/status/' + ssid + '[^"]+"'
        )
        matches2 = re.findall(expanded_url_pattern, j)

        if len(matches2) > 0:
            # extract the url and return it
            status_url = json.loads("{" + matches2[0] + "}")["expanded_url"]
            return status_url

    if not exclude_replies:
        # if we include replies, collect all ssids and remove duplicates
        ssids = []
        for match in matches:
            ssids.append(json.loads("{" + match + "}")["source_status_id_str"])
        # remove duplicates
        ssids = list(set(ssids))
        if len(ssids) > 0:
            for ssid in ssids:
                expanded_url_pattern = (
                    r'"expanded_url"\s*:\s*"https://x\.com/[^/]+/status/'
                    + ssid
                    + '[^"]+"'
                )
                matches2 = re.findall(expanded_url_pattern, j)
                if len(matches2) > 0:
                    status_urls = []
                    for match in matches2:
                        status_urls.append(
                            json.loads("{" + match + "}")["expanded_url"]
                        )
                    # remove duplicates a second time
                    status_urls = list(set(status_urls))
                    return status_urls

    # if we don't find source_status_id_str, the tweet doesn't feature a reposted video
    return None

def download_video_from_x(tweet_url, output_file, target_all_videos=False):
    bearer_token, guest_token = get_tokens(tweet_url)
    resp = get_tweet_details(tweet_url, guest_token, bearer_token)
    mp4s = extract_mp4s(resp.text, tweet_url, target_all_videos)
    if target_all_videos:
        video_counter = 1
        original_urls = repost_check(resp.text, exclude_replies=False)

        if original_urls:  # repost_check returns None when nothing is found
            for url in original_urls:
                download_video_from_x(
                    url, output_file.replace(".mp4", f"_{video_counter}.mp4")
                )
                video_counter += 1
        if len(mp4s) > 0:
            for mp4 in mp4s:
                output_file = output_file.replace(".mp4", f"_{video_counter}.mp4")
                if "container" in mp4:
                    download_parts(mp4, output_file)
                else:
                    r = requests.get(mp4, stream=True)
                    with open(output_file, "wb") as f:
                        for chunk in r.iter_content(chunk_size=1024):
                            if chunk:
                                f.write(chunk)
                                f.flush()
                video_counter += 1
    else:
        original_url = repost_check(resp.text)

        if original_url:
            download_video_from_x(original_url, output_file)
        else:
            assert (
                len(mp4s) > 0
            ), f"Could not find any mp4s to download. Make sure you are using the correct url. If you are, then file a GitHub issue and copy and paste this message. Tweet url: {tweet_url}"

            mp4 = mp4s[0]
            if "container" in mp4:
                download_parts(mp4, output_file)
            else:
                # use a stream to download the file
                r = requests.get(mp4, stream=True)
                with open(output_file, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            f.flush()
    # note: target_location is resolved from the enclosing scope here, not a local
    return target_location

# (the helpers above are presumably defined inside an enclosing wrapper, which
#  finishes by delegating to download_video_from_x with its own arguments)
return download_video_from_x(source_url, target_location)

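For reference, a hedged usage sketch of the downloader above; the tweet URL and output path are placeholders, and in this file the call normally goes through the enclosing wrapper:

# hypothetical example - fetch the (single) video of a tweet
download_video_from_x(
    "https://x.com/SomeUser/status/1638969830442237953",  # placeholder URL
    "./outputs/file.mp4",
)
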
# TIKTOK/INSTA
def getDict() -> dict:
    # scrape the hidden 64-character form token and the session cookies
    # from the ttdownloader.com landing page
    response = requests.get('https://ttdownloader.com/')
    point = response.text.find('<input type="hidden" id="token" name="token" value="') + \
        len('<input type="hidden" id="token" name="token" value="')
    token = response.text[point:point + 64]
    TTDict = {
        'token': token,
    }

    for i in response.cookies:
        TTDict[str(i).split()[1].split('=')[0].strip()] = str(
            i).split()[1].split('=')[1].strip()
    return TTDict


def createHeader(parseDict) -> tuple[dict[str, Any], dict[str | Any, str | Any], dict[str, str | Any]]:
    cookies = {
        'PHPSESSID': parseDict['PHPSESSID'],
        # 'popCookie': parseDict['popCookie'],
    }
    headers = {
        'authority': 'ttdownloader.com',
        'accept': '*/*',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'origin': 'https://ttdownloader.com',
        'referer': 'https://ttdownloader.com/',
        'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/108.0.0.0 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
    }
    data = {
        'url': '',
        'format': '',
        'token': parseDict['token'],
    }
    return cookies, headers, data


def TikTokDownload(cookies, headers, data, name, path) -> str:
    response = requests.post('https://ttdownloader.com/search/',
                             cookies=cookies, headers=headers, data=data)
    # grab the first href="..." token from the result markup; the [6:-10] slice
    # appears to strip the leading 'href="' and the trailing markup around the link
    parsed_link = [i for i in str(response.text).split()
                   if i.startswith("href=")][0]

    response = requests.get(parsed_link[6:-10])
    with open(path + "tiktok" + name + ".mp4", "wb") as f:
        f.write(response.content)
    return path + "tiktok" + name + ".mp4"


def TiktokDownloadAll(linkList, path) -> str:
    parseDict = getDict()
    cookies, headers, data = createHeader(parseDict)
    # linkList = getLinkDict()['tiktok']
    for i in linkList:
        try:
            data['url'] = i
            result = TikTokDownload(cookies, headers, data, str(linkList.index(i)), path)
            return result  # note: only the first successful link is downloaded
        except IndexError:
            # token/cookies probably expired - fetch fresh ones and retry with the next pass
            parseDict = getDict()
            cookies, headers, data = createHeader(parseDict)
        except Exception as err:
            print(err)
            exit(1)

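A hedged usage sketch; the TikTok URL is a placeholder:

# hypothetical example - download the first TikTok link into ./outputs/
TiktokDownloadAll(["https://www.tiktok.com/@someuser/video/1234567890"], "./outputs/")
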
def InstagramDownload(url, name, path) -> str:
    obj = instaloader.Instaloader()
    post = instaloader.Post.from_shortcode(obj.context, url.split("/")[-2])
    photo_url = post.url
    video_url = post.video_url
    print(video_url)
    if video_url:
        response = requests.get(video_url)
        with open(path + "insta" + name + ".mp4", "wb") as f:
            f.write(response.content)
        return path + "insta" + name + ".mp4"
    elif photo_url:
        response = requests.get(photo_url)
        with open(path + "insta" + name + ".jpg", "wb") as f:
            f.write(response.content)
        return path + "insta" + name + ".jpg"


def InstagramDownloadAll(linklist, path) -> str:
    for i in linklist:
        try:
            print(str(linklist.index(i)))
            print(str(i))  # i is the link itself; indexing linklist with it would raise
            result = InstagramDownload(i, str(linklist.index(i)), path)
            return result
        except Exception as err:
            print(err)
            exit(1)

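An analogous hedged sketch for the Instagram path; the post URL is a placeholder (the shortcode is taken from the second-to-last path segment, so the trailing slash matters):

# hypothetical example - download a single Instagram post into ./outputs/
InstagramDownloadAll(["https://www.instagram.com/p/Cx0abc123/"], "./outputs/")
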
# YOUTUBE
def YouTubeDownload(link, path, audio_only=True):
    youtubeObject = YouTube(link)
    if audio_only:
        # note: get_audio_only() yields an audio stream in its original container;
        # saving it as "yt.mp3" renames the file but does not transcode it
        youtubeObject = youtubeObject.streams.get_audio_only()
        youtubeObject.download(path, "yt.mp3")
        print("Download completed successfully")
        return path + "yt.mp3"
    else:
        youtubeObject = youtubeObject.streams.get_highest_resolution()
        youtubeObject.download(path, "yt.mp4")
        print("Download completed successfully")
        return path + "yt.mp4"

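A hedged usage sketch; the video id and output directory are placeholders:

# hypothetical example - grab only the audio track
YouTubeDownload("https://www.youtube.com/watch?v=VIDEO_ID", "./outputs/", audio_only=True)
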
def checkYoutubeLinkValid(link):
    try:
        # TODO find a way to test without fully downloading the file
        youtubeObject = YouTube(link)
        youtubeObject = youtubeObject.streams.get_audio_only()
        youtubeObject.download(".", "yt.mp3")
        os.remove("yt.mp3")
        return True

    except Exception as e:
        print(str(e))
        return False


# OVERCAST
def OvercastDownload(source_url, target_location):
    """Given an Overcast source URL, fetch the file it points to."""

    def get_title(html_str):
        """Get the title from the meta tags"""
        title = re.findall(r"<meta name=\"og:title\" content=\"(.+)\"", html_str)
        if len(title) == 1:
            return title[0].replace("—", "-")
        return None

    def get_description(html_str):
        """Get the description from the meta tag"""
        desc_re = r"<meta name=\"og:description\" content=\"(.+)\""
        description = re.findall(desc_re, html_str)
        if len(description) == 1:
            return description[0]
        return None

    def get_url(html_string):
        """Find the URL from the <audio><source>.... tag"""
        url = re.findall(r"<source src=\"(.+?)\"", html_string)
        if len(url) == 1:
            # strip off the last 4 characters to cater for the #t=0 in the URL
            # which urlretrieve flags as invalid
            return url[0][:-4]
        return None

    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
                      "AppleWebKit/537.11 (KHTML, like Gecko) "
                      "Chrome/23.0.1271.64 Safari/537.11",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
        "Accept-Encoding": "none",
        "Accept-Language": "en-US,en;q=0.8",
        "Connection": "keep-alive",
    }
    req = Request(source_url, None, headers)
    source_data = urlopen(req).read().decode('utf-8')
    title = get_title(source_data)
    url = get_url(source_data)

    if url is None or title is None:
        sys.exit("Could not find or parse the audio URL")
    if not os.path.exists(target_location):
        req = requests.get(url)
        with open(target_location, 'wb') as file:
            for chunk in req.iter_content(100000):
                file.write(chunk)
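And a hedged sketch for the Overcast path; the episode URL is a placeholder:

# hypothetical example - save an Overcast episode locally
OvercastDownload("https://overcast.fm/+AbCdEfGhI", "./outputs/episode.mp3")
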
40
utils/scrapper/request_details.json
Normal file
@ -0,0 +1,40 @@
{
    "features": {
        "responsive_web_graphql_exclude_directive_enabled": true,
        "verified_phone_label_enabled": false,
        "responsive_web_graphql_timeline_navigation_enabled": true,
        "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
        "tweetypie_unmention_optimization_enabled": true,
        "vibe_api_enabled": false,
        "responsive_web_edit_tweet_api_enabled": false,
        "graphql_is_translatable_rweb_tweet_is_translatable_enabled": false,
        "view_counts_everywhere_api_enabled": true,
        "longform_notetweets_consumption_enabled": true,
        "tweet_awards_web_tipping_enabled": false,
        "freedom_of_speech_not_reach_fetch_enabled": false,
        "standardized_nudges_misinfo": false,
        "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
        "interactive_text_enabled": false,
        "responsive_web_twitter_blue_verified_badge_is_enabled": true,
        "responsive_web_text_conversations_enabled": false,
        "longform_notetweets_richtext_consumption_enabled": false,
        "responsive_web_enhance_cards_enabled": false,
        "longform_notetweets_inline_media_enabled": true,
        "longform_notetweets_rich_text_read_enabled": true,
        "responsive_web_media_download_video_enabled": true,
        "responsive_web_twitter_article_tweet_consumption_enabled": true,
        "creator_subscriptions_tweet_preview_api_enabled": true
    },
    "variables": {
        "with_rux_injections": false,
        "includePromotedContent": true,
        "withCommunity": true,
        "withQuickPromoteEligibilityTweetFields": true,
        "withBirdwatchNotes": true,
        "withDownvotePerspective": false,
        "withReactionsMetadata": false,
        "withReactionsPerspective": false,
        "withVoice": true,
        "withV2Timeline": true
    }
}
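These are the GraphQL feature flags and query variables the X scraper sends, and which get_tweet_details rewrites when the API reports missing ones. A sketch of loading the file, assuming this relative path:

import json

with open("utils/scrapper/request_details.json") as f:
    request_details = json.load(f)

features = request_details["features"]
variables = request_details["variables"]
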
@ -91,7 +91,11 @@ def create_bolt11_ln_bits(sats: int, config: DVMConfig) -> (str, str):
    try:
        res = requests.post(url, json=data, headers=headers)
        obj = json.loads(res.text)
        if obj.get("payment_request") and obj.get("payment_hash"):
            return obj["payment_request"], obj["payment_hash"]
        else:
            print(res.text)
            return None, None
    except Exception as e:
        print("LNBITS: " + str(e))
        return None, None
@ -121,7 +125,10 @@ def check_bolt11_ln_bits_is_paid(payment_hash: str, config: DVMConfig):
    try:
        res = requests.get(url, headers=headers)
        obj = json.loads(res.text)
        if obj.get("paid"):
            return obj["paid"]
        else:
            return False
    except Exception as e:
        return None

@ -133,7 +140,10 @@ def pay_bolt11_ln_bits(bolt11: str, config: DVMConfig):
    try:
        res = requests.post(url, json=data, headers=headers)
        obj = json.loads(res.text)
        if obj.get("payment_hash"):
            return obj["payment_hash"]
        else:
            return "Error"
    except Exception as e:
        print("LNBITS: " + str(e))
        return None  # single return value, matching the function's other branches