diff --git a/nostr_dvm/utils/mediasource_utils.py b/nostr_dvm/utils/mediasource_utils.py index 0d799c2..ffc146d 100644 --- a/nostr_dvm/utils/mediasource_utils.py +++ b/nostr_dvm/utils/mediasource_utils.py @@ -7,7 +7,7 @@ from decord import AudioReader, cpu import requests from nostr_dvm.utils.nostr_utils import get_event_by_id from nostr_dvm.utils.scrapper.media_scrapper import OvercastDownload, XitterDownload, TiktokDownloadAll, \ - InstagramDownload, YouTubeDownload + InstagramDownload, YouTubeDownload, XDownload def input_data_file_duration(event, dvm_config, client, start=0, end=0): @@ -344,7 +344,8 @@ def download_overcast(source_url, target_location): def download_twitter(videourl, path): - result = XitterDownload(videourl, path + "x.mp4") + result = XDownload(videourl, path + "x.mp4") + #result = XitterDownload(videourl, path + "x.mp4") return result diff --git a/nostr_dvm/utils/scrapper/media_scrapper.py b/nostr_dvm/utils/scrapper/media_scrapper.py index 9b7cb03..d656517 100644 --- a/nostr_dvm/utils/scrapper/media_scrapper.py +++ b/nostr_dvm/utils/scrapper/media_scrapper.py @@ -9,8 +9,18 @@ from urllib.request import urlopen, Request import requests import instaloader from pytube import YouTube +import sys +import os +import re + +import requests +import bs4 + +from tqdm import tqdm +from pathlib import Path +# Deprecated, currently not functional def XitterDownload(source_url, target_location): script_dir = os.path.dirname(os.path.realpath(__file__)) request_details_file = f"{script_dir}{os.sep}request_details.json" @@ -450,6 +460,57 @@ def XitterDownload(source_url, target_location): return download_video_from_x(source_url, target_location) +def download_xvideo(url, target_location) -> None: + """Download a video from a URL into a filename. + + Args: + url (str): The video URL to download + target_location (str): The file name or path to save the video to. 
+ """ + + response = requests.get(url, stream=True) + total_size = int(response.headers.get("content-length", 0)) + block_size = 1024 + progress_bar = tqdm(total=total_size, unit="B", unit_scale=True) + + #download_path = os.path.join(Path.home(), "Downloads", file_name) + download_path = target_location + with open(download_path, "wb") as file: + for data in response.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + + progress_bar.close() + print("Video downloaded successfully!") + + +def XDownload(url, filepath=""): + """Extract the highest quality video url to download into a file + + Args: + url (str): The twitter post URL to download from + """ + + api_url = f"https://twitsave.com/info?url={url}" + + response = requests.get(api_url) + data = bs4.BeautifulSoup(response.text, "html.parser") + download_button = data.find_all("div", class_="origin-top-right")[0] + quality_buttons = download_button.find_all("a") + highest_quality_url = quality_buttons[0].get("href") # Highest quality video url + + #file_name = data.find_all("div", class_="leading-tight")[0].find_all("p", class_="m-2")[0].text # Video file name + #file_name = re.sub(r"[^a-zA-Z0-9]+", ' ', file_name).strip() + ".mp4" # Remove special characters from file name + + download_xvideo(highest_quality_url, filepath) + return filepath + + + + + + + # TIKTOK/INSTA def getDict() -> dict: response = requests.get('https://ttdownloader.com/') diff --git a/setup.py b/setup.py index f2a9598..ec0b0a4 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -VERSION = '0.6.7' +VERSION = '0.6.8' DESCRIPTION = 'A framework to build and run Nostr NIP90 Data Vending Machines' LONG_DESCRIPTION = ('A framework to build and run Nostr NIP90 Data Vending Machines. 
See the github repository for more information') @@ -18,6 +18,7 @@ setup( "bech32==1.2.0", "networkx==3.3", "scipy==1.13.1", + "beautifulsoup4==4.12.3", "pycryptodome==3.20.0", "python-dotenv==1.0.0", "emoji==2.8.0", diff --git a/tests/discovery.py b/tests/discovery.py index 2de4cb4..67e54c9 100644 --- a/tests/discovery.py +++ b/tests/discovery.py @@ -46,7 +46,7 @@ AVOID_PAID_OUTBOX_RELAY_LIST = ["wss://nostrelay.yeghro.site", "wss://nostr.wine "wss://relay.nostriches.org", "wss://happytavern.co", "wss://onlynotes.lol", "wss://offchain.pub", "wss://purplepag.es", "wss://relay.plebstr.com", "wss://poster.place/relay", "wss://relayable.org", "wss://bbb.santos.lol", "wss://relay.bitheaven.social", "wss://theforest.nostr1.com", "wss://relay.nostrati.com", "wss://purplerelay.com", "wss://hist.nostr.land", "wss://creatr.nostr.wine", "ws://localhost:4869", - "wss://pyramid.fiatjaf.com", "wss://relay.nos.social", "wss://nostr.thank.eu" + "wss://pyramid.fiatjaf.com", "wss://relay.nos.social", "wss://nostr.thank.eu", "wss://inbox.nostr.wine" ] RECONCILE_DB_RELAY_LIST = ["wss://relay.damus.io", @@ -366,7 +366,6 @@ def build_example_top_zapped(name, identifier, admin_config, options, image, cos def playground(): - #DB Scheduler, do not announce, just use it to update the DB for the other DVMs. admin_config_db_scheduler= AdminConfig() options_animal = {