From fbedb7a73fdf9e5c84ec7b91fe54d9bb179b9da6 Mon Sep 17 00:00:00 2001 From: Believethehype <1097224+believethehype@users.noreply.github.com> Date: Thu, 7 Nov 2024 10:30:30 +0100 Subject: [PATCH] run scrapper on browserless systems (where supported) --- nostr_dvm/utils/scrapper/media_scrapper.py | 76 +++++++++++++++------- setup.py | 2 +- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/nostr_dvm/utils/scrapper/media_scrapper.py b/nostr_dvm/utils/scrapper/media_scrapper.py index 5967c06..03f3f2d 100644 --- a/nostr_dvm/utils/scrapper/media_scrapper.py +++ b/nostr_dvm/utils/scrapper/media_scrapper.py @@ -11,6 +11,7 @@ import yt_dlp from tqdm import tqdm browser = "chrome" # "firefox" +usebrowser = False def download_xvideo(url, target_location) -> None: @@ -124,9 +125,13 @@ def YTDownload(link, path, audio_only=True): def get_media_duration(url): try: # ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions - ydl_opts = { - 'cookiesfrombrowser': (browser, None, None, None), - } + if usebrowser: + ydl_opts = { + 'cookiesfrombrowser': (browser, None, None, None), + } + else: + ydl_opts = {} + with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=False) @@ -139,9 +144,12 @@ def get_media_duration(url): def get_media_info(url): try: # ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions - ydl_opts = { - 'cookiesfrombrowser': (browser, None, None, None), - } + if usebrowser: + ydl_opts = { + 'cookiesfrombrowser': (browser, None, None, None), + } + else: + ydl_opts = {} with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=False) @@ -153,17 +161,29 @@ def get_media_info(url): def get_audio(URLS): try: - ydl_opts = { - 'cookiesfrombrowser': (browser, None, None, None), - 'format': 'm4a/bestaudio/best', - "outtmpl": 'outputs/audio', - 'overwrites': 'True', - # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments - 'postprocessors': [{ # Extract audio using ffmpeg - 'key': 'FFmpegExtractAudio', - 'preferredcodec': 'mp3', - }] - } + if usebrowser: + ydl_opts = { + 'cookiesfrombrowser': (browser, None, None, None), + 'format': 'm4a/bestaudio/best', + "outtmpl": 'outputs/audio', + 'overwrites': 'True', + # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments + 'postprocessors': [{ # Extract audio using ffmpeg + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'mp3', + }] + } + else: + ydl_opts = { + 'format': 'm4a/bestaudio/best', + "outtmpl": 'outputs/audio', + 'overwrites': 'True', + # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments + 'postprocessors': [{ # Extract audio using ffmpeg + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'mp3', + }] + } with yt_dlp.YoutubeDL(ydl_opts) as ydl: error_code = ydl.download(URLS) @@ -174,13 +194,21 @@ def get_audio(URLS): def get_video(URLS): try: - ydl_opts = { - 'cookiesfrombrowser': (browser, None, None, None), - 'format': 'mp4', - 'overwrites': 'True', - # "outtmpl": '/%(uploader)s_%(title)s.%(ext)s', - "outtmpl": 'outputs/video.mp4', - } + + if usebrowser: + ydl_opts = { + 'cookiesfrombrowser': (browser, None, None, None), + 'format': 'mp4', + 'overwrites': 'True', + "outtmpl": 'outputs/video.mp4', + } + else: + ydl_opts = { + 'format': 'mp4', + 'overwrites': 'True', + "outtmpl": 'outputs/video.mp4', + } + with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.download(URLS) diff --git a/setup.py b/setup.py index 316f4d3..1e43a4a 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ setup( install_requires=["nostr-sdk==0.36.0", "bech32==1.2.0", "pycryptodome==3.20.0", - "yt-dlp==2024.5.27", + "yt-dlp==2024.11.04", "python-dotenv==1.0.0", "emoji==2.12.1", "ffmpegio==0.9.1",