mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
Move pings to FileDownloader
This commit is contained in:
parent
424818a3dc
commit
1015d0ab03
3 changed files with 87 additions and 29 deletions
|
@ -35,6 +35,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .websocket import WebSocketFragmentFD
|
from .websocket import WebSocketFragmentFD
|
||||||
from .youtube_live_chat import YoutubeLiveChatFD
|
from .youtube_live_chat import YoutubeLiveChatFD
|
||||||
|
from .bunnycdn import BunnyCdnFD
|
||||||
|
|
||||||
PROTOCOL_MAP = {
|
PROTOCOL_MAP = {
|
||||||
'rtmp': RtmpFD,
|
'rtmp': RtmpFD,
|
||||||
|
@ -55,6 +56,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
|
||||||
'websocket_frag': WebSocketFragmentFD,
|
'websocket_frag': WebSocketFragmentFD,
|
||||||
'youtube_live_chat': YoutubeLiveChatFD,
|
'youtube_live_chat': YoutubeLiveChatFD,
|
||||||
'youtube_live_chat_replay': YoutubeLiveChatFD,
|
'youtube_live_chat_replay': YoutubeLiveChatFD,
|
||||||
|
'bunnycdn': BunnyCdnFD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
64
yt_dlp/downloader/bunnycdn.py
Normal file
64
yt_dlp/downloader/bunnycdn.py
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
import hashlib
|
||||||
|
import random
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from .common import FileDownloader
|
||||||
|
from . import HlsFD
|
||||||
|
from ..networking import Request
|
||||||
|
|
||||||
|
|
||||||
|
class BunnyCdnFD(FileDownloader):
    """Downloads from BunnyCDN with required pings"""

    def real_download(self, filename, info_dict):
        """Download *filename*, keeping BunnyCDN's required ping loop alive.

        Delegates the actual media download to HlsFD while a self-re-arming
        threading.Timer chain periodically pings the server, using the data
        the extractor stored under info_dict['_bunnycdn_ping_data'].
        Returns the HlsFD success flag.
        """
        self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN')

        fd = HlsFD(self.ydl, self.params)

        success = download_complete = False
        # Single-element lists act as mutable cells shared with the
        # send_ping() closure (written there, read here and vice versa)
        timer = [None]
        ping_lock = threading.Lock()
        current_time = [0]

        ping_url = info_dict['_bunnycdn_ping_data']['url']
        headers = info_dict['_bunnycdn_ping_data']['headers']
        secret = info_dict['_bunnycdn_ping_data']['secret']
        context_id = info_dict['_bunnycdn_ping_data']['context_id']
        # Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work.
        ping_interval = 2

        def send_ping():
            # Reported "time" is the accumulated interval plus a random
            # fraction — presumably to mimic a real playback position
            time = current_time[0] + round(random.random(), 6)
            # Hard coded resolution as it doesn't seem to matter
            res = 1080
            paused = 'false'
            md5_hash = hashlib.md5(f'{secret}_{context_id}_{time}_{paused}_{res}'.encode()).hexdigest()

            request = Request(
                f'{ping_url}?hash={md5_hash}&time={time}&paused={paused}&resolution={res}',
                headers=headers,
            )

            try:
                self.ydl.urlopen(request).read()
            except Exception:
                # Pings are best-effort: a failed ping is reported but must
                # not abort the download itself
                self.to_screen(f'[{self.FD_NAME}] Ping failed')

            # Re-arm the timer unless the download has already finished;
            # the lock serializes this against the finally-block teardown
            with ping_lock:
                if not download_complete:
                    current_time[0] += ping_interval
                    timer[0] = threading.Timer(ping_interval, send_ping)
                    timer[0].start()

        # Start ping loop
        self.to_screen(f'[{self.FD_NAME}] Starting pings with {ping_interval} second interval...')
        try:
            send_ping()
            success = fd.real_download(filename, info_dict)
        finally:
            # Tear down the ping chain: cancel any pending timer and set the
            # completion flag so an in-flight send_ping() won't re-arm
            with ping_lock:
                if timer[0]:
                    timer[0].cancel()
                download_complete = True

        return success
|
|
@ -1,6 +1,4 @@
|
||||||
import hashlib
|
|
||||||
import json
|
import json
|
||||||
import random
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking import HEADRequest
|
from ..networking import HEADRequest
|
||||||
|
@ -32,6 +30,7 @@ class BunnyCdnIE(InfoExtractor):
|
||||||
'duration': 7.0,
|
'duration': 7.0,
|
||||||
'upload_date': '20230828',
|
'upload_date': '20230828',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://iframe.mediadelivery.net/play/136145/32e34c4b-0d72-437c-9abb-05e67657da34',
|
'url': 'https://iframe.mediadelivery.net/play/136145/32e34c4b-0d72-437c-9abb-05e67657da34',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -44,6 +43,7 @@ class BunnyCdnIE(InfoExtractor):
|
||||||
'upload_date': '20230804',
|
'upload_date': '20230804',
|
||||||
'title': 'Sanela ist Teil der #arbeitsmarktkraft',
|
'title': 'Sanela ist Teil der #arbeitsmarktkraft',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
# Stream requires activation and pings
|
# Stream requires activation and pings
|
||||||
'url': 'https://iframe.mediadelivery.net/embed/200867/2e8545ec-509d-4571-b855-4cf0235ccd75',
|
'url': 'https://iframe.mediadelivery.net/embed/200867/2e8545ec-509d-4571-b855-4cf0235ccd75',
|
||||||
|
@ -57,20 +57,9 @@ class BunnyCdnIE(InfoExtractor):
|
||||||
'upload_date': '20240221',
|
'upload_date': '20240221',
|
||||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/2e8545ec-509d-4571-b855-4cf0235ccd75/thumbnail\.jpg',
|
'thumbnail': r're:^https?://.*\.b-cdn\.net/2e8545ec-509d-4571-b855-4cf0235ccd75/thumbnail\.jpg',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
}]
|
}]
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
'url': 'https://www.queisser.de/unternehmen/neue-firmenzentrale',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'd3e06f96-9972-45a0-a261-1e565bf72778',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': '',
|
|
||||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/d3e06f96-9972-45a0-a261-1e565bf72778/thumbnail_512bb53f\.jpg',
|
|
||||||
'upload_date': '20221214',
|
|
||||||
'duration': 134.0,
|
|
||||||
'timestamp': 1671016982,
|
|
||||||
'title': 'Zeitraffer Abriss 1080p',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# Stream requires Referer
|
# Stream requires Referer
|
||||||
'url': 'https://conword.io/',
|
'url': 'https://conword.io/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -83,30 +72,23 @@ class BunnyCdnIE(InfoExtractor):
|
||||||
'thumbnail': 'https://video.watchuh.com/3a5d863e-9cd6-447e-b6ef-e289af50b349/thumbnail.jpg',
|
'thumbnail': 'https://video.watchuh.com/3a5d863e-9cd6-447e-b6ef-e289af50b349/thumbnail.jpg',
|
||||||
'timestamp': 1698783879,
|
'timestamp': 1698783879,
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
# URL requires token and expires
|
# URL requires token and expires
|
||||||
'url': 'https://www.stockphotos.com/video/moscow-subway-the-train-is-arriving-at-the-park-kultury-station-10017830',
|
'url': 'https://www.stockphotos.com/video/moscow-subway-the-train-is-arriving-at-the-park-kultury-station-10017830',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0b02fa20-4e8c-4140-8f87-f64d820a3386',
|
'id': '0b02fa20-4e8c-4140-8f87-f64d820a3386',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': r're:^https?://.*\.b-cdn\.net//0b02fa20-4e8c-4140-8f87-f64d820a3386/thumbnail\.jpg',
|
'thumbnail': r're:^https?://.*\.b-cdn\.net/0b02fa20-4e8c-4140-8f87-f64d820a3386/thumbnail\.jpg',
|
||||||
'title': 'Moscow subway. The train is arriving at the Park Kultury station.',
|
'title': 'Moscow subway. The train is arriving at the Park Kultury station.',
|
||||||
'upload_date': '20240531',
|
'upload_date': '20240531',
|
||||||
'duration': 18.0,
|
'duration': 18.0,
|
||||||
'timestamp': 1717152269,
|
'timestamp': 1717152269,
|
||||||
'description': '',
|
'description': '',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _send_ping(self, ping_url, video_id, headers, secret, context_id, time, paused='false'):
|
|
||||||
# Hard coded, since it doesn't seem to matter
|
|
||||||
res = 1080
|
|
||||||
md5_hash = hashlib.md5(f'{secret}_{context_id}_{time}_{paused}_{res}'.encode()).hexdigest()
|
|
||||||
self._download_webpage(
|
|
||||||
ping_url, video_id, note=f'Sending ping at {time}',
|
|
||||||
query={'hash': md5_hash, 'time': time, 'paused': paused, 'resolution': res},
|
|
||||||
headers=headers)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_embed_urls(cls, url, webpage):
|
def _extract_embed_urls(cls, url, webpage):
|
||||||
for embed_url in super()._extract_embed_urls(url, webpage):
|
for embed_url in super()._extract_embed_urls(url, webpage):
|
||||||
|
@ -157,21 +139,30 @@ def _real_extract(self, url):
|
||||||
r'loadUrl\([\'"]([^\'"]+/ping)[\'"]', webpage, 'ping url', default=None)
|
r'loadUrl\([\'"]([^\'"]+/ping)[\'"]', webpage, 'ping url', default=None)
|
||||||
secret = traverse_obj(parse_qs(src_url), ('secret', 0))
|
secret = traverse_obj(parse_qs(src_url), ('secret', 0))
|
||||||
context_id = traverse_obj(parse_qs(src_url), ('contextId', 0))
|
context_id = traverse_obj(parse_qs(src_url), ('contextId', 0))
|
||||||
|
ping_data = {}
|
||||||
if src_url and activation_url and ping_url and secret and context_id:
|
if src_url and activation_url and ping_url and secret and context_id:
|
||||||
self._send_ping(ping_url, video_id, headers, secret, context_id, 0, 'true')
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
activation_url, video_id, headers=headers, note='Downloading activation data')
|
activation_url, video_id, headers=headers, note='Downloading activation data')
|
||||||
# Sending first couple pings ahead of time seems to be enough
|
|
||||||
for i in range(0, 30, 4):
|
|
||||||
self._send_ping(ping_url, video_id, headers, secret, context_id, i + round(random.random(), 6))
|
|
||||||
|
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
src_url, video_id, 'mp4', headers=headers, m3u8_id='hls', fatal=False)
|
src_url, video_id, 'mp4', headers=headers, m3u8_id='hls', fatal=False)
|
||||||
for fmt in fmts:
|
for fmt in fmts:
|
||||||
fmt['http_headers'] = headers
|
fmt.update({
|
||||||
|
'protocol': 'bunnycdn',
|
||||||
|
'http_headers': headers,
|
||||||
|
})
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
|
ping_data = {
|
||||||
|
'_bunnycdn_ping_data': {
|
||||||
|
'url': ping_url,
|
||||||
|
'headers': headers,
|
||||||
|
'secret': secret,
|
||||||
|
'context_id': context_id,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -180,5 +171,6 @@ def _real_extract(self, url):
|
||||||
'title': ('data-plyr-config', {json.loads}, 'title', {str}),
|
'title': ('data-plyr-config', {json.loads}, 'title', {str}),
|
||||||
'thumbnail': ('data-poster', {url_or_none}),
|
'thumbnail': ('data-poster', {url_or_none}),
|
||||||
})),
|
})),
|
||||||
|
**ping_data,
|
||||||
**self._search_json_ld(webpage, video_id, fatal=False),
|
**self._search_json_ld(webpage, video_id, fatal=False),
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue