1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00

[fd/hls] Support --write-pages for m3u8 media playlists

Authored by: bashonly
This commit is contained in:
bashonly 2025-02-09 03:02:21 -06:00
parent 2081634474
commit 25d2cc59c3
No known key found for this signature in database
GPG key ID: 783F096F253D15B0
3 changed files with 28 additions and 22 deletions

View file

@ -5,6 +5,7 @@
import errno
import fileinput
import functools
import hashlib
import http.cookiejar
import io
import itertools
@ -4425,3 +4426,20 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
if ret and not write_all:
break
return ret
def _request_dump_filename(self, url, video_id, data=None):
if data is not None:
data = hashlib.md5(data).hexdigest()
basen = join_nonempty(video_id, data, url, delim='_')
trim_length = self.params.get('trim_file_name') or 240
if len(basen) > trim_length:
h = '___' + hashlib.md5(basen.encode()).hexdigest()
basen = basen[:trim_length - len(h)] + h
filename = sanitize_filename(f'{basen}.dump', restricted=True)
# Working around MAX_PATH limitation on Windows (see
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
if os.name == 'nt':
absfilepath = os.path.abspath(filename)
if len(absfilepath) > 259:
filename = fR'\\?\{absfilepath}'
return filename

View file

@ -80,7 +80,13 @@ def real_download(self, filename, info_dict):
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
s = urlh.read().decode('utf-8', 'ignore')
s_bytes = urlh.read()
if self.params.get('write_pages'):
dump_filename = self.ydl._request_dump_filename(man_url, info_dict['id'], None)
self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}')
with open(dump_filename, 'wb') as outf:
outf.write(s_bytes)
s = s_bytes.decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
if can_download:

View file

@ -2,7 +2,6 @@
import collections
import functools
import getpass
import hashlib
import http.client
import http.cookiejar
import http.cookies
@ -78,7 +77,6 @@
parse_iso8601,
parse_m3u8_attributes,
parse_resolution,
sanitize_filename,
sanitize_url,
smuggle_url,
str_or_none,
@ -1022,23 +1020,6 @@ def __check_blocked(self, content):
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
def _request_dump_filename(self, url, video_id, data=None):
if data is not None:
data = hashlib.md5(data).hexdigest()
basen = join_nonempty(video_id, data, url, delim='_')
trim_length = self.get_param('trim_file_name') or 240
if len(basen) > trim_length:
h = '___' + hashlib.md5(basen.encode()).hexdigest()
basen = basen[:trim_length - len(h)] + h
filename = sanitize_filename(f'{basen}.dump', restricted=True)
# Working around MAX_PATH limitation on Windows (see
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
if os.name == 'nt':
absfilepath = os.path.abspath(filename)
if len(absfilepath) > 259:
filename = fR'\\?\{absfilepath}'
return filename
def __decode_webpage(self, webpage_bytes, encoding, headers):
if not encoding:
encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
@ -1067,7 +1048,7 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
if self.get_param('write_pages'):
if isinstance(url_or_request, Request):
data = self._create_request(url_or_request, data).data
filename = self._request_dump_filename(urlh.url, video_id, data)
filename = self._downloader._request_dump_filename(urlh.url, video_id, data)
self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
@ -1128,7 +1109,8 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
impersonate=None, require_impersonation=False):
if self.get_param('load_pages'):
url_or_request = self._create_request(url_or_request, data, headers, query)
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
filename = self._downloader._request_dump_filename(
url_or_request.url, video_id, url_or_request.data)
self.to_screen(f'Loading request from {filename}')
try:
with open(filename, 'rb') as dumpf: