diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3293a9076..d589db562 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -5,6 +5,7 @@ import errno import fileinput import functools +import hashlib import http.cookiejar import io import itertools @@ -4425,3 +4426,20 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None if ret and not write_all: break return ret + + def _request_dump_filename(self, url, video_id, data=None): + if data is not None: + data = hashlib.md5(data).hexdigest() + basen = join_nonempty(video_id, data, url, delim='_') + trim_length = self.params.get('trim_file_name') or 240 + if len(basen) > trim_length: + h = '___' + hashlib.md5(basen.encode()).hexdigest() + basen = basen[:trim_length - len(h)] + h + filename = sanitize_filename(f'{basen}.dump', restricted=True) + # Working around MAX_PATH limitation on Windows (see + # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) + if os.name == 'nt': + absfilepath = os.path.abspath(filename) + if len(absfilepath) > 259: + filename = fR'\\?\{absfilepath}' + return filename diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 7a47f8f83..1e9ab911f 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -80,7 +80,13 @@ def real_download(self, filename, info_dict): self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url - s = urlh.read().decode('utf-8', 'ignore') + s_bytes = urlh.read() + if self.params.get('write_pages'): + dump_filename = self.ydl._request_dump_filename(man_url, info_dict['id'], None) + self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}') + with open(dump_filename, 'wb') as outf: + outf.write(s_bytes) + s = s_bytes.decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3e7734ce1..a809537c4 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2,7 +2,6 @@ import collections import functools import getpass -import hashlib import http.client import http.cookiejar import http.cookies @@ -78,7 +77,6 @@ parse_iso8601, parse_m3u8_attributes, parse_resolution, - sanitize_filename, sanitize_url, smuggle_url, str_or_none, @@ -1022,23 +1020,6 @@ def __check_blocked(self, content): 'Visit http://blocklist.rkn.gov.ru/ for a block reason.', expected=True) - def _request_dump_filename(self, url, video_id, data=None): - if data is not None: - data = hashlib.md5(data).hexdigest() - basen = join_nonempty(video_id, data, url, delim='_') - trim_length = self.get_param('trim_file_name') or 240 - if len(basen) > trim_length: - h = '___' + hashlib.md5(basen.encode()).hexdigest() - basen = basen[:trim_length - len(h)] + h - filename = sanitize_filename(f'{basen}.dump', restricted=True) - # Working around MAX_PATH limitation on Windows (see - # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) - if os.name == 'nt': - absfilepath = os.path.abspath(filename) - if len(absfilepath) > 259: - filename = fR'\\?\{absfilepath}' - return filename - def __decode_webpage(self, webpage_bytes, encoding, headers): if not encoding: encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes) @@ -1067,7 +1048,7 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno if self.get_param('write_pages'): if isinstance(url_or_request, Request): data = self._create_request(url_or_request, data).data - filename = self._request_dump_filename(urlh.url, video_id, data) + filename = self._downloader._request_dump_filename(urlh.url, video_id, data) self.to_screen(f'Saving request to {filename}') with open(filename, 'wb') as outf: outf.write(webpage_bytes) @@ -1128,7 +1109,8 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote, impersonate=None, require_impersonation=False): if self.get_param('load_pages'): url_or_request = self._create_request(url_or_request, data, headers, query) - filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data) + filename = self._downloader._request_dump_filename( + url_or_request.url, video_id, url_or_request.data) self.to_screen(f'Loading request from {filename}') try: with open(filename, 'rb') as dumpf: