From 6cd95babef4a150a8c07db5fab572029d18ff5c3 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Wed, 8 Jan 2025 16:32:40 -0800 Subject: [PATCH 1/4] feat: first pass at handling redirects in process_ie_result() --- yt_dlp/YoutubeDL.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 764baf3a0..510f0cbae 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1822,6 +1822,18 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): + if 'redirect_count' in extra_info: + extra_info['redirect_count'] = 1 + extra_info.get('redirect_count', 0) + else: + extra_info['redirect_count'] = 0 + + # TODO: make command line arg with large or infinite default + if extra_info['redirect_count'] >= 5: + raise ExtractorError( + f"Too many redirects for URL: {ie_result['url']}", + expected=True, + ) + ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') if ie_result.get('original_url') and not extra_info.get('original_url'): From 050b6c1f29c874e05f485f1af7d18158c4baae79 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Wed, 8 Jan 2025 16:44:22 -0800 Subject: [PATCH 2/4] chore: refactor attribute naming. The attribute currently represents depth of extraction rather than redirects. 
--- yt_dlp/YoutubeDL.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 510f0cbae..79f8b2a10 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1822,15 +1822,15 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): - if 'redirect_count' in extra_info: - extra_info['redirect_count'] = 1 + extra_info.get('redirect_count', 0) + if 'extraction_depth' in extra_info: + extra_info['extraction_depth'] = 1 + extra_info.get('extraction_depth', 0) else: - extra_info['redirect_count'] = 0 + extra_info['extraction_depth'] = 0 # TODO: make command line arg with large or infinite default - if extra_info['redirect_count'] >= 5: + if extra_info['extraction_depth'] >= 20: raise ExtractorError( - f"Too many redirects for URL: {ie_result['url']}", + f"Too many hops for URL: {ie_result['url']}", expected=True, ) From 148b36a0394a7bfe519a0b7e9ec67d0362477c26 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Wed, 22 Jan 2025 16:17:40 -0800 Subject: [PATCH 3/4] chore: replace hard-coded limit with command line option --- yt_dlp/YoutubeDL.py | 20 ++++++++++---------- yt_dlp/__init__.py | 2 ++ yt_dlp/options.py | 5 +++++ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 79f8b2a10..0844548d4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1822,17 +1822,17 @@ def process_ie_result(self, ie_result, download=True, extra_info=None): result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): - if 'extraction_depth' in extra_info: - extra_info['extraction_depth'] = 1 + extra_info.get('extraction_depth', 0) - else: - extra_info['extraction_depth'] = 0 + if self.params.get('max_extraction_depth', -1) > 0: + if 'extraction_depth' in extra_info: + extra_info['extraction_depth'] = 1 + 
extra_info.get('extraction_depth', 0) + else: + extra_info['extraction_depth'] = 0 - # TODO: make command line arg with large or infinite default - if extra_info['extraction_depth'] >= 20: - raise ExtractorError( - f"Too many hops for URL: {ie_result['url']}", - expected=True, - ) + if extra_info['extraction_depth'] >= self.params.get('max_extraction_depth'): + raise ExtractorError( + f"Reached maximum extraction depth for URL: {ie_result['url']}", + expected=True, + ) ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 20111175b..0dc9f19a0 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -268,6 +268,7 @@ def parse_retries(name, value): opts.retries = parse_retries('download', opts.retries) opts.fragment_retries = parse_retries('fragment', opts.fragment_retries) opts.extractor_retries = parse_retries('extractor', opts.extractor_retries) + opts.max_extraction_depth = parse_retries('extractor', opts.max_extraction_depth) opts.file_access_retries = parse_retries('file access', opts.file_access_retries) # Retry sleep function @@ -841,6 +842,7 @@ def parse_options(argv=None): 'file_access_retries': opts.file_access_retries, 'fragment_retries': opts.fragment_retries, 'extractor_retries': opts.extractor_retries, + 'max_extraction_depth': opts.max_extraction_depth, 'retry_sleep_functions': opts.retry_sleep, 'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'keep_fragments': opts.keep_fragments, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 06b65e0ea..4bd703e48 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1883,6 +1883,11 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): '--extractor-retries', dest='extractor_retries', metavar='RETRIES', default=3, help='Number of retries for known extractor errors (default is %default), or "infinite"') + extractor.add_option( + 
'--max-extraction-depth', + dest='max_extraction_depth', default=-1, + help='Maximum depth when recursing into non-video url chains (default is unlimited)', + ) extractor.add_option( '--allow-dynamic-mpd', '--no-ignore-dynamic-mpd', action='store_true', dest='dynamic_mpd', default=True, From 440b4466ff64e6589596e35dea7c076d76698dde Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Wed, 22 Jan 2025 17:29:55 -0800 Subject: [PATCH 4/4] fix: adjust max_extraction_depth default value --- yt_dlp/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 4bd703e48..106df3065 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1885,7 +1885,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): help='Number of retries for known extractor errors (default is %default), or "infinite"') extractor.add_option( '--max-extraction-depth', - dest='max_extraction_depth', default=-1, + dest='max_extraction_depth', default='inf', help='Maximum depth when recursing into non-video url chains (default is unlimited)', ) extractor.add_option(