diff --git a/test/test_download.py b/test/test_download.py index 5c6d4f99d..773172663 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -25,13 +25,14 @@ import yt_dlp.YoutubeDL # isort: split from yt_dlp.extractor import get_info_extractor -from yt_dlp.jsinterp.common import filter_jsi_feature, filter_jsi_include +from yt_dlp.jsinterp.common import filter_jsi_keys from yt_dlp.networking.exceptions import HTTPError, TransportError from yt_dlp.utils import ( DownloadError, ExtractorError, UnavailableVideoError, YoutubeDLError, + filter_dict, format_bytes, join_nonempty, ) @@ -83,24 +84,26 @@ def __str__(self): # Dynamically generate tests def generator(test_case, tname): - def generate_sub_case(jsi_key): - sub_case = {k: v for k, v in test_case.items() if not k.startswith('jsi_matrix')} - sub_case['params'] = {**test_case.get('params', {}), 'jsi_preference': [jsi_key]} - return generator(sub_case, f'{tname}_{jsi_key}') # setting `jsi_matrix` to True, `jsi_matrix_features` to list, or - # setting `jsi_matrix_only_include` or `jsi_matrix_exclude` to non-empty - # to trigger matrix behavior + # setting `jsi_matrix_only_include` or `jsi_matrix_exclude` to non-empty list + # to trigger matrix behavior for JSI if isinstance(test_case.get('jsi_matrix_features'), list) or any(test_case.get(key) for key in [ 'jsi_matrix', 'jsi_matrix_only_include', 'jsi_matrix_exclude', ]): - jsi_keys = filter_jsi_feature(test_case.get('jsi_matrix_features', []), filter_jsi_include( - test_case.get('jsi_matrix_only_include', None), test_case.get('jsi_matrix_exclude', None))) + jsi_keys = filter_jsi_keys( + test_case.get('jsi_matrix_features'), test_case.get('jsi_matrix_only_include'), + test_case.get('jsi_matrix_exclude')) + + def generate_jsi_sub_case(jsi_key): + sub_case = filter_dict(test_case, lambda k, _: not k.startswith('jsi_matrix')) + sub_case['params'] = {**test_case.get('params', {}), 'jsi_preference': [jsi_key]} + return generator(sub_case, f'{tname}_{jsi_key}') def run_sub_cases(self): for i, jsi_key in enumerate(jsi_keys): print(f'Running case {tname} using JSI: {jsi_key} ({i + 1}/{len(jsi_keys)})') - generate_sub_case(jsi_key)(self) + generate_jsi_sub_case(jsi_key)(self) return run_sub_cases def test_template(self): diff --git a/test/test_jsi_external.py b/test/test_jsi_external.py index e4b8f9c17..02098a6a3 100644 --- a/test/test_jsi_external.py +++ b/test/test_jsi_external.py @@ -20,6 +20,8 @@ variadic, ) from yt_dlp.cookies import YoutubeDLCookieJar +from yt_dlp.jsinterp import _JSI_HANDLERS +assert set(_JSI_HANDLERS) == {'Deno', 'DenoJSDom', 'PhantomJS'} from yt_dlp.jsinterp.common import ExternalJSI, _ALL_FEATURES from yt_dlp.jsinterp._deno import DenoJSI, DenoJSDomJSI from yt_dlp.jsinterp._phantomjs import PhantomJSJSI diff --git a/yt_dlp/jsinterp/__init__.py b/yt_dlp/jsinterp/__init__.py index fd90b92ad..8133cfeef 100644 --- a/yt_dlp/jsinterp/__init__.py +++ b/yt_dlp/jsinterp/__init__.py @@ -2,6 +2,7 @@ from .native import JSInterpreter from .common import _JSI_PREFERENCES, _JSI_HANDLERS, JSIWrapper from ._phantomjs import PhantomJSwrapper +from . import _deno # ensure jsi registration __all__ = [ diff --git a/yt_dlp/jsinterp/_helper.py b/yt_dlp/jsinterp/_helper.py index 389204f9e..ff2e1b87e 100644 --- a/yt_dlp/jsinterp/_helper.py +++ b/yt_dlp/jsinterp/_helper.py @@ -74,6 +74,7 @@ def random_string(length: int = 10) -> str: def override_navigator_js(user_agent: str) -> str: + """Generate js snippet to override navigator properties based on user_agent string""" return '\n'.join([ 'Object.defineProperty(navigator, "%s", { value: %s, configurable: true });' % (k, json.dumps(v)) for k, v in { @@ -113,7 +114,7 @@ def extract_script_tags(html: str) -> tuple[str, list[str]]: def prepare_wasm_jsmodule(js_mod: str, wasm: bytes) -> str: """ - Prepare wasm init for js wrapper module generated by rust wasm-pack + Sanitize js wrapper module generated by rust wasm-pack for wasm init removes export and import.meta and inlines wasm binary as Uint8Array See test/test_data/jsi_external/hello_wasm.js for example diff --git a/yt_dlp/jsinterp/common.py b/yt_dlp/jsinterp/common.py index f95b9ab63..248fbe556 100644 --- a/yt_dlp/jsinterp/common.py +++ b/yt_dlp/jsinterp/common.py @@ -31,6 +31,18 @@ def get_jsi_keys(jsi_or_keys: typing.Iterable[str | type[JSI] | JSI]) -> list[st return [jok if isinstance(jok, str) else jok.JSI_KEY for jok in jsi_or_keys] +def filter_jsi_keys(features=None, only_include=None, exclude=None): + keys = list(_JSI_HANDLERS) + if features: + keys = [key for key in keys if key in _JSI_HANDLERS + and _JSI_HANDLERS[key]._SUPPORTED_FEATURES.issuperset(features)] + if only_include: + keys = [key for key in keys if key in get_jsi_keys(only_include)] + if exclude: + keys = [key for key in keys if key not in get_jsi_keys(exclude)] + return keys + + def filter_jsi_include(only_include: typing.Iterable[str] | None, exclude: typing.Iterable[str] | None): keys = get_jsi_keys(only_include) if only_include else _JSI_HANDLERS.keys() return [key for key in keys if key not in (exclude or [])] @@ -123,9 +135,7 @@ def __init__( self.report_warning(f'`{invalid_key}` is not a valid JSI, ignoring preference setting') user_prefs.remove(invalid_key) - jsi_keys = filter_jsi_include(only_include, exclude) - self.write_debug(f'Allowed JSI keys: {jsi_keys}') - handler_classes = [_JSI_HANDLERS[key] for key in filter_jsi_feature(self._features, jsi_keys)] + handler_classes = [_JSI_HANDLERS[key] for key in filter_jsi_keys(self._features, only_include, exclude)] self.write_debug(f'Select JSI for features={self._features}: {get_jsi_keys(handler_classes)}, ' f'included: {get_jsi_keys(only_include) or "all"}, excluded: {get_jsi_keys(exclude)}') if not handler_classes: @@ -208,12 +218,8 @@ def execute(self, jscode: str, video_id: str | None, note: str | None = None, @param html: html to load as document, requires `dom` feature @param cookiejar: cookiejar to read and set cookies, requires `cookies` feature, pass `InfoExtractor.cookiejar` if you want to read and write cookies """ - kwargs = filter_dict({ - 'note': note, - 'html': html, - 'cookiejar': cookiejar, - }) - return self._dispatch_request('execute', jscode, video_id, **kwargs) + return self._dispatch_request('execute', jscode, video_id, **filter_dict({ + 'note': note, 'html': html, 'cookiejar': cookiejar})) class JSI(abc.ABC):