2024-08-25 08:55:24 -05:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
import abc
|
|
|
|
import typing
|
2024-12-28 02:52:00 -06:00
|
|
|
import functools
|
2024-08-25 08:55:24 -05:00
|
|
|
|
2024-12-28 02:52:00 -06:00
|
|
|
from ..extractor.common import InfoExtractor
|
2024-12-30 04:41:09 -06:00
|
|
|
from ..utils import (
|
|
|
|
classproperty,
|
|
|
|
format_field,
|
|
|
|
get_exe_version,
|
|
|
|
variadic,
|
|
|
|
ExtractorError,
|
|
|
|
)
|
2024-08-25 08:55:24 -05:00
|
|
|
|
|
|
|
|
|
|
|
_JSI_HANDLERS: dict[str, type[JSI]] = {}
|
|
|
|
_JSI_PREFERENCES: set[JSIPreference] = set()
|
|
|
|
_ALL_FEATURES = {
|
|
|
|
'js',
|
|
|
|
'wasm',
|
2024-12-29 01:56:58 -06:00
|
|
|
'location',
|
2024-08-25 08:55:24 -05:00
|
|
|
'dom',
|
2024-12-29 19:27:00 -06:00
|
|
|
'cookies',
|
2024-08-25 08:55:24 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def get_jsi_keys(jsi_or_keys: typing.Iterable[str | type[JSI] | JSI]) -> list[str]:
    """Normalise a mix of key strings, JSI classes and JSI instances into a list of key strings.

    Strings are passed through unchanged; any other item contributes its `JSI_KEY` attribute.
    The input order is preserved.
    """
    def _as_key(item):
        return item if isinstance(item, str) else item.JSI_KEY

    return list(map(_as_key, jsi_or_keys))
|
|
|
|
|
|
|
|
|
|
|
|
def order_to_pref(jsi_order: typing.Iterable[str | type[JSI] | JSI], multiplier: int) -> JSIPreference:
    """Turn an ordered list of JSI into a preference function.

    The last entry of `jsi_order` scores `1 * multiplier`, the one before it `2 * multiplier`
    and so on, so the first entry gets the largest score. JSI not listed score 0.

    @param jsi_order: JSI keys, classes or instances, most preferred first.
    @param multiplier: weight applied to each rank.
    @returns: a callable taking a JSI (extra positional arguments are ignored) and returning its score.
    """
    # Walk from least to most preferred so that a key listed twice keeps the
    # score of its earliest (most preferred) position.
    keys_low_to_high = get_jsi_keys(jsi_order)[::-1]
    scores = {key: rank * multiplier for rank, key in enumerate(keys_low_to_high, start=1)}

    def _pref(jsi: JSI, *args):
        return scores.get(jsi.JSI_KEY, 0)

    return _pref
|
|
|
|
|
|
|
|
|
|
|
|
def join_jsi_name(jsi_list: typing.Iterable[str | type[JSI] | JSI], sep=', ') -> str:
    """Join the display names of the given JSI into one string.

    Strings are used as-is; JSI classes and instances contribute their `JSI_NAME`.

    @param jsi_list: JSI names, classes or instances.
    @param sep: separator placed between names.
    @returns: the joined names, or an empty string for an empty input.
    """
    # Every item is already a plain string after this mapping, so no further
    # key normalisation is needed before joining.
    return sep.join(item if isinstance(item, str) else item.JSI_NAME for item in jsi_list)
|
|
|
|
|
|
|
|
|
2024-12-28 02:52:00 -06:00
|
|
|
def require_features(param_features: dict[str, str | typing.Iterable[str]]):
    """Build a decorator guarding feature-dependent keyword arguments of a `JSInterp` method.

    @param param_features: maps a keyword-argument name to the feature (or features) that the
                           `JSInterp` instance must have declared before that keyword may be passed.
    @returns: a decorator; the decorated method raises `ExtractorError` when a guarded keyword
              is passed while its feature(s) are missing from `self._features`.
    """
    # Reject misspelt feature names as soon as the decorator is applied
    declared = param_features.values()
    assert all(_ALL_FEATURES.issuperset(variadic(needed)) for needed in declared)

    def outer(func):
        @functools.wraps(func)
        def inner(self: JSInterp, *args, **kwargs):
            for kw_name, kw_feature in param_features.items():
                # Only keywords that were actually passed are checked
                if kw_name not in kwargs:
                    continue
                if self._features.issuperset(variadic(kw_feature)):
                    continue
                raise ExtractorError(f'feature {kw_feature} is required for `{kw_name}` param but not declared')
            return func(self, *args, **kwargs)

        return inner

    return outer
|
2024-08-25 08:55:24 -05:00
|
|
|
|
|
|
|
|
2024-12-28 02:52:00 -06:00
|
|
|
class JSInterp:
    """
    Helper class to forward JS interp request to a concrete JSI that supports it.

    @param dl_or_ie: `YoutubeDL` or `InfoExtractor` instance.
    @param features: features that every chosen JSI must support; each must be in `_ALL_FEATURES`.
    @param only_include: limit JSI to choose from. Empty means every registered JSI.
    @param exclude: JSI to avoid using.
    @param jsi_params: extra kwargs to pass to `JSI.__init__()` for each JSI, using jsi key as dict key.
    @param preferred_order: list of JSI to use. First in list is tested first.
    @param fallback_jsi: list of JSI that may fail and should act non-fatal and fallback to other JSI.
                         Pass `"all"` to always fallback
    @param timeout: timeout parameter for all chosen JSI
    @param user_agent: override user-agent to use for supported JSI
    """

    def __init__(
        self,
        dl_or_ie: YoutubeDL | InfoExtractor,
        features: typing.Iterable[str] = (),
        only_include: typing.Iterable[str | type[JSI]] = (),
        exclude: typing.Iterable[str | type[JSI]] = (),
        jsi_params: dict[str, dict] | None = None,
        preferred_order: typing.Iterable[str | type[JSI]] = (),
        fallback_jsi: typing.Iterable[str | type[JSI]] | typing.Literal['all'] = (),
        timeout: float | int = 10,
        user_agent: str | None = None,
    ):
        self._downloader: YoutubeDL = dl_or_ie._downloader if isinstance(dl_or_ie, InfoExtractor) else dl_or_ie
        self._features = set(features)

        if unsupported_features := self._features - _ALL_FEATURES:
            raise ExtractorError(f'Unsupported features: {unsupported_features}, allowed features: {_ALL_FEATURES}')

        # Resolve the filters to key lists exactly once: the arguments may be
        # one-shot iterables and are needed again for the debug output below.
        included_keys = get_jsi_keys(only_include)
        excluded_keys = get_jsi_keys(exclude)
        jsi_params = jsi_params or {}

        jsi_keys = [key for key in (included_keys or get_jsi_keys(_JSI_HANDLERS)) if key not in excluded_keys]
        self.write_debug(f'Allowed JSI keys: {jsi_keys}')
        handler_classes = [_JSI_HANDLERS[key] for key in jsi_keys
                           if _JSI_HANDLERS[key]._SUPPORTED_FEATURES.issuperset(self._features)]
        self.write_debug(f'Selected JSI classes for given features: {get_jsi_keys(handler_classes)}, '
                         f'included: {included_keys or "all"}, excluded: {excluded_keys}')

        # One instance per selected class, keyed by JSI_KEY
        self._handler_dict = {
            cls.JSI_KEY: cls(self._downloader, timeout=timeout, features=self._features, user_agent=user_agent,
                             **jsi_params.get(cls.JSI_KEY, {})) for cls in handler_classes}
        # The caller's explicit order is combined with the globally registered preferences
        self.preferences: set[JSIPreference] = {order_to_pref(preferred_order, 100)} | _JSI_PREFERENCES
        self._fallback_jsi = get_jsi_keys(handler_classes) if fallback_jsi == 'all' else get_jsi_keys(fallback_jsi)
        self._is_test = self._downloader.params.get('test', False)

    def add_handler(self, handler: JSI):
        """Add a handler. If a handler of the same JSI_KEY exists, it will overwrite it"""
        assert isinstance(handler, JSI), 'handler must be a JSI instance'
        if not handler._SUPPORTED_FEATURES.issuperset(self._features):
            raise ExtractorError(f'{handler.JSI_NAME} does not support all required features: {self._features}')
        self._handler_dict[handler.JSI_KEY] = handler

    def write_debug(self, message, only_once=False):
        """Forward a debug message, tagged with the director prefix, to the downloader."""
        return self._downloader.write_debug(f'[JSIDirector] {message}', only_once=only_once)

    def report_warning(self, message, only_once=False):
        """Forward a warning, tagged with the director prefix, to the downloader."""
        return self._downloader.report_warning(f'[JSIDirector] {message}', only_once=only_once)

    def _get_handlers(self, method_name: str, *args, **kwargs) -> list[JSI]:
        """Return the handlers implementing `method_name`, highest summed preference first.

        `args` and `kwargs` of the pending request are handed to every preference function.
        Raises `ExtractorError` when no handler has a callable attribute named `method_name`.
        """
        handlers = [h for h in self._handler_dict.values() if callable(getattr(h, method_name, None))]
        self.write_debug(f'Choosing handlers for method `{method_name}`: {get_jsi_keys(handlers)}')
        if not handlers:
            raise ExtractorError(f'No JSI supports method `{method_name}`, '
                                 f'included handlers: {get_jsi_keys(self._handler_dict.values())}')

        preferences = {
            handler.JSI_KEY: sum(pref_func(handler, method_name, args, kwargs) for pref_func in self.preferences)
            for handler in handlers
        }
        self.write_debug('JSI preferences for `{}` request: {}'.format(
            method_name, ', '.join(f'{key}={pref}' for key, pref in preferences.items())))

        return sorted(handlers, key=lambda h: preferences[h.JSI_KEY], reverse=True)

    def _dispatch_request(self, method_name: str, *args, **kwargs):
        """Run `method_name` on the most preferred handler that is available and succeeds.

        Normal mode: the first successful result is returned. An exception from a handler
        listed in `fallback_jsi` is recorded and the next handler is tried; an exception from
        any other handler propagates immediately.

        Test mode (downloader param `test`): an unavailable handler is an error, every handler
        is run, a warning is emitted when results differ, and the result of the most preferred
        handler that succeeded is returned.

        Raises `ExtractorError` when no handler produced a result.
        """
        handlers = self._get_handlers(method_name, *args, **kwargs)

        unavailable: list[JSI] = []
        exceptions: list[tuple[JSI, Exception]] = []
        test_results: list[tuple[JSI, typing.Any]] = []

        for handler in handlers:
            if not handler.is_available():
                if self._is_test:
                    raise Exception(f'{handler.JSI_NAME} is not available for testing, '
                                    f'add "{handler.JSI_KEY}" in `exclude` if it should not be used')
                self.write_debug(f'{handler.JSI_NAME} is not available')
                unavailable.append(handler)
                continue
            try:
                self.write_debug(f'Dispatching `{method_name}` task to {handler.JSI_NAME}')
                result = getattr(handler, method_name)(*args, **kwargs)
                if self._is_test:
                    # Keep going so that every handler's output can be compared
                    test_results.append((handler, result))
                else:
                    return result
            except Exception as e:
                if handler.JSI_KEY not in self._fallback_jsi:
                    raise
                else:
                    exceptions.append((handler, e))
                    self.write_debug(f'{handler.JSI_NAME} encountered error, fallback to next handler: {e}')

        if self._is_test and test_results:
            # The first entry comes from the most preferred handler; it is the reference
            ref_handler, ref_result = test_results[0]
            for handler, result in test_results[1:]:
                if result != ref_result:
                    self.report_warning(
                        f'Different JSI results produced from {ref_handler.JSI_NAME} and {handler.JSI_NAME}')
            return ref_result

        if not exceptions:
            msg = f'No available JSI installed, please install one of: {join_jsi_name(unavailable)}'
        else:
            msg = f'Failed to perform {method_name}, total {len(exceptions)} errors'
            if unavailable:
                msg = f'{msg}. You can try installing one of unavailable JSI: {join_jsi_name(unavailable)}'
        raise ExtractorError(msg)

    @require_features({'location': 'location', 'html': 'dom', 'cookiejar': 'cookies'})
    def execute(self, jscode: str, video_id: str | None, **kwargs) -> str:
        """
        Execute JS code and return stdout from console.log

        @param {str} jscode: JS code to execute
        @param video_id: video id
        @param note: note
        @param {str} location: url to configure window.location, requires `location` feature
        @param {str} html: html to load as document, requires `dom` feature
        @param {YoutubeDLCookieJar} cookiejar: cookiejar to set cookies, requires url and `cookies` feature
        """
        return self._dispatch_request('execute', jscode, video_id, **kwargs)
|
2024-12-28 02:52:00 -06:00
|
|
|
|
2024-08-25 08:55:24 -05:00
|
|
|
|
|
|
|
class JSI(abc.ABC):
    """Abstract base class of a JS interpreter handler.

    Subclass names must end with ``JSI``; the name without that suffix is used as both
    `JSI_KEY` and `JSI_NAME`. Subclasses list the features they provide in
    `_SUPPORTED_FEATURES` and may raise their ranking through `_BASE_PREFERENCE`.
    """

    _SUPPORTED_FEATURES: set[str] = set()
    _BASE_PREFERENCE: int = 0

    def __init__(self, downloader: YoutubeDL, timeout: float | int, features: set[str], user_agent=None):
        """
        @param downloader: `YoutubeDL` instance used for output and parameters.
        @param timeout: stored as `self.timeout`.
        @param features: features requested from this JSI; all must be in `_SUPPORTED_FEATURES`.
        @param user_agent: user-agent override; when falsy, the downloader's
                           `http_headers` `User-Agent` param is used.
        Raises `ExtractorError` when a requested feature is not supported.
        """
        supports_all = self._SUPPORTED_FEATURES.issuperset(features)
        if not supports_all:
            raise ExtractorError(f'{self.JSI_NAME} does not support all required features: {features}')

        self._downloader = downloader
        self.timeout = timeout
        self.features = features
        if not user_agent:
            user_agent = self._downloader.params['http_headers']['User-Agent']
        self.user_agent: str = user_agent

    @abc.abstractmethod
    def is_available(self) -> bool:
        raise NotImplementedError

    def write_debug(self, message, *args, **kwargs):
        """Send a debug message, prefixed with this JSI's key, to the downloader."""
        tagged = f'[{self.JSI_KEY}] {message}'
        self._downloader.write_debug(tagged, *args, **kwargs)

    def report_warning(self, message, *args, **kwargs):
        """Send a warning, prefixed with this JSI's key, to the downloader."""
        tagged = f'[{self.JSI_KEY}] {message}'
        self._downloader.report_warning(tagged, *args, **kwargs)

    def to_screen(self, msg, *args, **kwargs):
        """Print a message, prefixed with this JSI's key, through the downloader."""
        tagged = f'[{self.JSI_KEY}] {msg}'
        self._downloader.to_screen(tagged, *args, **kwargs)

    def report_note(self, video_id, note):
        """Print `note` to screen, with `video_id` formatted as a ``"<id>: "`` prefix."""
        id_prefix = format_field(video_id, None, "%s: ")
        self.to_screen(f'{id_prefix}{note}')

    @classproperty
    def JSI_NAME(cls) -> str:
        # Class name with its last three characters (the "JSI" suffix) removed
        return cls.__name__[:-3]

    @classproperty
    def JSI_KEY(cls) -> str:
        assert cls.__name__.endswith('JSI'), 'JSI class names must end with "JSI"'
        return cls.__name__[:-3]
|
|
|
|
|
|
|
|
|
2024-12-30 04:41:09 -06:00
|
|
|
class ExternalJSI(JSI, abc.ABC):
    """Base class for JSI backed by an external executable named by `_EXE_NAME`."""

    _EXE_NAME: str

    @classproperty(cache=True)
    def exe_version(cls):
        # Subclasses may define `V_ARGS` to override the arguments used to query the version
        exe_name = cls._EXE_NAME
        version_args = getattr(cls, 'V_ARGS', ['--version'])
        return get_exe_version(exe_name, args=version_args, version_re=r'([0-9.]+)')

    @classproperty
    def exe(cls):
        # The executable name, or None when no version could be obtained for it
        if cls.exe_version:
            return cls._EXE_NAME
        return None

    @classmethod
    def is_available(cls):
        """Return True when `exe` resolves to a non-empty executable name."""
        return bool(cls.exe)
|
|
|
|
|
|
|
|
|
2024-12-28 02:52:00 -06:00
|
|
|
def register_jsi(jsi_cls: JsiClass) -> JsiClass:
    """Register a JS interpreter class

    Usable as a class decorator: the class is stored in `_JSI_HANDLERS` under its
    `JSI_KEY` and returned unchanged.

    The class must be a `JSI` subclass, its key must not be registered yet, and every
    entry of its `_SUPPORTED_FEATURES` must be declared in `_ALL_FEATURES`; each of
    these is checked with an assertion.
    """
    assert issubclass(jsi_cls, JSI), f'{jsi_cls} must be a subclass of JSI'
    assert jsi_cls.JSI_KEY not in _JSI_HANDLERS, f'JSI {jsi_cls.JSI_KEY} already registered'
    # The message names the real module constant so that it can be searched for
    assert jsi_cls._SUPPORTED_FEATURES.issubset(_ALL_FEATURES), (
        f'{jsi_cls._SUPPORTED_FEATURES - _ALL_FEATURES} not declared in `_ALL_FEATURES`')
    _JSI_HANDLERS[jsi_cls.JSI_KEY] = jsi_cls
    return jsi_cls
|
2024-08-25 08:55:24 -05:00
|
|
|
|
|
|
|
|
|
|
|
def register_jsi_preference(*handlers: type[JSI]):
    """Build a decorator that registers a preference function in `_JSI_PREFERENCES`.

    @param handlers: JSI classes the preference applies to. When none are given, the
                     preference applies to every handler.
    @returns: a decorator. It registers and returns a wrapper that calls the decorated
              function for instances of `handlers` (or for all handlers when `handlers`
              is empty) and yields 0 for any other handler.
    """
    assert all(issubclass(handler, JSI) for handler in handlers), f'{handlers} must all be a subclass of JSI'

    def outer(pref_func: JSIPreference) -> JSIPreference:
        # Keep the decorated function's name and docstring on the registered wrapper
        @functools.wraps(pref_func)
        def inner(handler: JSI, *args):
            if not handlers or isinstance(handler, handlers):
                return pref_func(handler, *args)
            return 0

        _JSI_PREFERENCES.add(inner)
        return inner

    return outer
|
|
|
|
|
|
|
|
|
|
|
|
@register_jsi_preference()
def _base_preference(handler: JSI, *args):
    """Preference registered for all handlers: the handler's `_BASE_PREFERENCE`, or 0 if it has none."""
    try:
        return handler._BASE_PREFERENCE
    except AttributeError:
        return 0
|
|
|
|
|
|
|
|
|
|
|
|
if typing.TYPE_CHECKING:
    # Everything in this branch exists for type checkers only; it is not executed at runtime.
    from ..YoutubeDL import YoutubeDL

    # Type variable bound to JSI classes, used to annotate `register_jsi`
    JsiClass = typing.TypeVar('JsiClass', bound=type[JSI])

    class JSIPreference(typing.Protocol):
        """Callable scoring a handler for a request; `JSInterp` tries handlers with a larger summed score first."""

        def __call__(self, handler: JSI, method_name: str, *args, **kwargs) -> int:
            ...
|