mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
test script tag
This commit is contained in:
parent
fb474064ee
commit
b87a0582c5
4 changed files with 64 additions and 13 deletions
|
@ -32,6 +32,48 @@ def jsi_available(self):
|
||||||
def test_execute(self):
    # Smoke test: the text the script logs is expected back as the result of execute().
    output = self.jsi.execute('console.log("Hello, world!");')
    self.assertEqual(output, 'Hello, world!')
|
||||||
|
|
||||||
|
def test_execute_dom_parse(self):
    # Only meaningful for interpreters that list 'dom' among their supported features.
    supported = self.jsi._SUPPORTED_FEATURES
    if 'dom' not in supported:
        self.skipTest('DOM not supported')

    # The div already holds the expected text, so this checks that the
    # supplied markup is parsed and reachable through `document`.
    markup = '<html><body><div id="test-div">Hello, world!</div></body></html>'
    output = self.jsi.execute(
        'console.log(document.getElementById("test-div").innerHTML);',
        location='https://example.com',
        html=markup)
    self.assertEqual(output, 'Hello, world!')
|
||||||
|
|
||||||
|
def test_execute_dom_script(self):
    # Only meaningful for interpreters that list 'dom' among their supported features.
    supported = self.jsi._SUPPORTED_FEATURES
    if 'dom' not in supported:
        self.skipTest('DOM not supported')

    # The div starts out empty; the expected text is written by the inline
    # <script>, so the assertion only holds if that script was run before
    # the code passed to execute().
    page = '''<html><body>
<div id="test-div"></div>
<script src="https://example.com/script.js"></script>
<script type="text/javascript">
document.getElementById("test-div").innerHTML = "Hello, world!"
</script>
</body></html>'''
    output = self.jsi.execute(
        'console.log(document.getElementById("test-div").innerHTML);',
        location='https://example.com',
        html=page)
    self.assertEqual(output, 'Hello, world!')
|
||||||
|
|
||||||
|
def test_execute_dom_script_with_error(self):
    # Only meaningful for interpreters that list 'dom' among their supported features.
    supported = self.jsi._SUPPORTED_FEATURES
    if 'dom' not in supported:
        self.skipTest('DOM not supported')
    # PhantomJS is excluded explicitly (see the skip message below).
    if self.jsi.JSI_KEY == 'PhantomJS':
        self.skipTest('PhantomJS does not catch errors')

    # Same page as the plain inline-script case, except the inline script
    # references an undefined variable after filling the div. The text
    # written before the failing statement must still be observable.
    page = '''<html><body>
<div id="test-div"></div>
<script src="https://example.com/script.js"></script>
<script type="text/javascript">
document.getElementById("test-div").innerHTML = "Hello, world!"
a = b; // Undefined variable assignment
</script>
</body></html>'''
    output = self.jsi.execute(
        'console.log(document.getElementById("test-div").innerHTML);',
        location='https://example.com',
        html=page)
    self.assertEqual(output, 'Hello, world!')
|
||||||
|
|
||||||
|
|
||||||
class TestDeno(Base.TestExternalJSI):
|
class TestDeno(Base.TestExternalJSI):
|
||||||
_JSI_CLASS = DenoJSI
|
_JSI_CLASS = DenoJSI
|
||||||
|
|
|
@ -137,6 +137,12 @@ def _ensure_jsdom(self):
|
||||||
self._run_deno(cmd)
|
self._run_deno(cmd)
|
||||||
self._JSDOM_IMPORT_CHECKED = True
|
self._JSDOM_IMPORT_CHECKED = True
|
||||||
|
|
||||||
|
def _parse_script_tags(self, html: str):
    """Yield the body of each non-empty ``<script>`` element found in *html*.

    The markup is scanned left to right. After an opening tag is found,
    everything up to the next closing tag is treated as that script's body
    and scanning resumes *after* the closing tag. Text that merely looks
    like ``<script>`` inside a script body (e.g. within a JS string) is
    therefore never reported as a script of its own.

    Tag names are matched case-insensitively and the closing tag may carry
    whitespace before ``>``, since HTML permits both.

    Elements with an empty body (such as ``<script src=...></script>``) and
    an opening tag without a closing tag produce nothing.

    Args:
        html: Markup to scan; an empty string yields nothing.

    Yields:
        The raw text between each opening and closing script tag.
    """
    # `\b` keeps other tag names that merely start with "script" from matching.
    opening_tag = re.compile(r'<script\b[^>]*>', re.IGNORECASE)
    closing_tag = re.compile(r'</script\s*>', re.IGNORECASE)

    position = 0
    while True:
        opened = opening_tag.search(html, position)
        if opened is None:
            return
        closed = closing_tag.search(html, opened.end())
        if closed is None:
            # No closing tag after this point, so no later element can be complete either.
            return
        if closed.start() > opened.end():
            yield html[opened.end():closed.start()]
        # Resume after the closing tag so the body just consumed is not rescanned.
        position = closed.end()
|
||||||
|
|
||||||
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='', html='', cookiejar=None):
|
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='', html='', cookiejar=None):
|
||||||
self.report_note(video_id, note)
|
self.report_note(video_id, note)
|
||||||
self._ensure_jsdom()
|
self._ensure_jsdom()
|
||||||
|
@ -144,13 +150,13 @@ def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='
|
||||||
|
|
||||||
inline_scripts = '\n'.join([
|
inline_scripts = '\n'.join([
|
||||||
'try { %s } catch (e) {}' % script
|
'try { %s } catch (e) {}' % script
|
||||||
for script in re.findall(r'<script[^>]*>(.+?)</script>', html, re.DOTALL)
|
for script in self._parse_script_tags(html)
|
||||||
])
|
])
|
||||||
|
|
||||||
script = f'''{self._init_script};
|
script = f'''{self._init_script};
|
||||||
{self._override_navigator_js};
|
{self._override_navigator_js};
|
||||||
import jsdom from "{self._JSDOM_URL}";
|
import jsdom from "{self._JSDOM_URL}";
|
||||||
const {callback_varname} = (() => {{
|
let {callback_varname} = (() => {{
|
||||||
const jar = jsdom.CookieJar.deserializeSync({json.dumps(self.serialize_cookie(cookiejar, location))});
|
const jar = jsdom.CookieJar.deserializeSync({json.dumps(self.serialize_cookie(cookiejar, location))});
|
||||||
const dom = new jsdom.JSDOM({json.dumps(str(html))}, {{
|
const dom = new jsdom.JSDOM({json.dumps(str(html))}, {{
|
||||||
{'url: %s,' % json.dumps(str(location)) if location else ''}
|
{'url: %s,' % json.dumps(str(location)) if location else ''}
|
||||||
|
@ -158,15 +164,18 @@ def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='
|
||||||
}});
|
}});
|
||||||
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}});
|
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}});
|
||||||
delete window.jsdom;
|
delete window.jsdom;
|
||||||
|
return () => {{
|
||||||
const stdout = [];
|
const stdout = [];
|
||||||
const origLog = console.log;
|
const origLog = console.log;
|
||||||
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' '));
|
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' '));
|
||||||
return () => {{ origLog(JSON.stringify({{
|
return () => {{ origLog(JSON.stringify({{
|
||||||
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
|
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
|
||||||
|
}}
|
||||||
}})();
|
}})();
|
||||||
await (async () => {{
|
await (async () => {{
|
||||||
{inline_scripts}
|
{inline_scripts}
|
||||||
}})();
|
}})();
|
||||||
|
{callback_varname} = {callback_varname}();
|
||||||
await (async () => {{
|
await (async () => {{
|
||||||
{jscode}
|
{jscode}
|
||||||
}})().finally({callback_varname});
|
}})().finally({callback_varname});
|
||||||
|
|
|
@ -23,7 +23,7 @@
|
||||||
@register_jsi
|
@register_jsi
|
||||||
class PhantomJSJSI(ExternalJSI):
|
class PhantomJSJSI(ExternalJSI):
|
||||||
_EXE_NAME = 'phantomjs'
|
_EXE_NAME = 'phantomjs'
|
||||||
_SUPPORTED_FEATURES = {'js', 'location', 'cookies'}
|
_SUPPORTED_FEATURES = {'js', 'location', 'cookies', 'dom'}
|
||||||
_BASE_PREFERENCE = 3
|
_BASE_PREFERENCE = 3
|
||||||
|
|
||||||
_BASE_JS = R'''
|
_BASE_JS = R'''
|
||||||
|
|
|
@ -92,7 +92,7 @@ def __init__(
|
||||||
jsi_keys = [key for key in get_jsi_keys(only_include or _JSI_HANDLERS) if key not in get_jsi_keys(exclude)]
|
jsi_keys = [key for key in get_jsi_keys(only_include or _JSI_HANDLERS) if key not in get_jsi_keys(exclude)]
|
||||||
self.write_debug(f'Allowed JSI keys: {jsi_keys}')
|
self.write_debug(f'Allowed JSI keys: {jsi_keys}')
|
||||||
handler_classes = [_JSI_HANDLERS[key] for key in jsi_keys
|
handler_classes = [_JSI_HANDLERS[key] for key in jsi_keys
|
||||||
if _JSI_HANDLERS[key]._SUPPORT_FEATURES.issuperset(self._features)]
|
if _JSI_HANDLERS[key]._SUPPORTED_FEATURES.issuperset(self._features)]
|
||||||
self.write_debug(f'Selected JSI classes for given features: {get_jsi_keys(handler_classes)}, '
|
self.write_debug(f'Selected JSI classes for given features: {get_jsi_keys(handler_classes)}, '
|
||||||
f'included: {get_jsi_keys(only_include) or "all"}, excluded: {get_jsi_keys(exclude)}')
|
f'included: {get_jsi_keys(only_include) or "all"}, excluded: {get_jsi_keys(exclude)}')
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ def __init__(
|
||||||
def add_handler(self, handler: JSI):
|
def add_handler(self, handler: JSI):
|
||||||
"""Add a handler. If a handler of the same JSI_KEY exists, it will overwrite it"""
|
"""Add a handler. If a handler of the same JSI_KEY exists, it will overwrite it"""
|
||||||
assert isinstance(handler, JSI), 'handler must be a JSI instance'
|
assert isinstance(handler, JSI), 'handler must be a JSI instance'
|
||||||
if not handler._SUPPORT_FEATURES.issuperset(self._features):
|
if not handler._SUPPORTED_FEATURES.issuperset(self._features):
|
||||||
raise ExtractorError(f'{handler.JSI_NAME} does not support all required features: {self._features}')
|
raise ExtractorError(f'{handler.JSI_NAME} does not support all required features: {self._features}')
|
||||||
self._handler_dict[handler.JSI_KEY] = handler
|
self._handler_dict[handler.JSI_KEY] = handler
|
||||||
|
|
||||||
|
@ -193,11 +193,11 @@ def execute(self, jscode: str, video_id: str | None, **kwargs) -> str:
|
||||||
|
|
||||||
|
|
||||||
class JSI(abc.ABC):
|
class JSI(abc.ABC):
|
||||||
_SUPPORT_FEATURES: set[str] = set()
|
_SUPPORTED_FEATURES: set[str] = set()
|
||||||
_BASE_PREFERENCE: int = 0
|
_BASE_PREFERENCE: int = 0
|
||||||
|
|
||||||
def __init__(self, downloader: YoutubeDL, timeout: float | int, features: set[str], user_agent=None):
|
def __init__(self, downloader: YoutubeDL, timeout: float | int, features: set[str], user_agent=None):
|
||||||
if not self._SUPPORT_FEATURES.issuperset(features):
|
if not self._SUPPORTED_FEATURES.issuperset(features):
|
||||||
raise ExtractorError(f'{self.JSI_NAME} does not support all required features: {features}')
|
raise ExtractorError(f'{self.JSI_NAME} does not support all required features: {features}')
|
||||||
self._downloader = downloader
|
self._downloader = downloader
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
|
@ -250,7 +250,7 @@ def register_jsi(jsi_cls: JsiClass) -> JsiClass:
|
||||||
"""Register a JS interpreter class"""
|
"""Register a JS interpreter class"""
|
||||||
assert issubclass(jsi_cls, JSI), f'{jsi_cls} must be a subclass of JSI'
|
assert issubclass(jsi_cls, JSI), f'{jsi_cls} must be a subclass of JSI'
|
||||||
assert jsi_cls.JSI_KEY not in _JSI_HANDLERS, f'JSI {jsi_cls.JSI_KEY} already registered'
|
assert jsi_cls.JSI_KEY not in _JSI_HANDLERS, f'JSI {jsi_cls.JSI_KEY} already registered'
|
||||||
assert jsi_cls._SUPPORT_FEATURES.issubset(_ALL_FEATURES), f'{jsi_cls._SUPPORT_FEATURES - _ALL_FEATURES} not declared in `_All_FEATURES`'
|
assert jsi_cls._SUPPORTED_FEATURES.issubset(_ALL_FEATURES), f'{jsi_cls._SUPPORTED_FEATURES - _ALL_FEATURES} not declared in `_All_FEATURES`'
|
||||||
_JSI_HANDLERS[jsi_cls.JSI_KEY] = jsi_cls
|
_JSI_HANDLERS[jsi_cls.JSI_KEY] = jsi_cls
|
||||||
return jsi_cls
|
return jsi_cls
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue