mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
jsdom
This commit is contained in:
parent
fdd98ba6e0
commit
03a99eefae
2 changed files with 86 additions and 35 deletions
|
@ -15,6 +15,7 @@
|
||||||
'wasm',
|
'wasm',
|
||||||
'location',
|
'location',
|
||||||
'dom',
|
'dom',
|
||||||
|
'cookies',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -167,7 +168,7 @@ def _dispatch_request(self, method_name: str, *args, **kwargs):
|
||||||
msg = f'{msg}. You can try installing one of unavailable JSI: {join_jsi_name(unavailable)}'
|
msg = f'{msg}. You can try installing one of unavailable JSI: {join_jsi_name(unavailable)}'
|
||||||
raise ExtractorError(msg)
|
raise ExtractorError(msg)
|
||||||
|
|
||||||
@require_features({'url': 'location', 'html': 'dom'})
|
@require_features({'location': 'location', 'html': 'dom', 'cookiejar': 'cookies'})
|
||||||
def execute(self, jscode: str, video_id: str | None, **kwargs) -> str:
|
def execute(self, jscode: str, video_id: str | None, **kwargs) -> str:
|
||||||
"""
|
"""
|
||||||
Execute JS code and return stdout from console.log
|
Execute JS code and return stdout from console.log
|
||||||
|
@ -175,24 +176,12 @@ def execute(self, jscode: str, video_id: str | None, **kwargs) -> str:
|
||||||
@param {str} jscode: JS code to execute
|
@param {str} jscode: JS code to execute
|
||||||
@param video_id: video id
|
@param video_id: video id
|
||||||
@param note: note
|
@param note: note
|
||||||
@param {str} url: url to set location to, requires `location` feature
|
@param {str} location: url to configure window.location, requires `location` feature
|
||||||
@param {str} html: html to load as document, requires `dom` feature
|
@param {str} html: html to load as document, requires `dom` feature
|
||||||
|
@param {YoutubeDLCookieJar} cookiejar: cookiejar to set cookies, requires url and `cookies` feature
|
||||||
"""
|
"""
|
||||||
return self._dispatch_request('execute', jscode, video_id, **kwargs)
|
return self._dispatch_request('execute', jscode, video_id, **kwargs)
|
||||||
|
|
||||||
@require_features({'url': 'location', 'html': 'dom'})
|
|
||||||
def evaluate(self, jscode: str, video_id: str | None, **kwargs) -> typing.Any:
|
|
||||||
"""
|
|
||||||
Evaluate JS code and return result
|
|
||||||
|
|
||||||
@param {str} jscode: JS code to execute
|
|
||||||
@param video_id: video id
|
|
||||||
@param note: note
|
|
||||||
@param {str} url: url to set location to, requires `location` feature
|
|
||||||
@param {str} html: html to load as document, requires `dom` feature
|
|
||||||
"""
|
|
||||||
return self._dispatch_request('evaluate', jscode, video_id, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class JSI(abc.ABC):
|
class JSI(abc.ABC):
|
||||||
_SUPPORT_FEATURES: set[str] = set()
|
_SUPPORT_FEATURES: set[str] = set()
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
import tempfile
|
import tempfile
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import typing
|
import typing
|
||||||
|
import http.cookiejar
|
||||||
|
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -19,6 +20,8 @@
|
||||||
get_exe_version,
|
get_exe_version,
|
||||||
is_outdated_version,
|
is_outdated_version,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
from .common import JSI, register_jsi
|
from .common import JSI, register_jsi
|
||||||
|
|
||||||
|
@ -126,7 +129,7 @@ def __init__(self, downloader: YoutubeDL, timeout=None, flags=[], replace_flags=
|
||||||
self._flags = flags if replace_flags else [*self._DENO_FLAGS, *flags]
|
self._flags = flags if replace_flags else [*self._DENO_FLAGS, *flags]
|
||||||
self._init_script = self._INIT_SCRIPT if init_script is None else init_script
|
self._init_script = self._INIT_SCRIPT if init_script is None else init_script
|
||||||
|
|
||||||
def _run_deno(self, cmd, video_id=None):
|
def _run_deno(self, cmd):
|
||||||
self.write_debug(f'Deno command line: {shell_quote(cmd)}')
|
self.write_debug(f'Deno command line: {shell_quote(cmd)}')
|
||||||
try:
|
try:
|
||||||
stdout, stderr, returncode = Popen.run(
|
stdout, stderr, returncode = Popen.run(
|
||||||
|
@ -136,15 +139,15 @@ def _run_deno(self, cmd, video_id=None):
|
||||||
if returncode:
|
if returncode:
|
||||||
raise ExtractorError(f'Failed with returncode {returncode}:\n{stderr}')
|
raise ExtractorError(f'Failed with returncode {returncode}:\n{stderr}')
|
||||||
elif stderr:
|
elif stderr:
|
||||||
self.report_warning(f'JS console error msg:\n{stderr.strip()}', video_id=video_id)
|
self.report_warning(f'JS console error msg:\n{stderr.strip()}')
|
||||||
return stdout.strip()
|
return stdout.strip()
|
||||||
|
|
||||||
def execute(self, jscode, video_id=None, note='Executing JS in Deno', url=None):
|
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location=None):
|
||||||
self.report_note(video_id, note)
|
self.report_note(video_id, note)
|
||||||
js_file = TempFileWrapper(f'{self._init_script};\n{jscode}', suffix='.js')
|
js_file = TempFileWrapper(f'{self._init_script};\n{jscode}', suffix='.js')
|
||||||
location_args = ['--location', url] if url else []
|
location_args = ['--location', location] if location else []
|
||||||
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
|
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
|
||||||
return self._run_deno(cmd, video_id=video_id)
|
return self._run_deno(cmd)
|
||||||
|
|
||||||
|
|
||||||
@register_jsi
|
@register_jsi
|
||||||
|
@ -159,34 +162,92 @@ def exe_version(cls):
|
||||||
|
|
||||||
|
|
||||||
class DenoJSDomJSI(DenoJSI):
|
class DenoJSDomJSI(DenoJSI):
|
||||||
_SUPPORTED_FEATURES = {'js', 'wasm', 'dom'}
|
_SUPPORTED_FEATURES = {'js', 'wasm', 'location', 'dom', 'cookies'}
|
||||||
_DENO_FLAGS = ['--cached-only', '--no-prompt', '--no-check']
|
_DENO_FLAGS = ['--cached-only', '--no-prompt', '--no-check']
|
||||||
_JSDOM_IMPORT = False
|
_JSDOM_IMPORT = False
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def serialize_cookie(cookiejar: YoutubeDLCookieJar | None, url: str):
|
||||||
|
"""serialize netscape-compatible fields from cookiejar for tough-cookie loading"""
|
||||||
|
# JSDOM use tough-cookie as its CookieJar https://github.com/jsdom/jsdom/blob/main/lib/api.js
|
||||||
|
# tough-cookie use Cookie.fromJSON and Cookie.toJSON for cookie serialization
|
||||||
|
# https://github.com/salesforce/tough-cookie/blob/master/lib/cookie/cookie.ts
|
||||||
|
if not cookiejar:
|
||||||
|
return json.dumps({'cookies': []})
|
||||||
|
cookies: list[http.cookiejar.Cookie] = [cookie for cookie in cookiejar.get_cookies_for_url(url)]
|
||||||
|
return json.dumps({'cookies': [{
|
||||||
|
'key': cookie.name,
|
||||||
|
'value': cookie.value,
|
||||||
|
# leading dot must be removed, otherwise will fail to match
|
||||||
|
'domain': cookie.domain.lstrip('.') or urllib.parse.urlparse(url).hostname,
|
||||||
|
'expires': int_or_none(cookie.expires, invscale=1000),
|
||||||
|
'hostOnly': not cookie.domain_initial_dot,
|
||||||
|
'secure': bool(cookie.secure),
|
||||||
|
'path': cookie.path,
|
||||||
|
} for cookie in cookies if cookie.value]})
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def apply_cookies(cookiejar: YoutubeDLCookieJar | None, cookies: list[dict]):
|
||||||
|
"""apply cookies from serialized tough-cookie"""
|
||||||
|
# see serialize_cookie
|
||||||
|
if not cookiejar:
|
||||||
|
return
|
||||||
|
for cookie_dict in cookies:
|
||||||
|
if not all(cookie_dict.get(k) for k in ('key', 'value', 'domain')):
|
||||||
|
continue
|
||||||
|
if cookie_dict.get('hostOnly'):
|
||||||
|
cookie_dict['domain'] = cookie_dict['domain'].lstrip('.')
|
||||||
|
else:
|
||||||
|
cookie_dict['domain'] = '.' + cookie_dict['domain'].lstrip('.')
|
||||||
|
|
||||||
|
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||||
|
0, cookie_dict['key'], cookie_dict['value'],
|
||||||
|
None, False,
|
||||||
|
cookie_dict['domain'], True, not cookie_dict.get('hostOnly'),
|
||||||
|
cookie_dict.get('path', '/'), True,
|
||||||
|
bool(cookie_dict.get('secure')),
|
||||||
|
unified_timestamp(cookie_dict.get('expires')),
|
||||||
|
False, None, None, {}))
|
||||||
|
|
||||||
def _ensure_jsdom(self):
|
def _ensure_jsdom(self):
|
||||||
if self._JSDOM_IMPORT:
|
if self._JSDOM_IMPORT:
|
||||||
return
|
return
|
||||||
js_file = TempFileWrapper('import { JSDOM } from "https://cdn.esm.sh/jsdom"', suffix='.js')
|
js_file = TempFileWrapper('import jsdom from "https://cdn.esm.sh/jsdom"', suffix='.js')
|
||||||
cmd = [self.exe, 'run', js_file.name]
|
cmd = [self.exe, 'run', js_file.name]
|
||||||
self._run_deno(cmd)
|
self._run_deno(cmd)
|
||||||
self._JSDOM_IMPORT = True
|
self._JSDOM_IMPORT = True
|
||||||
|
|
||||||
def execute(self, jscode, video_id=None, note='Executing JS in Deno', url=None, html=None):
|
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='', html='', cookiejar=None):
|
||||||
self.report_note(video_id, note)
|
self.report_note(video_id, note)
|
||||||
if html:
|
|
||||||
self._ensure_jsdom()
|
self._ensure_jsdom()
|
||||||
init_script = '''%s;
|
script = f'''{self._init_script};
|
||||||
import { JSDOM } from "https://cdn.esm.sh/jsdom";
|
import jsdom from "https://cdn.esm.sh/jsdom";
|
||||||
const dom = new JSDOM(%s);
|
const callback = (() => {{
|
||||||
Object.keys(dom.window).forEach((key) => {try {window[key] = dom.window[key]} catch (e) {}});
|
const jar = jsdom.CookieJar.deserializeSync({json.dumps(self.serialize_cookie(cookiejar, location))});
|
||||||
''' % (self._init_script, json.dumps(html))
|
const dom = new jsdom.JSDOM({json.dumps(str(html))}, {{
|
||||||
else:
|
{'url: %s,' % json.dumps(str(location)) if location else ''}
|
||||||
init_script = self._init_script
|
cookieJar: jar,
|
||||||
js_file = TempFileWrapper(f'{init_script};\n{jscode}', suffix='.js')
|
}});
|
||||||
|
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}});
|
||||||
|
delete window.jsdom;
|
||||||
|
const stdout = [];
|
||||||
|
const origLog = console.log;
|
||||||
|
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' '));
|
||||||
|
return () => {{ origLog(JSON.stringify({{
|
||||||
|
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
|
||||||
|
}})();
|
||||||
|
await (async () => {{
|
||||||
|
{jscode}
|
||||||
|
}})().finally(callback);
|
||||||
|
'''
|
||||||
|
|
||||||
location_args = ['--location', url] if url else []
|
js_file = TempFileWrapper(script, suffix='.js')
|
||||||
|
|
||||||
|
location_args = ['--location', location] if location else []
|
||||||
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
|
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
|
||||||
return self._run_deno(cmd, video_id=video_id)
|
data = json.loads(self._run_deno(cmd))
|
||||||
|
self.apply_cookies(cookiejar, data['cookies'])
|
||||||
|
return data['stdout']
|
||||||
|
|
||||||
|
|
||||||
class PuppeteerJSI(ExternalJSI):
|
class PuppeteerJSI(ExternalJSI):
|
||||||
|
@ -464,3 +525,4 @@ def execute(self, jscode, video_id=None, *, note='Executing JS in PhantomJS'):
|
||||||
if typing.TYPE_CHECKING:
|
if typing.TYPE_CHECKING:
|
||||||
from ..YoutubeDL import YoutubeDL
|
from ..YoutubeDL import YoutubeDL
|
||||||
from ..extractor.common import InfoExtractor
|
from ..extractor.common import InfoExtractor
|
||||||
|
from ..cookies import YoutubeDLCookieJar
|
||||||
|
|
Loading…
Reference in a new issue