1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00
This commit is contained in:
c-basalt 2024-12-29 20:27:00 -05:00
parent fdd98ba6e0
commit 03a99eefae
2 changed files with 86 additions and 35 deletions

View file

@ -15,6 +15,7 @@
'wasm',
'location',
'dom',
'cookies',
}
@ -167,7 +168,7 @@ def _dispatch_request(self, method_name: str, *args, **kwargs):
msg = f'{msg}. You can try installing one of unavailable JSI: {join_jsi_name(unavailable)}'
raise ExtractorError(msg)
@require_features({'url': 'location', 'html': 'dom'})
@require_features({'location': 'location', 'html': 'dom', 'cookiejar': 'cookies'})
def execute(self, jscode: str, video_id: str | None, **kwargs) -> str:
"""
Execute JS code and return stdout from console.log
@ -175,24 +176,12 @@ def execute(self, jscode: str, video_id: str | None, **kwargs) -> str:
@param {str} jscode: JS code to execute
@param video_id: video id
@param note: note
@param {str} url: url to set location to, requires `location` feature
@param {str} location: url to configure window.location, requires `location` feature
@param {str} html: html to load as document, requires `dom` feature
@param {YoutubeDLCookieJar} cookiejar: cookiejar to set cookies, requires url and `cookies` feature
"""
return self._dispatch_request('execute', jscode, video_id, **kwargs)
@require_features({'url': 'location', 'html': 'dom'})
def evaluate(self, jscode: str, video_id: str | None, **kwargs) -> typing.Any:
"""
Evaluate JS code and return result
@param {str} jscode: JS code to execute
@param video_id: video id
@param note: note
@param {str} url: url to set location to, requires `location` feature
@param {str} html: html to load as document, requires `dom` feature
"""
return self._dispatch_request('evaluate', jscode, video_id, **kwargs)
class JSI(abc.ABC):
_SUPPORT_FEATURES: set[str] = set()

View file

@ -9,6 +9,7 @@
import tempfile
import urllib.parse
import typing
import http.cookiejar
from ..utils import (
@ -19,6 +20,8 @@
get_exe_version,
is_outdated_version,
shell_quote,
int_or_none,
unified_timestamp,
)
from .common import JSI, register_jsi
@ -126,7 +129,7 @@ def __init__(self, downloader: YoutubeDL, timeout=None, flags=[], replace_flags=
self._flags = flags if replace_flags else [*self._DENO_FLAGS, *flags]
self._init_script = self._INIT_SCRIPT if init_script is None else init_script
def _run_deno(self, cmd, video_id=None):
def _run_deno(self, cmd):
self.write_debug(f'Deno command line: {shell_quote(cmd)}')
try:
stdout, stderr, returncode = Popen.run(
@ -136,15 +139,15 @@ def _run_deno(self, cmd, video_id=None):
if returncode:
raise ExtractorError(f'Failed with returncode {returncode}:\n{stderr}')
elif stderr:
self.report_warning(f'JS console error msg:\n{stderr.strip()}', video_id=video_id)
self.report_warning(f'JS console error msg:\n{stderr.strip()}')
return stdout.strip()
def execute(self, jscode, video_id=None, note='Executing JS in Deno', url=None):
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location=None):
self.report_note(video_id, note)
js_file = TempFileWrapper(f'{self._init_script};\n{jscode}', suffix='.js')
location_args = ['--location', url] if url else []
location_args = ['--location', location] if location else []
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
return self._run_deno(cmd, video_id=video_id)
return self._run_deno(cmd)
@register_jsi
@ -159,34 +162,92 @@ def exe_version(cls):
class DenoJSDomJSI(DenoJSI):
_SUPPORTED_FEATURES = {'js', 'wasm', 'dom'}
_SUPPORTED_FEATURES = {'js', 'wasm', 'location', 'dom', 'cookies'}
_DENO_FLAGS = ['--cached-only', '--no-prompt', '--no-check']
_JSDOM_IMPORT = False
@staticmethod
def serialize_cookie(cookiejar: YoutubeDLCookieJar | None, url: str):
"""serialize netscape-compatible fields from cookiejar for tough-cookie loading"""
# JSDOM use tough-cookie as its CookieJar https://github.com/jsdom/jsdom/blob/main/lib/api.js
# tough-cookie use Cookie.fromJSON and Cookie.toJSON for cookie serialization
# https://github.com/salesforce/tough-cookie/blob/master/lib/cookie/cookie.ts
if not cookiejar:
return json.dumps({'cookies': []})
cookies: list[http.cookiejar.Cookie] = [cookie for cookie in cookiejar.get_cookies_for_url(url)]
return json.dumps({'cookies': [{
'key': cookie.name,
'value': cookie.value,
# leading dot must be removed, otherwise will fail to match
'domain': cookie.domain.lstrip('.') or urllib.parse.urlparse(url).hostname,
'expires': int_or_none(cookie.expires, invscale=1000),
'hostOnly': not cookie.domain_initial_dot,
'secure': bool(cookie.secure),
'path': cookie.path,
} for cookie in cookies if cookie.value]})
@staticmethod
def apply_cookies(cookiejar: YoutubeDLCookieJar | None, cookies: list[dict]):
"""apply cookies from serialized tough-cookie"""
# see serialize_cookie
if not cookiejar:
return
for cookie_dict in cookies:
if not all(cookie_dict.get(k) for k in ('key', 'value', 'domain')):
continue
if cookie_dict.get('hostOnly'):
cookie_dict['domain'] = cookie_dict['domain'].lstrip('.')
else:
cookie_dict['domain'] = '.' + cookie_dict['domain'].lstrip('.')
cookiejar.set_cookie(http.cookiejar.Cookie(
0, cookie_dict['key'], cookie_dict['value'],
None, False,
cookie_dict['domain'], True, not cookie_dict.get('hostOnly'),
cookie_dict.get('path', '/'), True,
bool(cookie_dict.get('secure')),
unified_timestamp(cookie_dict.get('expires')),
False, None, None, {}))
def _ensure_jsdom(self):
if self._JSDOM_IMPORT:
return
js_file = TempFileWrapper('import { JSDOM } from "https://cdn.esm.sh/jsdom"', suffix='.js')
js_file = TempFileWrapper('import jsdom from "https://cdn.esm.sh/jsdom"', suffix='.js')
cmd = [self.exe, 'run', js_file.name]
self._run_deno(cmd)
self._JSDOM_IMPORT = True
def execute(self, jscode, video_id=None, note='Executing JS in Deno', url=None, html=None):
def execute(self, jscode, video_id=None, note='Executing JS in Deno', location='', html='', cookiejar=None):
self.report_note(video_id, note)
if html:
self._ensure_jsdom()
init_script = '''%s;
import { JSDOM } from "https://cdn.esm.sh/jsdom";
const dom = new JSDOM(%s);
Object.keys(dom.window).forEach((key) => {try {window[key] = dom.window[key]} catch (e) {}});
''' % (self._init_script, json.dumps(html))
else:
init_script = self._init_script
js_file = TempFileWrapper(f'{init_script};\n{jscode}', suffix='.js')
self._ensure_jsdom()
script = f'''{self._init_script};
import jsdom from "https://cdn.esm.sh/jsdom";
const callback = (() => {{
const jar = jsdom.CookieJar.deserializeSync({json.dumps(self.serialize_cookie(cookiejar, location))});
const dom = new jsdom.JSDOM({json.dumps(str(html))}, {{
{'url: %s,' % json.dumps(str(location)) if location else ''}
cookieJar: jar,
}});
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}});
delete window.jsdom;
const stdout = [];
const origLog = console.log;
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' '));
return () => {{ origLog(JSON.stringify({{
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
}})();
await (async () => {{
{jscode}
}})().finally(callback);
'''
location_args = ['--location', url] if url else []
js_file = TempFileWrapper(script, suffix='.js')
location_args = ['--location', location] if location else []
cmd = [self.exe, 'run', *self._flags, *location_args, js_file.name]
return self._run_deno(cmd, video_id=video_id)
data = json.loads(self._run_deno(cmd))
self.apply_cookies(cookiejar, data['cookies'])
return data['stdout']
class PuppeteerJSI(ExternalJSI):
@ -464,3 +525,4 @@ def execute(self, jscode, video_id=None, *, note='Executing JS in PhantomJS'):
if typing.TYPE_CHECKING:
from ..YoutubeDL import YoutubeDL
from ..extractor.common import InfoExtractor
from ..cookies import YoutubeDLCookieJar