1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00

patch jsdom for iq

This commit is contained in:
c-basalt 2024-12-31 13:42:36 -05:00
parent b086b8635d
commit 02001159b8
5 changed files with 54 additions and 24 deletions

View file

@ -5,7 +5,7 @@
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..jsinterp import PhantomJSwrapper from ..jsinterp import JSIWrapper
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
@ -449,7 +449,6 @@ class IqIE(InfoExtractor):
} }
_DASH_JS = ''' _DASH_JS = '''
console.log(page.evaluate(function() {
var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s"; var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s";
var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s"; var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s";
var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime(); var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime();
@ -515,9 +514,7 @@ class IqIE(InfoExtractor):
var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path); var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path);
dash_paths[bid] = dash_path; dash_paths[bid] = dash_path;
}); });
return JSON.stringify(dash_paths); console.log(JSON.stringify(dash_paths));
}));
saveAndExit();
''' '''
def _extract_vms_player_js(self, webpage, video_id): def _extract_vms_player_js(self, webpage, video_id):
@ -597,22 +594,22 @@ def _real_extract(self, url):
else: else:
ut_list = ['0'] ut_list = ['0']
jsi = JSIWrapper(self, url, ['dom'], timeout=120)
# bid 0 as an initial format checker # bid 0 as an initial format checker
dash_paths = self._parse_json(PhantomJSwrapper(self, timeout=120_000).get( dash_paths = self._parse_json(jsi.execute(self._DASH_JS % {
url, note2='Executing signature code (this may take a couple minutes)', 'tvid': video_info['tvId'],
html='<!DOCTYPE html>', video_id=video_id, jscode=self._DASH_JS % { 'vid': video_info['vid'],
'tvid': video_info['tvId'], 'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'),
'vid': video_info['vid'], expected_type=str, default='04022001010011000000'),
'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'), 'uid': uid,
expected_type=str, default='04022001010011000000'), 'dfp': self._get_cookie('dfp', ''),
'uid': uid, 'mode': self._get_cookie('mod', 'intl'),
'dfp': self._get_cookie('dfp', ''), 'lang': self._get_cookie('lang', 'en_us'),
'mode': self._get_cookie('mod', 'intl'), 'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']',
'lang': self._get_cookie('lang', 'en_us'), 'ut_list': '[' + ','.join(ut_list) + ']',
'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']', 'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
'ut_list': '[' + ','.join(ut_list) + ']', }, video_id, html='<!DOCTYPE html>'), video_id)
'cmd5x_func': self._extract_cmd5x_function(webpage, video_id),
})[1].strip(), video_id)
formats, subtitles = [], {} formats, subtitles = [], {}
initial_format_data = self._download_json( initial_format_data = self._download_json(

View file

@ -1,6 +1,8 @@
# flake8: noqa: F401
from .native import JSInterpreter from .native import JSInterpreter
from .common import _JSI_PREFERENCES, _JSI_HANDLERS, JSIWrapper from .common import _JSI_PREFERENCES, _JSI_HANDLERS, JSIWrapper
from ._phantomjs import PhantomJSwrapper from ._phantomjs import PhantomJSwrapper
from . import _deno
__all__ = [ __all__ = [

View file

@ -70,6 +70,7 @@ def exe_version(cls):
return DenoJSI.exe_version return DenoJSI.exe_version
@register_jsi
class DenoJSDomJSI(DenoJSI): class DenoJSDomJSI(DenoJSI):
_SUPPORTED_FEATURES = {'js', 'wasm', 'location', 'dom', 'cookies'} _SUPPORTED_FEATURES = {'js', 'wasm', 'location', 'dom', 'cookies'}
_BASE_PREFERENCE = 4 _BASE_PREFERENCE = 4
@ -128,7 +129,7 @@ def _ensure_jsdom(self):
self._run_deno(cmd) self._run_deno(cmd)
self._JSDOM_IMPORT_CHECKED = True self._JSDOM_IMPORT_CHECKED = True
def execute(self, jscode, video_id=None, note='Executing JS in Deno', html='', cookiejar=None): def execute(self, jscode, video_id=None, note='Executing JS in Deno with jsdom', html='', cookiejar=None):
self.report_note(video_id, note) self.report_note(video_id, note)
self._ensure_jsdom() self._ensure_jsdom()
@ -148,15 +149,35 @@ def execute(self, jscode, video_id=None, note='Executing JS in Deno', html='', c
const dom = new jsdom.JSDOM({json.dumps(str(html))}, {{ const dom = new jsdom.JSDOM({json.dumps(str(html))}, {{
{'url: %s,' % json.dumps(str(self._url)) if self._url else ''} {'url: %s,' % json.dumps(str(self._url)) if self._url else ''}
cookieJar: jar, cookieJar: jar,
pretendToBeVisual: true,
}}); }});
Object.keys(dom.window).forEach((key) => {{try {{window[key] = dom.window[key]}} catch (e) {{}}}}); Object.keys(dom.window).filter(key => !['atob', 'btoa'].includes(key)).forEach((key) => {{
try {{window[key] = dom.window[key]}} catch (e) {{}}
}});
window.screen = {{
availWidth: 1920,
availHeight: 1040,
width: 1920,
height: 1080,
colorDepth: 24,
isExtended: true,
onchange: null,
orientation: {{angle: 0, type: 'landscape-primary', onchange: null}},
pixelDepth: 24,
width: 1920,
}}
Object.defineProperty(document.body, 'clientWidth', {{value: 1903}});
Object.defineProperty(document.body, 'clientHeight', {{value: 1035}});
document.domain = location.hostname;
delete window.jsdom; delete window.jsdom;
const origLog = console.log; const origLog = console.log;
console.log = () => {{}}; console.log = () => {{}};
console.info = () => {{}}; console.info = () => {{}};
return () => {{ return () => {{
const stdout = []; const stdout = [];
console.log = (...msg) => stdout.push(msg.map(m => m.toString()).join(' ')); console.log = (...msg) => stdout.push(msg.map(m => '' + m).join(' '));
return () => {{ origLog(JSON.stringify({{ return () => {{ origLog(JSON.stringify({{
stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }} stdout: stdout.join('\\n'), cookies: jar.serializeSync().cookies}})); }}
}} }}

View file

@ -81,6 +81,16 @@ def override_navigator_js(user_agent: str) -> str:
'language': 'en-US', 'language': 'en-US',
'languages': ['en-US'], 'languages': ['en-US'],
'webdriver': False, 'webdriver': False,
'cookieEnabled': True,
'appCodeName': user_agent.split('/', maxsplit=1)[0],
'appName': 'Netscape',
'appVersion': user_agent.split('/', maxsplit=1)[-1],
'platform': 'Win32',
'product': 'Gecko',
'productSub': '20030107',
'vendor': 'Google Inc.',
'vendorSub': '',
'onLine': True,
}.items() }.items()
]) ])

View file

@ -198,7 +198,7 @@ def _version(cls):
return PhantomJSJSI.exe_version return PhantomJSJSI.exe_version
def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000): def __init__(self, extractor: InfoExtractor, required_version=None, timeout=10000):
self._jsi = PhantomJSJSI(extractor._downloader, timeout=timeout / 1000) self._jsi = PhantomJSJSI(extractor._downloader, '', timeout / 1000, {})
if not self._jsi.is_available(): if not self._jsi.is_available():
raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True) raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True)