mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
split and fixes for phantom
This commit is contained in:
parent
6d622d5481
commit
8c6d01f757
3 changed files with 147 additions and 118 deletions
71
yt_dlp/jsinterp/_helper.py
Normal file
71
yt_dlp/jsinterp/_helper.py
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
import contextlib
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import string
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
|
class TempFileWrapper:
|
||||||
|
"""
|
||||||
|
Wrapper for NamedTemporaryFile, auto closes file after io and deletes file upon wrapper object gc
|
||||||
|
|
||||||
|
@param {str | bytes | None} content: content to write to file upon creation
|
||||||
|
@param {bool} text: whether to open file in text mode
|
||||||
|
@param {str} encoding: encoding to use for text mode
|
||||||
|
@param {str | None} suffix: suffix for filename of temporary file
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, content: str | bytes | None = None, text: bool = True,
|
||||||
|
encoding='utf-8', suffix: str | None = None):
|
||||||
|
self.encoding = None if not text else encoding
|
||||||
|
self.text = text
|
||||||
|
self._file = tempfile.NamedTemporaryFile('w' if text else 'wb', encoding=self.encoding,
|
||||||
|
suffix=suffix, delete=False)
|
||||||
|
if content:
|
||||||
|
self._file.write(content)
|
||||||
|
self._file.close()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self):
|
||||||
|
return self._file.name
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def opened_file(self, mode, *, seek=None, seek_whence=0):
|
||||||
|
mode = mode if (self.text or 'b' in mode) else mode + 'b'
|
||||||
|
with open(self._file.name, mode, encoding=self.encoding) as f:
|
||||||
|
if seek is not None:
|
||||||
|
self._file.seek(seek, seek_whence)
|
||||||
|
yield f
|
||||||
|
|
||||||
|
def write(self, s, seek=None, seek_whence=0):
|
||||||
|
"""re-open file in write mode and write, optionally seek to position first"""
|
||||||
|
with self.opened_file('w', seek=seek, seek_whence=seek_whence) as f:
|
||||||
|
return f.write(s)
|
||||||
|
|
||||||
|
def append_write(self, s, seek=None, seek_whence=0):
|
||||||
|
"""re-open file in append mode and write, optionally seek to position first"""
|
||||||
|
with self.opened_file('a', seek=seek, seek_whence=seek_whence) as f:
|
||||||
|
return f.write(s)
|
||||||
|
|
||||||
|
def read(self, n=-1, seek=None, seek_whence=0):
|
||||||
|
"""re-open file and read, optionally seek to position first"""
|
||||||
|
with self.opened_file('r', seek=seek, seek_whence=seek_whence) as f:
|
||||||
|
return f.read(n)
|
||||||
|
|
||||||
|
def cleanup(self):
|
||||||
|
with contextlib.suppress(OSError):
|
||||||
|
os.remove(self._file.name)
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
self.cleanup()
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_value, traceback):
|
||||||
|
self.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
def random_string(length: int = 10) -> str:
|
||||||
|
return ''.join(random.choices(string.ascii_letters, k=length))
|
|
@ -4,8 +4,14 @@
|
||||||
import typing
|
import typing
|
||||||
import functools
|
import functools
|
||||||
|
|
||||||
from ..utils import classproperty, format_field, variadic, ExtractorError
|
|
||||||
from ..extractor.common import InfoExtractor
|
from ..extractor.common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
classproperty,
|
||||||
|
format_field,
|
||||||
|
get_exe_version,
|
||||||
|
variadic,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
_JSI_HANDLERS: dict[str, type[JSI]] = {}
|
_JSI_HANDLERS: dict[str, type[JSI]] = {}
|
||||||
|
@ -220,6 +226,22 @@ def JSI_KEY(cls) -> str:
|
||||||
return cls.__name__[:-3]
|
return cls.__name__[:-3]
|
||||||
|
|
||||||
|
|
||||||
|
class ExternalJSI(JSI, abc.ABC):
|
||||||
|
_EXE_NAME: str
|
||||||
|
|
||||||
|
@classproperty(cache=True)
|
||||||
|
def exe_version(cls):
|
||||||
|
return get_exe_version(cls._EXE_NAME, args=getattr(cls, 'V_ARGS', ['--version']), version_re=r'([0-9.]+)')
|
||||||
|
|
||||||
|
@classproperty
|
||||||
|
def exe(cls):
|
||||||
|
return cls._EXE_NAME if cls.exe_version else None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def is_available(cls):
|
||||||
|
return bool(cls.exe)
|
||||||
|
|
||||||
|
|
||||||
def register_jsi(jsi_cls: JsiClass) -> JsiClass:
|
def register_jsi(jsi_cls: JsiClass) -> JsiClass:
|
||||||
"""Register a JS interpreter class"""
|
"""Register a JS interpreter class"""
|
||||||
assert issubclass(jsi_cls, JSI), f'{jsi_cls} must be a subclass of JSI'
|
assert issubclass(jsi_cls, JSI), f'{jsi_cls} must be a subclass of JSI'
|
||||||
|
|
|
@ -1,14 +1,11 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import abc
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import subprocess
|
|
||||||
import tempfile
|
|
||||||
import urllib.parse
|
|
||||||
import typing
|
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import typing
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -16,109 +13,13 @@
|
||||||
Popen,
|
Popen,
|
||||||
classproperty,
|
classproperty,
|
||||||
format_field,
|
format_field,
|
||||||
get_exe_version,
|
int_or_none,
|
||||||
is_outdated_version,
|
is_outdated_version,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
int_or_none,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
from .common import JSI, register_jsi
|
from ._helper import TempFileWrapper, random_string
|
||||||
|
from .common import ExternalJSI, register_jsi
|
||||||
|
|
||||||
def cookie_to_dict(cookie):
|
|
||||||
cookie_dict = {
|
|
||||||
'name': cookie.name,
|
|
||||||
'value': cookie.value,
|
|
||||||
}
|
|
||||||
if cookie.port_specified:
|
|
||||||
cookie_dict['port'] = cookie.port
|
|
||||||
if cookie.domain_specified:
|
|
||||||
cookie_dict['domain'] = cookie.domain
|
|
||||||
if cookie.path_specified:
|
|
||||||
cookie_dict['path'] = cookie.path
|
|
||||||
if cookie.expires is not None:
|
|
||||||
cookie_dict['expires'] = cookie.expires
|
|
||||||
if cookie.secure is not None:
|
|
||||||
cookie_dict['secure'] = cookie.secure
|
|
||||||
if cookie.discard is not None:
|
|
||||||
cookie_dict['discard'] = cookie.discard
|
|
||||||
with contextlib.suppress(TypeError):
|
|
||||||
if (cookie.has_nonstandard_attr('httpOnly')
|
|
||||||
or cookie.has_nonstandard_attr('httponly')
|
|
||||||
or cookie.has_nonstandard_attr('HttpOnly')):
|
|
||||||
cookie_dict['httponly'] = True
|
|
||||||
return cookie_dict
|
|
||||||
|
|
||||||
|
|
||||||
def cookie_jar_to_list(cookie_jar):
|
|
||||||
return [cookie_to_dict(cookie) for cookie in cookie_jar]
|
|
||||||
|
|
||||||
|
|
||||||
class TempFileWrapper:
|
|
||||||
"""Wrapper for NamedTemporaryFile, auto closes file after io and deletes file upon wrapper object gc"""
|
|
||||||
|
|
||||||
def __init__(self, content: str | bytes | None = None, text: bool = True,
|
|
||||||
encoding='utf-8', suffix: str | None = None):
|
|
||||||
self.encoding = None if not text else encoding
|
|
||||||
self.text = text
|
|
||||||
self._file = tempfile.NamedTemporaryFile('w' if text else 'wb', encoding=self.encoding,
|
|
||||||
suffix=suffix, delete=False)
|
|
||||||
if content:
|
|
||||||
self._file.write(content)
|
|
||||||
self._file.close()
|
|
||||||
|
|
||||||
@property
|
|
||||||
def name(self):
|
|
||||||
return self._file.name
|
|
||||||
|
|
||||||
@contextlib.contextmanager
|
|
||||||
def opened_file(self, mode, *, seek=None, seek_whence=0):
|
|
||||||
mode = mode if (self.text or 'b' in mode) else mode + 'b'
|
|
||||||
with open(self._file.name, mode, encoding=self.encoding) as f:
|
|
||||||
if seek is not None:
|
|
||||||
self._file.seek(seek, seek_whence)
|
|
||||||
yield f
|
|
||||||
|
|
||||||
def write(self, s, seek=None, seek_whence=0):
|
|
||||||
with self.opened_file('w', seek=seek, seek_whence=seek_whence) as f:
|
|
||||||
return f.write(s)
|
|
||||||
|
|
||||||
def append_write(self, s, seek=None, seek_whence=0):
|
|
||||||
with self.opened_file('a', seek=seek, seek_whence=seek_whence) as f:
|
|
||||||
return f.write(s)
|
|
||||||
|
|
||||||
def read(self, n=-1, seek=None, seek_whence=0):
|
|
||||||
with self.opened_file('r', seek=seek, seek_whence=seek_whence) as f:
|
|
||||||
return f.read(n)
|
|
||||||
|
|
||||||
def cleanup(self):
|
|
||||||
with contextlib.suppress(OSError):
|
|
||||||
os.remove(self._file.name)
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
self.cleanup()
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, exc_type, exc_value, traceback):
|
|
||||||
self.cleanup()
|
|
||||||
|
|
||||||
|
|
||||||
class ExternalJSI(JSI, abc.ABC):
|
|
||||||
_EXE_NAME: str
|
|
||||||
|
|
||||||
@classproperty(cache=True)
|
|
||||||
def exe_version(cls):
|
|
||||||
return get_exe_version(cls._EXE_NAME, args=getattr(cls, 'V_ARGS', ['--version']), version_re=r'([0-9.]+)')
|
|
||||||
|
|
||||||
@classproperty
|
|
||||||
def exe(cls):
|
|
||||||
return cls._EXE_NAME if cls.exe_version else None
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def is_available(cls):
|
|
||||||
return bool(cls.exe)
|
|
||||||
|
|
||||||
|
|
||||||
@register_jsi
|
@register_jsi
|
||||||
|
@ -360,9 +261,6 @@ class PhantomJSJSI(ExternalJSI):
|
||||||
var fs = require('fs');
|
var fs = require('fs');
|
||||||
var read = {{ mode: 'r', charset: 'utf-8' }};
|
var read = {{ mode: 'r', charset: 'utf-8' }};
|
||||||
var write = {{ mode: 'w', charset: 'utf-8' }};
|
var write = {{ mode: 'w', charset: 'utf-8' }};
|
||||||
JSON.parse(fs.read({cookies_fn}, read)).forEach(function(x) {{
|
|
||||||
phantom.addCookie(x);
|
|
||||||
}});
|
|
||||||
page.settings.resourceTimeout = {timeout};
|
page.settings.resourceTimeout = {timeout};
|
||||||
page.settings.userAgent = {ua};
|
page.settings.userAgent = {ua};
|
||||||
page.onLoadStarted = function() {{
|
page.onLoadStarted = function() {{
|
||||||
|
@ -381,20 +279,41 @@ class PhantomJSJSI(ExternalJSI):
|
||||||
page.setContent(fs.read({html_fn}, read), {url});
|
page.setContent(fs.read({html_fn}, read), {url});
|
||||||
}}
|
}}
|
||||||
else {{
|
else {{
|
||||||
|
JSON.parse(fs.read({cookies_fn}, read)).forEach(function(x) {{
|
||||||
|
phantom.addCookie(x);
|
||||||
|
}});
|
||||||
{jscode}
|
{jscode}
|
||||||
}}
|
}}
|
||||||
}};
|
}};
|
||||||
page.open("");
|
page.open("");
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def _save_cookies(self, url, cookiejar):
|
def _save_cookies(self, url, cookiejar: YoutubeDLCookieJar | None):
|
||||||
cookies = cookie_jar_to_list(cookiejar) if cookiejar else []
|
def _cookie_to_dict(cookie: http.cookiejar.Cookie):
|
||||||
for cookie in cookies:
|
cookie_dict = {
|
||||||
if 'path' not in cookie:
|
'name': cookie.name,
|
||||||
cookie['path'] = '/'
|
'value': cookie.value,
|
||||||
if 'domain' not in cookie:
|
'port': cookie.port,
|
||||||
cookie['domain'] = urllib.parse.urlparse(url).netloc
|
'domain': cookie.domain,
|
||||||
return json.dumps(cookies)
|
'path': cookie.path or '/',
|
||||||
|
'expires': int_or_none(cookie.expires, invscale=1000),
|
||||||
|
'secure': cookie.secure,
|
||||||
|
'discard': cookie.discard,
|
||||||
|
}
|
||||||
|
if not cookie_dict['domain']:
|
||||||
|
cookie_dict['domain'] = urllib.parse.urlparse(url).hostname
|
||||||
|
cookie_dict['port'] = urllib.parse.urlparse(url).port
|
||||||
|
for key in [key for key, value in cookie_dict.items() if value is None]:
|
||||||
|
cookie_dict.pop(key)
|
||||||
|
with contextlib.suppress(TypeError):
|
||||||
|
if (cookie.has_nonstandard_attr('httpOnly')
|
||||||
|
or cookie.has_nonstandard_attr('httponly')
|
||||||
|
or cookie.has_nonstandard_attr('HttpOnly')):
|
||||||
|
cookie_dict['httponly'] = True
|
||||||
|
return cookie_dict
|
||||||
|
|
||||||
|
cookies = cookiejar.get_cookies_for_url(url) if cookiejar else []
|
||||||
|
return json.dumps([_cookie_to_dict(cookie) for cookie in cookies])
|
||||||
|
|
||||||
def _load_cookies(self, cookies_json: str, cookiejar):
|
def _load_cookies(self, cookies_json: str, cookiejar):
|
||||||
if not cookiejar:
|
if not cookiejar:
|
||||||
|
@ -454,6 +373,23 @@ def _execute_html(self, jscode: str, url: str, html: str, cookiejar, video_id=No
|
||||||
def execute(self, jscode, video_id=None,
|
def execute(self, jscode, video_id=None,
|
||||||
note='Executing JS in PhantomJS', location=None, html='', cookiejar=None):
|
note='Executing JS in PhantomJS', location=None, html='', cookiejar=None):
|
||||||
if location:
|
if location:
|
||||||
|
jscode = '''console.log(page.evaluate(function() {
|
||||||
|
var %(std_var)s = [];
|
||||||
|
console.log = function() {
|
||||||
|
var values = '';
|
||||||
|
for (var i = 0; i < arguments.length; i++) {
|
||||||
|
values += arguments[i] + ' ';
|
||||||
|
}
|
||||||
|
%(std_var)s.push(values);
|
||||||
|
}
|
||||||
|
%(jscode)s;
|
||||||
|
return %(std_var)s.join('\\n');
|
||||||
|
|
||||||
|
}));
|
||||||
|
saveAndExit();''' % {
|
||||||
|
'std_var': f'__stdout__values_{random_string()}',
|
||||||
|
'jscode': jscode,
|
||||||
|
}
|
||||||
return self._execute_html(jscode, location, html, cookiejar, video_id=video_id, note=note)[1]
|
return self._execute_html(jscode, location, html, cookiejar, video_id=video_id, note=note)[1]
|
||||||
if html:
|
if html:
|
||||||
self.report_warning('`location` is required to use `html`')
|
self.report_warning('`location` is required to use `html`')
|
||||||
|
|
Loading…
Reference in a new issue