mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
Merge branch 'yt-dlp:master' into pr/12359
This commit is contained in:
commit
b2c11e33f4
51 changed files with 1450 additions and 590 deletions
6
.github/workflows/codeql.yml
vendored
6
.github/workflows/codeql.yml
vendored
|
@ -33,7 +33,7 @@ jobs:
|
|||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
|
@ -47,7 +47,7 @@ jobs:
|
|||
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||
|
@ -60,6 +60,6 @@ jobs:
|
|||
# ./location_of_script_within_repo/buildscript.sh
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
||||
|
|
|
@ -736,3 +736,9 @@ NecroRomnt
|
|||
pjrobertson
|
||||
subsense
|
||||
test20140
|
||||
arantius
|
||||
entourage8
|
||||
lfavole
|
||||
mp3butcher
|
||||
slipinthedove
|
||||
YoshiTabletopGamer
|
||||
|
|
43
Changelog.md
43
Changelog.md
|
@ -4,6 +4,49 @@ # Changelog
|
|||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2025.02.19
|
||||
|
||||
#### Core changes
|
||||
- **jsinterp**
|
||||
- [Add `js_number_to_string`](https://github.com/yt-dlp/yt-dlp/commit/0d9f061d38c3a4da61972e2adad317079f2f1c84) ([#12110](https://github.com/yt-dlp/yt-dlp/issues/12110)) by [Grub4K](https://github.com/Grub4K)
|
||||
- [Improve zeroise](https://github.com/yt-dlp/yt-dlp/commit/4ca8c44a073d5aa3a3e3112c35b2b23d6ce25ac6) ([#12313](https://github.com/yt-dlp/yt-dlp/issues/12313)) by [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Extractor changes
|
||||
- **acast**: [Support shows.acast.com URLs](https://github.com/yt-dlp/yt-dlp/commit/57c717fee4bfbc9309845bbb48901b72e4b69304) ([#12223](https://github.com/yt-dlp/yt-dlp/issues/12223)) by [barsnick](https://github.com/barsnick)
|
||||
- **cwtv**
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/18a28514e306e822eab4f3a79c76d515bf076406) ([#12207](https://github.com/yt-dlp/yt-dlp/issues/12207)) by [arantius](https://github.com/arantius)
|
||||
- movie: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/03c3d705778c07739e0034b51490877cffdc0983) ([#12227](https://github.com/yt-dlp/yt-dlp/issues/12227)) by [bashonly](https://github.com/bashonly)
|
||||
- **digiview**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f53553087d3fde9dcd61d6e9f98caf09db1d8ef2) ([#9902](https://github.com/yt-dlp/yt-dlp/issues/9902)) by [lfavole](https://github.com/lfavole)
|
||||
- **dropbox**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/861aeec449c8f3c062d962945b234ff0341f61f3) ([#12228](https://github.com/yt-dlp/yt-dlp/issues/12228)) by [bashonly](https://github.com/bashonly)
|
||||
- **francetv**
|
||||
- site
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/817483ccc68aed6049ed9c4a2ffae44ca82d2b1c) ([#12236](https://github.com/yt-dlp/yt-dlp/issues/12236)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/1295bbedd45fa8d9bc3f7a194864ae280297848e) ([#12316](https://github.com/yt-dlp/yt-dlp/issues/12316)) by [bashonly](https://github.com/bashonly)
|
||||
- **francetvinfo.fr**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5c4c2ddfaa47988b4d50c1ad4988badc0b4f30c2) ([#12402](https://github.com/yt-dlp/yt-dlp/issues/12402)) by [bashonly](https://github.com/bashonly)
|
||||
- **gem.cbc.ca**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/5271ef48c6f61c145e03e18e960995d2e651d205) ([#12404](https://github.com/yt-dlp/yt-dlp/issues/12404)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf)
|
||||
- **generic**: [Extract `live_status` for DASH manifest URLs](https://github.com/yt-dlp/yt-dlp/commit/19edaa44fcd375f54e63d6227b092f5252d3e889) ([#12256](https://github.com/yt-dlp/yt-dlp/issues/12256)) by [mp3butcher](https://github.com/mp3butcher)
|
||||
- **globo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f8d0161455f00add65585ca1a476a7b5d56f5f96) ([#11795](https://github.com/yt-dlp/yt-dlp/issues/11795)) by [slipinthedove](https://github.com/slipinthedove), [YoshiTabletopGamer](https://github.com/YoshiTabletopGamer)
|
||||
- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d59f14a0a7a8b55e6bf468237def62b73ab4a517) ([#12237](https://github.com/yt-dlp/yt-dlp/issues/12237)) by [alard](https://github.com/alard)
|
||||
- **pbs**: [Support www.thirteen.org URLs](https://github.com/yt-dlp/yt-dlp/commit/9fb8ab2ff67fb699f60cce09163a580976e90c0e) ([#11191](https://github.com/yt-dlp/yt-dlp/issues/11191)) by [rohieb](https://github.com/rohieb)
|
||||
- **reddit**: [Bypass gated subreddit warning](https://github.com/yt-dlp/yt-dlp/commit/6ca23ffaa4663cb552f937f0b1e9769b66db11bd) ([#12335](https://github.com/yt-dlp/yt-dlp/issues/12335)) by [bashonly](https://github.com/bashonly)
|
||||
- **twitter**: [Fix syndication token generation](https://github.com/yt-dlp/yt-dlp/commit/14cd7f3443c6da4d49edaefcc12da9dee86e243e) ([#12107](https://github.com/yt-dlp/yt-dlp/issues/12107)) by [Grub4K](https://github.com/Grub4K), [pjrobertson](https://github.com/pjrobertson)
|
||||
- **youtube**
|
||||
- [Retry on more critical requests](https://github.com/yt-dlp/yt-dlp/commit/d48e612609d012abbea3785be4d26d78a014abb2) ([#12339](https://github.com/yt-dlp/yt-dlp/issues/12339)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [nsig workaround for `tce` player JS](https://github.com/yt-dlp/yt-dlp/commit/ec17fb16e8d69d4e3e10fb73bf3221be8570dfee) ([#12401](https://github.com/yt-dlp/yt-dlp/issues/12401)) by [bashonly](https://github.com/bashonly)
|
||||
- **zdf**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/241ace4f104d50fdf7638f9203927aefcf57a1f7) ([#9565](https://github.com/yt-dlp/yt-dlp/issues/9565)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) (With fixes in [e7882b6](https://github.com/yt-dlp/yt-dlp/commit/e7882b682b959e476d8454911655b3e9b14c79b2) by [bashonly](https://github.com/bashonly))
|
||||
|
||||
#### Downloader changes
|
||||
- **hls**
|
||||
- [Fix `BYTERANGE` logic](https://github.com/yt-dlp/yt-dlp/commit/10b7ff68e98f17655e31952f6e17120b2d7dda96) ([#11972](https://github.com/yt-dlp/yt-dlp/issues/11972)) by [entourage8](https://github.com/entourage8)
|
||||
- [Support `--write-pages` for m3u8 media playlists](https://github.com/yt-dlp/yt-dlp/commit/be69468752ff598cacee57bb80533deab2367a5d) ([#12333](https://github.com/yt-dlp/yt-dlp/issues/12333)) by [bashonly](https://github.com/bashonly)
|
||||
- [Support `hls_media_playlist_data` format field](https://github.com/yt-dlp/yt-dlp/commit/c987be0acb6872c6561f28aa28171e803393d851) ([#12322](https://github.com/yt-dlp/yt-dlp/issues/12322)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Misc. changes
|
||||
- [Improve Issue/PR templates](https://github.com/yt-dlp/yt-dlp/commit/517ddf3c3f12560ab93e3d36244dc82db9f97818) ([#11499](https://github.com/yt-dlp/yt-dlp/issues/11499)) by [seproDev](https://github.com/seproDev) (With fixes in [4ecb833](https://github.com/yt-dlp/yt-dlp/commit/4ecb833472c90e078567b561fb7c089f1aa9587b) by [bashonly](https://github.com/bashonly))
|
||||
- **cleanup**: Miscellaneous: [4985a40](https://github.com/yt-dlp/yt-dlp/commit/4985a4041770eaa0016271809a1fd950dc809a55) by [dirkf](https://github.com/dirkf), [Grub4K](https://github.com/Grub4K), [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
|
||||
- **docs**: [Add note to `supportedsites.md`](https://github.com/yt-dlp/yt-dlp/commit/01a63629a21781458dcbd38779898e117678f5ff) ([#12382](https://github.com/yt-dlp/yt-dlp/issues/12382)) by [seproDev](https://github.com/seproDev)
|
||||
- **test**: download: [Validate and sort info dict fields](https://github.com/yt-dlp/yt-dlp/commit/208163447408c78673b08c172beafe5c310fb167) ([#12299](https://github.com/yt-dlp/yt-dlp/issues/12299)) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
|
||||
### 2025.01.26
|
||||
|
||||
#### Core changes
|
||||
|
|
11
README.md
11
README.md
|
@ -337,10 +337,11 @@ ## General Options:
|
|||
--plugin-dirs PATH Path to an additional directory to search
|
||||
for plugins. This option can be used
|
||||
multiple times to add multiple directories.
|
||||
Note that this currently only works for
|
||||
extractor plugins; postprocessor plugins can
|
||||
only be loaded from the default plugin
|
||||
directories
|
||||
Use "default" to search the default plugin
|
||||
directories (default)
|
||||
--no-plugin-dirs Clear plugin directories to search,
|
||||
including defaults and those provided by
|
||||
previous --plugin-dirs
|
||||
--flat-playlist Do not extract a playlist's URL result
|
||||
entries; some entry metadata may be missing
|
||||
and downloading may be bypassed
|
||||
|
@ -1525,7 +1526,7 @@ ## Sorting Formats
|
|||
- `hasvid`: Gives priority to formats that have a video stream
|
||||
- `hasaud`: Gives priority to formats that have an audio stream
|
||||
- `ie_pref`: The format preference
|
||||
- `lang`: The language preference
|
||||
- `lang`: The language preference as determined by the extractor (e.g. original language preferred over audio description)
|
||||
- `quality`: The quality of the format
|
||||
- `source`: The preference of the source
|
||||
- `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
|
||||
|
|
|
@ -10,6 +10,9 @@
|
|||
from inspect import getsource
|
||||
|
||||
from devscripts.utils import get_filename_args, read_file, write_file
|
||||
from yt_dlp.extractor import import_extractors
|
||||
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
from yt_dlp.globals import extractors
|
||||
|
||||
NO_ATTR = object()
|
||||
STATIC_CLASS_PROPERTIES = [
|
||||
|
@ -38,8 +41,7 @@ def main():
|
|||
|
||||
lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
|
||||
|
||||
from yt_dlp.extractor.extractors import _ALL_CLASSES
|
||||
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
import_extractors()
|
||||
|
||||
DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
|
||||
module_src = '\n'.join((
|
||||
|
@ -47,7 +49,7 @@ def main():
|
|||
' _module = None',
|
||||
*extra_ie_code(DummyInfoExtractor),
|
||||
'\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
||||
*build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
|
||||
*build_ies(list(extractors.value.values()), (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
|
||||
))
|
||||
|
||||
write_file(lazy_extractors_filename, f'{module_src}\n')
|
||||
|
@ -73,7 +75,7 @@ def build_ies(ies, bases, attr_base):
|
|||
if ie in ies:
|
||||
names.append(ie.__name__)
|
||||
|
||||
yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
|
||||
yield '\n_CLASS_LOOKUP = {%s}' % ', '.join(f'{name!r}: {name}' for name in names)
|
||||
|
||||
|
||||
def sort_ies(ies, ignored_bases):
|
||||
|
|
|
@ -10,10 +10,21 @@
|
|||
from devscripts.utils import get_filename_args, write_file
|
||||
from yt_dlp.extractor import list_extractor_classes
|
||||
|
||||
TEMPLATE = '''\
|
||||
# Supported sites
|
||||
|
||||
Below is a list of all extractors that are currently included with yt-dlp.
|
||||
If a site is not listed here, it might still be supported by yt-dlp's embed extraction or generic extractor.
|
||||
Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them.
|
||||
The only reliable way to check if a site is supported is to try it.
|
||||
|
||||
{ie_list}
|
||||
'''
|
||||
|
||||
|
||||
def main():
    """Render the supported-sites page and write it to the output file.

    The page is ``TEMPLATE`` with one description line per extractor class
    substituted for ``{ie_list}``. Extractors whose ``IE_DESC`` is ``False``
    are left out of the list.
    """
    out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)

    # Write the file once. A heading-only write to the same path would be
    # overwritten by this one straight away, so it is not performed.
    write_file(get_filename_args(), TEMPLATE.format(ie_list=out))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -25,7 +25,8 @@ def parse_args():
|
|||
|
||||
|
||||
def run_tests(*tests, pattern=None, ci=False):
|
||||
run_core = 'core' in tests or (not pattern and not tests)
|
||||
# XXX: hatch uses `tests` if no arguments are passed
|
||||
run_core = 'core' in tests or 'tests' in tests or (not pattern and not tests)
|
||||
run_download = 'download' in tests
|
||||
|
||||
pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
|
||||
|
|
|
@ -384,6 +384,7 @@ select = [
|
|||
"W391",
|
||||
"W504",
|
||||
]
|
||||
exclude = "*/extractor/lazy_extractors.py,*venv*,*/test/testdata/sigs/player-*.js,.idea,.vscode"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "-ra -v --strict-markers"
|
||||
|
|
|
@ -1,4 +1,10 @@
|
|||
# Supported sites
|
||||
|
||||
Below is a list of all extractors that are currently included with yt-dlp.
|
||||
If a site is not listed here, it might still be supported by yt-dlp's embed extraction or generic extractor.
|
||||
Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them.
|
||||
The only reliable way to check if a site is supported is to try it.
|
||||
|
||||
- **17live**
|
||||
- **17live:clip**
|
||||
- **1News**: 1news.co.nz article videos
|
||||
|
@ -314,7 +320,8 @@ # Supported sites
|
|||
- **curiositystream**: [*curiositystream*](## "netrc machine")
|
||||
- **curiositystream:collections**: [*curiositystream*](## "netrc machine")
|
||||
- **curiositystream:series**: [*curiositystream*](## "netrc machine")
|
||||
- **CWTV**
|
||||
- **cwtv**
|
||||
- **cwtv:movie**
|
||||
- **Cybrary**: [*cybrary*](## "netrc machine")
|
||||
- **CybraryCourse**: [*cybrary*](## "netrc machine")
|
||||
- **DacastPlaylist**
|
||||
|
@ -349,6 +356,7 @@ # Supported sites
|
|||
- **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor
|
||||
- **DigitallySpeaking**
|
||||
- **Digiteka**
|
||||
- **Digiview**
|
||||
- **DiscogsReleasePlaylist**
|
||||
- **DiscoveryLife**
|
||||
- **DiscoveryNetworksDe**
|
||||
|
@ -465,9 +473,9 @@ # Supported sites
|
|||
- **fptplay**: fptplay.vn
|
||||
- **FranceCulture**
|
||||
- **FranceInter**
|
||||
- **FranceTV**
|
||||
- **francetv**
|
||||
- **francetv:site**
|
||||
- **francetvinfo.fr**
|
||||
- **FranceTVSite**
|
||||
- **Freesound**
|
||||
- **freespeech.org**
|
||||
- **freetv:series**
|
||||
|
@ -499,7 +507,7 @@ # Supported sites
|
|||
- **GediDigital**
|
||||
- **gem.cbc.ca**: [*cbcgem*](## "netrc machine")
|
||||
- **gem.cbc.ca:live**
|
||||
- **gem.cbc.ca:playlist**
|
||||
- **gem.cbc.ca:playlist**: [*cbcgem*](## "netrc machine")
|
||||
- **Genius**
|
||||
- **GeniusLyrics**
|
||||
- **Germanupa**: germanupa.de
|
||||
|
|
176
test/helper.py
176
test/helper.py
|
@ -101,87 +101,109 @@ def getwebpagetestcases():
|
|||
md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
|
||||
|
||||
|
||||
def expect_value(self, got, expected, field):
|
||||
if isinstance(expected, str) and expected.startswith('re:'):
|
||||
match_str = expected[len('re:'):]
|
||||
match_rex = re.compile(match_str)
|
||||
|
||||
self.assertTrue(
|
||||
isinstance(got, str),
|
||||
f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
|
||||
self.assertTrue(
|
||||
match_rex.match(got),
|
||||
f'field {field} (value: {got!r}) should match {match_str!r}')
|
||||
elif isinstance(expected, str) and expected.startswith('startswith:'):
|
||||
start_str = expected[len('startswith:'):]
|
||||
self.assertTrue(
|
||||
isinstance(got, str),
|
||||
f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
|
||||
self.assertTrue(
|
||||
got.startswith(start_str),
|
||||
f'field {field} (value: {got!r}) should start with {start_str!r}')
|
||||
elif isinstance(expected, str) and expected.startswith('contains:'):
|
||||
contains_str = expected[len('contains:'):]
|
||||
self.assertTrue(
|
||||
isinstance(got, str),
|
||||
f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
|
||||
self.assertTrue(
|
||||
contains_str in got,
|
||||
f'field {field} (value: {got!r}) should contain {contains_str!r}')
|
||||
elif isinstance(expected, type):
|
||||
self.assertTrue(
|
||||
isinstance(got, expected),
|
||||
f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}')
|
||||
elif isinstance(expected, dict) and isinstance(got, dict):
|
||||
expect_dict(self, got, expected)
|
||||
elif isinstance(expected, list) and isinstance(got, list):
|
||||
self.assertEqual(
|
||||
len(expected), len(got),
|
||||
f'Expect a list of length {len(expected)}, but got a list of length {len(got)} for field {field}')
|
||||
for index, (item_got, item_expected) in enumerate(zip(got, expected)):
|
||||
type_got = type(item_got)
|
||||
type_expected = type(item_expected)
|
||||
self.assertEqual(
|
||||
type_expected, type_got,
|
||||
f'Type mismatch for list item at index {index} for field {field}, '
|
||||
f'expected {type_expected!r}, got {type_got!r}')
|
||||
expect_value(self, item_got, item_expected, field)
|
||||
else:
|
||||
if isinstance(expected, str) and expected.startswith('md5:'):
|
||||
self.assertTrue(
|
||||
isinstance(got, str),
|
||||
f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
|
||||
got = 'md5:' + md5(got)
|
||||
elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
|
||||
self.assertTrue(
|
||||
isinstance(got, (list, dict)),
|
||||
f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
|
||||
op, _, expected_num = expected.partition(':')
|
||||
expected_num = int(expected_num)
|
||||
if op == 'mincount':
|
||||
assert_func = assertGreaterEqual
|
||||
msg_tmpl = 'Expected %d items in field %s, but only got %d'
|
||||
elif op == 'maxcount':
|
||||
assert_func = assertLessEqual
|
||||
msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
|
||||
elif op == 'count':
|
||||
assert_func = assertEqual
|
||||
msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
|
||||
else:
|
||||
assert False
|
||||
assert_func(
|
||||
self, len(got), expected_num,
|
||||
msg_tmpl % (expected_num, field, len(got)))
|
||||
def _iter_differences(got, expected, field):
    """Yield a ``(field, message)`` pair for every mismatch between *got* and *expected*.

    *expected* describes what *got* should look like and may be:

    - a string with an ``op:value`` prefix:
      ``mincount``/``maxcount``/``count`` compare ``len(got)`` of a list or
      dict against the number, ``re`` matches the pattern at the start of
      *got*, ``startswith`` and ``contains`` test *got* for the given prefix
      or substring, and ``md5`` compares the MD5 hex digest of *got*;
    - any other string, compared for equality;
    - a dict, checked key by key against a dict *got* (keys that exist only
      in *got* are ignored);
    - a type, checked with ``isinstance``;
    - a list, compared item by item against a list *got* of the same length;
    - anything else, compared for equality.

    *field* is the name reported for the value being compared (``None`` for
    an unnamed top-level value). Nested dict keys and list indices are
    appended to it with a dot. Nothing is yielded when the values agree.
    """
    if isinstance(expected, str):
        op, _, val = expected.partition(':')
        if op in ('mincount', 'maxcount', 'count'):
            if not isinstance(got, (list, dict)):
                yield field, f'expected either {list.__name__} or {dict.__name__}, got {type(got).__name__}'
                return

            expected_num = int(val)
            got_num = len(got)
            if op == 'mincount':
                if got_num < expected_num:
                    yield field, f'expected at least {val} items, got {got_num}'
                return

            if op == 'maxcount':
                if got_num > expected_num:
                    yield field, f'expected at most {val} items, got {got_num}'
                return

            assert op == 'count'
            if got_num != expected_num:
                yield field, f'expected exactly {val} items, got {got_num}'
            return

        # Every remaining string expectation needs a string to compare against.
        if not isinstance(got, str):
            yield field, f'expected {str.__name__}, got {type(got).__name__}'
            return

        if op == 're':
            if not re.match(val, got):
                yield field, f'should match {val!r}, got {got!r}'
            return

        if op == 'startswith':
            # The value under test must begin with the expected prefix,
            # not the other way round.
            if not got.startswith(val):
                yield field, f'should start with {val!r}, got {got!r}'
            return

        if op == 'contains':
            # Substring test: the expected text may sit anywhere in the value.
            if val not in got:
                yield field, f'should contain {val!r}, got {got!r}'
            return

        if op == 'md5':
            hash_val = md5(got)
            if hash_val != val:
                yield field, f'expected hash {val}, got {hash_val}'
            return

        # No recognised prefix: treat the whole string as a literal.
        if got != expected:
            yield field, f'expected {expected!r}, got {got!r}'
        return

    if isinstance(expected, dict) and isinstance(got, dict):
        for key, expected_val in expected.items():
            if key not in got:
                yield field, f'missing key: {key!r}'
                continue

            field_name = key if field is None else f'{field}.{key}'
            yield from _iter_differences(got[key], expected_val, field_name)
        return

    if isinstance(expected, type):
        if not isinstance(got, expected):
            yield field, f'expected {expected.__name__}, got {type(got).__name__}'
        return

    if isinstance(expected, list) and isinstance(got, list):
        # TODO: use a smarter diffing algorithm than a positional comparison
        if len(expected) != len(got):
            yield field, f'expected length of {len(expected)}, got {len(got)}'
            return

        for index, (got_val, expected_val) in enumerate(zip(got, expected)):
            field_name = str(index) if field is None else f'{field}.{index}'
            yield from _iter_differences(got_val, expected_val, field_name)
        return

    if got != expected:
        yield field, f'expected {expected!r}, got {got!r}'
|
||||
|
||||
|
||||
def _expect_value(message, got, expected, field):
    """Build a failure report for *got* versus *expected*.

    Returns ``None`` when ``_iter_differences`` finds no mismatch. Otherwise
    returns *message*, followed by the comma-separated names of the
    mismatching fields in parentheses (when any are named), followed by one
    tab-indented ``field: detail`` line per mismatch.
    """
    differences = list(_iter_differences(got, expected, field))
    if len(differences) == 0:
        return None

    named = [name for name, _ in differences if name is not None]

    parts = [message]
    if named:
        parts.append(f' ({", ".join(named)})')
    for name, detail in differences:
        parts.append(f'\n\t{name}: {detail}')
    return ''.join(parts)
|
||||
|
||||
|
||||
def expect_value(self, got, expected, field):
    """Fail the test case *self* if *got* does not satisfy *expected*.

    The comparison and the report text come from ``_expect_value``.
    *field* is the name used for the value in that report.
    """
    report = _expect_value('values differ', got, expected, field)
    if not report:
        return
    self.fail(report)
|
||||
|
||||
|
||||
def expect_dict(self, got_dict, expected_dict):
    """Fail the test case *self* if *got_dict* does not satisfy *expected_dict*.

    All mismatching fields are collected by ``_expect_value`` and reported
    together in a single failure message.
    """
    # One aggregated check only. A field-by-field loop run beforehand would
    # stop at the first bad field and hide the complete report.
    if message := _expect_value('dictionaries differ', got_dict, expected_dict, None):
        self.fail(message)
|
||||
|
||||
|
||||
def sanitize_got_info_dict(got_dict):
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from yt_dlp.globals import all_plugins_loaded
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
|
@ -1427,6 +1429,12 @@ def check_for_cookie_header(result):
|
|||
self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
|
||||
self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
|
||||
|
||||
def test_load_plugins_compat(self):
    """Creating a FakeYDL must set ``all_plugins_loaded`` when it was unset."""
    # Should try to reload plugins if they haven't already been loaded
    all_plugins_loaded.value = False
    FakeYDL().close()
    # Use the TestCase assertion: a bare `assert` is stripped under `-O`.
    self.assertTrue(all_plugins_loaded.value, msg='Plugins were not loaded on instantiation')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -10,22 +10,71 @@
|
|||
sys.path.append(str(TEST_DATA_DIR))
|
||||
importlib.invalidate_caches()
|
||||
|
||||
from yt_dlp.utils import Config
|
||||
from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins
|
||||
from yt_dlp.plugins import (
|
||||
PACKAGE_NAME,
|
||||
PluginSpec,
|
||||
directories,
|
||||
load_plugins,
|
||||
load_all_plugins,
|
||||
register_plugin_spec,
|
||||
)
|
||||
|
||||
from yt_dlp.globals import (
|
||||
extractors,
|
||||
postprocessors,
|
||||
plugin_dirs,
|
||||
plugin_ies,
|
||||
plugin_pps,
|
||||
all_plugins_loaded,
|
||||
plugin_specs,
|
||||
)
|
||||
|
||||
|
||||
EXTRACTOR_PLUGIN_SPEC = PluginSpec(
|
||||
module_name='extractor',
|
||||
suffix='IE',
|
||||
destination=extractors,
|
||||
plugin_destination=plugin_ies,
|
||||
)
|
||||
|
||||
POSTPROCESSOR_PLUGIN_SPEC = PluginSpec(
|
||||
module_name='postprocessor',
|
||||
suffix='PP',
|
||||
destination=postprocessors,
|
||||
plugin_destination=plugin_pps,
|
||||
)
|
||||
|
||||
|
||||
def reset_plugins():
    """Reset the plugin globals and forget already-imported plugin modules.

    Resets the plugin registries, the plugin directory list, the registered
    specs and the "all plugins loaded" flag, then removes every
    ``<PACKAGE_NAME>.extractor.*`` and ``<PACKAGE_NAME>.postprocessor.*``
    entry from ``sys.modules`` and invalidates the import caches.
    """
    plugin_ies.value = {}
    plugin_pps.value = {}
    plugin_dirs.value = ['default']
    plugin_specs.value = {}
    all_plugins_loaded.value = False

    # Clearing override plugins is probably difficult
    prefixes = tuple(f'{PACKAGE_NAME}.{kind}.' for kind in ('extractor', 'postprocessor'))
    # Snapshot the names first so sys.modules is not mutated while iterating.
    stale_modules = [name for name in tuple(sys.modules) if name.startswith(prefixes)]
    for name in stale_modules:
        del sys.modules[name]

    importlib.invalidate_caches()
|
||||
|
||||
|
||||
class TestPlugins(unittest.TestCase):
|
||||
|
||||
TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME
|
||||
|
||||
def setUp(self):
|
||||
reset_plugins()
|
||||
|
||||
def tearDown(self):
|
||||
reset_plugins()
|
||||
|
||||
def test_directories_containing_plugins(self):
|
||||
self.assertIn(self.TEST_PLUGIN_DIR, map(Path, directories()))
|
||||
|
||||
def test_extractor_classes(self):
|
||||
for module_name in tuple(sys.modules):
|
||||
if module_name.startswith(f'{PACKAGE_NAME}.extractor'):
|
||||
del sys.modules[module_name]
|
||||
plugins_ie = load_plugins('extractor', 'IE')
|
||||
plugins_ie = load_plugins(EXTRACTOR_PLUGIN_SPEC)
|
||||
|
||||
self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys())
|
||||
self.assertIn('NormalPluginIE', plugins_ie.keys())
|
||||
|
@ -35,17 +84,29 @@ def test_extractor_classes(self):
|
|||
f'{PACKAGE_NAME}.extractor._ignore' in sys.modules,
|
||||
'loaded module beginning with underscore')
|
||||
self.assertNotIn('IgnorePluginIE', plugins_ie.keys())
|
||||
self.assertNotIn('IgnorePluginIE', plugin_ies.value)
|
||||
|
||||
# Don't load extractors with underscore prefix
|
||||
self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys())
|
||||
self.assertNotIn('_IgnoreUnderscorePluginIE', plugin_ies.value)
|
||||
|
||||
# Don't load extractors not specified in __all__ (if supplied)
|
||||
self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys())
|
||||
self.assertNotIn('IgnoreNotInAllPluginIE', plugin_ies.value)
|
||||
self.assertIn('InAllPluginIE', plugins_ie.keys())
|
||||
self.assertIn('InAllPluginIE', plugin_ies.value)
|
||||
|
||||
# Don't load override extractors
|
||||
self.assertNotIn('OverrideGenericIE', plugins_ie.keys())
|
||||
self.assertNotIn('OverrideGenericIE', plugin_ies.value)
|
||||
self.assertNotIn('_UnderscoreOverrideGenericIE', plugins_ie.keys())
|
||||
self.assertNotIn('_UnderscoreOverrideGenericIE', plugin_ies.value)
|
||||
|
||||
def test_postprocessor_classes(self):
    """Loading the postprocessor spec exposes NormalPluginPP and imports its module."""
    # load_plugins takes a PluginSpec, as in every other call in this module.
    # A call with the bare ('postprocessor', 'PP') strings is not made here.
    plugins_pp = load_plugins(POSTPROCESSOR_PLUGIN_SPEC)
    self.assertIn('NormalPluginPP', plugins_pp.keys())
    self.assertIn(f'{PACKAGE_NAME}.postprocessor.normal', sys.modules.keys())
    self.assertIn('NormalPluginPP', plugin_pps.value)
|
||||
|
||||
def test_importing_zipped_module(self):
|
||||
zip_path = TEST_DATA_DIR / 'zipped_plugins.zip'
|
||||
|
@ -58,10 +119,10 @@ def test_importing_zipped_module(self):
|
|||
package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}')
|
||||
self.assertIn(zip_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__))
|
||||
|
||||
plugins_ie = load_plugins('extractor', 'IE')
|
||||
plugins_ie = load_plugins(EXTRACTOR_PLUGIN_SPEC)
|
||||
self.assertIn('ZippedPluginIE', plugins_ie.keys())
|
||||
|
||||
plugins_pp = load_plugins('postprocessor', 'PP')
|
||||
plugins_pp = load_plugins(POSTPROCESSOR_PLUGIN_SPEC)
|
||||
self.assertIn('ZippedPluginPP', plugins_pp.keys())
|
||||
|
||||
finally:
|
||||
|
@ -69,23 +130,116 @@ def test_importing_zipped_module(self):
|
|||
os.remove(zip_path)
|
||||
importlib.invalidate_caches() # reset the import caches
|
||||
|
||||
def test_plugin_dirs(self):
|
||||
# Internal plugin dirs hack for CLI --plugin-dirs
|
||||
# To be replaced with proper system later
|
||||
custom_plugin_dir = TEST_DATA_DIR / 'plugin_packages'
|
||||
Config._plugin_dirs = [str(custom_plugin_dir)]
|
||||
importlib.invalidate_caches() # reset the import caches
|
||||
def test_reloading_plugins(self):
|
||||
reload_plugins_path = TEST_DATA_DIR / 'reload_plugins'
|
||||
load_plugins(EXTRACTOR_PLUGIN_SPEC)
|
||||
load_plugins(POSTPROCESSOR_PLUGIN_SPEC)
|
||||
|
||||
# Remove default folder and add reload_plugin path
|
||||
sys.path.remove(str(TEST_DATA_DIR))
|
||||
sys.path.append(str(reload_plugins_path))
|
||||
importlib.invalidate_caches()
|
||||
try:
|
||||
package = importlib.import_module(f'{PACKAGE_NAME}.extractor')
|
||||
self.assertIn(custom_plugin_dir / 'testpackage' / PACKAGE_NAME / 'extractor', map(Path, package.__path__))
|
||||
for plugin_type in ('extractor', 'postprocessor'):
|
||||
package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}')
|
||||
self.assertIn(reload_plugins_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__))
|
||||
|
||||
plugins_ie = load_plugins('extractor', 'IE')
|
||||
self.assertIn('PackagePluginIE', plugins_ie.keys())
|
||||
plugins_ie = load_plugins(EXTRACTOR_PLUGIN_SPEC)
|
||||
self.assertIn('NormalPluginIE', plugins_ie.keys())
|
||||
self.assertTrue(
|
||||
plugins_ie['NormalPluginIE'].REPLACED,
|
||||
msg='Reloading has not replaced original extractor plugin')
|
||||
self.assertTrue(
|
||||
extractors.value['NormalPluginIE'].REPLACED,
|
||||
msg='Reloading has not replaced original extractor plugin globally')
|
||||
|
||||
plugins_pp = load_plugins(POSTPROCESSOR_PLUGIN_SPEC)
|
||||
self.assertIn('NormalPluginPP', plugins_pp.keys())
|
||||
self.assertTrue(plugins_pp['NormalPluginPP'].REPLACED,
|
||||
msg='Reloading has not replaced original postprocessor plugin')
|
||||
self.assertTrue(
|
||||
postprocessors.value['NormalPluginPP'].REPLACED,
|
||||
msg='Reloading has not replaced original postprocessor plugin globally')
|
||||
|
||||
finally:
|
||||
Config._plugin_dirs = []
|
||||
importlib.invalidate_caches() # reset the import caches
|
||||
sys.path.remove(str(reload_plugins_path))
|
||||
sys.path.append(str(TEST_DATA_DIR))
|
||||
importlib.invalidate_caches()
|
||||
|
||||
def test_extractor_override_plugin(self):
    """Override plugins patch GenericIE, and loading again does not stack them."""
    overridden_name = 'generic+override+underscore-override'

    load_plugins(EXTRACTOR_PLUGIN_SPEC)

    from yt_dlp.extractor.generic import GenericIE

    self.assertEqual(GenericIE.TEST_FIELD, 'override')
    self.assertEqual(GenericIE.SECONDARY_TEST_FIELD, 'underscore-override')
    self.assertEqual(GenericIE.IE_NAME, overridden_name)

    importlib.invalidate_caches()

    # test that loading a second time doesn't wrap a second time
    load_plugins(EXTRACTOR_PLUGIN_SPEC)
    from yt_dlp.extractor.generic import GenericIE
    self.assertEqual(GenericIE.IE_NAME, overridden_name)
|
||||
|
||||
def test_load_all_plugin_types(self):
    """load_all_plugins imports plugin modules only for registered specs."""
    normal_modules = (
        f'{PACKAGE_NAME}.extractor.normal',
        f'{PACKAGE_NAME}.postprocessor.normal',
    )

    # no plugin specs registered
    load_all_plugins()
    for module_name in normal_modules:
        self.assertNotIn(module_name, sys.modules.keys())

    for spec in (EXTRACTOR_PLUGIN_SPEC, POSTPROCESSOR_PLUGIN_SPEC):
        register_plugin_spec(spec)
    load_all_plugins()
    self.assertTrue(all_plugins_loaded.value)

    for module_name in normal_modules:
        self.assertIn(module_name, sys.modules.keys())
|
||||
|
||||
def test_no_plugin_dirs(self):
    for spec in (EXTRACTOR_PLUGIN_SPEC, POSTPROCESSOR_PLUGIN_SPEC):
        register_plugin_spec(spec)

    # With an empty directory list, loading must not import any plugin module
    plugin_dirs.value = []
    load_all_plugins()

    unexpected_modules = (
        f'{PACKAGE_NAME}.extractor.normal',
        f'{PACKAGE_NAME}.postprocessor.normal',
    )
    for module_name in unexpected_modules:
        self.assertNotIn(module_name, sys.modules.keys())
|
||||
|
||||
def test_set_plugin_dirs(self):
    # Replace the directory list outright with a single custom directory
    plugin_dirs.value = [str(TEST_DATA_DIR / 'plugin_packages')]

    load_plugins(EXTRACTOR_PLUGIN_SPEC)

    # The package-style plugin from that directory must now be importable and registered
    loaded_module = f'{PACKAGE_NAME}.extractor.package'
    self.assertIn(loaded_module, sys.modules.keys())
    self.assertIn('PackagePluginIE', plugin_ies.value)
|
||||
|
||||
def test_invalid_plugin_dir(self):
    # Loading with this directory entry is expected to raise ValueError
    plugin_dirs.value = ['invalid_dir']
    self.assertRaises(ValueError, load_plugins, EXTRACTOR_PLUGIN_SPEC)
|
||||
|
||||
def test_append_plugin_dirs(self):
    extra_dir = str(TEST_DATA_DIR / 'plugin_packages')

    # The list starts out as ['default']; appending mutates it in place
    self.assertEqual(plugin_dirs.value, ['default'])
    plugin_dirs.value.append(extra_dir)
    self.assertEqual(plugin_dirs.value, ['default', extra_dir])

    load_plugins(EXTRACTOR_PLUGIN_SPEC)

    # The plugin from the appended directory must be picked up
    loaded_module = f'{PACKAGE_NAME}.extractor.package'
    self.assertIn(loaded_module, sys.modules.keys())
    self.assertIn('PackagePluginIE', plugin_ies.value)
|
||||
|
||||
def test_get_plugin_spec(self):
    expected_specs = {
        'extractor': EXTRACTOR_PLUGIN_SPEC,
        'postprocessor': POSTPROCESSOR_PLUGIN_SPEC,
    }
    for spec in expected_specs.values():
        register_plugin_spec(spec)

    # Registered specs are looked up by name; unknown names yield None
    for spec_name, spec in expected_specs.items():
        self.assertEqual(plugin_specs.value.get(spec_name), spec)
    self.assertIsNone(plugin_specs.value.get('invalid'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -2,4 +2,5 @@
|
|||
|
||||
|
||||
class PackagePluginIE(InfoExtractor):
|
||||
_VALID_URL = 'package'
|
||||
pass
|
||||
|
|
10
test/testdata/reload_plugins/yt_dlp_plugins/extractor/normal.py
vendored
Normal file
10
test/testdata/reload_plugins/yt_dlp_plugins/extractor/normal.py
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
|
||||
class NormalPluginIE(InfoExtractor):
|
||||
_VALID_URL = 'normal'
|
||||
REPLACED = True
|
||||
|
||||
|
||||
class _IgnoreUnderscorePluginIE(InfoExtractor):
|
||||
pass
|
5
test/testdata/reload_plugins/yt_dlp_plugins/postprocessor/normal.py
vendored
Normal file
5
test/testdata/reload_plugins/yt_dlp_plugins/postprocessor/normal.py
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
from yt_dlp.postprocessor.common import PostProcessor
|
||||
|
||||
|
||||
class NormalPluginPP(PostProcessor):
|
||||
REPLACED = True
|
|
@ -6,6 +6,7 @@ class IgnoreNotInAllPluginIE(InfoExtractor):
|
|||
|
||||
|
||||
class InAllPluginIE(InfoExtractor):
|
||||
_VALID_URL = 'inallpluginie'
|
||||
pass
|
||||
|
||||
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
|
||||
|
||||
class NormalPluginIE(InfoExtractor):
|
||||
pass
|
||||
_VALID_URL = 'normalpluginie'
|
||||
REPLACED = False
|
||||
|
||||
|
||||
class _IgnoreUnderscorePluginIE(InfoExtractor):
|
||||
_VALID_URL = 'ignoreunderscorepluginie'
|
||||
pass
|
||||
|
|
5
test/testdata/yt_dlp_plugins/extractor/override.py
vendored
Normal file
5
test/testdata/yt_dlp_plugins/extractor/override.py
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
from yt_dlp.extractor.generic import GenericIE
|
||||
|
||||
|
||||
class OverrideGenericIE(GenericIE, plugin_name='override'):
|
||||
TEST_FIELD = 'override'
|
5
test/testdata/yt_dlp_plugins/extractor/overridetwo.py
vendored
Normal file
5
test/testdata/yt_dlp_plugins/extractor/overridetwo.py
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
from yt_dlp.extractor.generic import GenericIE
|
||||
|
||||
|
||||
class _UnderscoreOverrideGenericIE(GenericIE, plugin_name='underscore-override'):
|
||||
SECONDARY_TEST_FIELD = 'underscore-override'
|
|
@ -2,4 +2,4 @@
|
|||
|
||||
|
||||
class NormalPluginPP(PostProcessor):
|
||||
pass
|
||||
REPLACED = False
|
||||
|
|
|
@ -2,4 +2,5 @@
|
|||
|
||||
|
||||
class ZippedPluginIE(InfoExtractor):
|
||||
_VALID_URL = 'zippedpluginie'
|
||||
pass
|
||||
|
|
|
@ -30,9 +30,18 @@
|
|||
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
|
||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .extractor import gen_extractor_classes, get_info_extractor
|
||||
from .extractor import gen_extractor_classes, get_info_extractor, import_extractors
|
||||
from .extractor.common import UnsupportedURLIE
|
||||
from .extractor.openload import PhantomJSwrapper
|
||||
from .globals import (
|
||||
IN_CLI,
|
||||
LAZY_EXTRACTORS,
|
||||
plugin_ies,
|
||||
plugin_ies_overrides,
|
||||
plugin_pps,
|
||||
all_plugins_loaded,
|
||||
plugin_dirs,
|
||||
)
|
||||
from .minicurses import format_text
|
||||
from .networking import HEADRequest, Request, RequestDirector
|
||||
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
|
||||
|
@ -44,8 +53,7 @@
|
|||
network_exceptions,
|
||||
)
|
||||
from .networking.impersonate import ImpersonateRequestHandler
|
||||
from .plugins import directories as plugin_directories
|
||||
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
|
||||
from .plugins import directories as plugin_directories, load_all_plugins
|
||||
from .postprocessor import (
|
||||
EmbedThumbnailPP,
|
||||
FFmpegFixupDuplicateMoovPP,
|
||||
|
@ -157,7 +165,7 @@
|
|||
write_json_file,
|
||||
write_string,
|
||||
)
|
||||
from .utils._utils import _UnsafeExtensionError, _YDLLogger
|
||||
from .utils._utils import _UnsafeExtensionError, _YDLLogger, _ProgressState
|
||||
from .utils.networking import (
|
||||
HTTPHeaderDict,
|
||||
clean_headers,
|
||||
|
@ -642,20 +650,23 @@ def __init__(self, params=None, auto_init=True):
|
|||
self.cache = Cache(self)
|
||||
self.__header_cookies = []
|
||||
|
||||
stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
|
||||
self._out_files = Namespace(
|
||||
out=stdout,
|
||||
error=sys.stderr,
|
||||
screen=sys.stderr if self.params.get('quiet') else stdout,
|
||||
console=None if os.name == 'nt' else next(
|
||||
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
|
||||
)
|
||||
# compat for API: load plugins if they have not already
|
||||
if not all_plugins_loaded.value:
|
||||
load_all_plugins()
|
||||
|
||||
try:
|
||||
windows_enable_vt_mode()
|
||||
except Exception as e:
|
||||
self.write_debug(f'Failed to enable VT mode: {e}')
|
||||
|
||||
stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
|
||||
self._out_files = Namespace(
|
||||
out=stdout,
|
||||
error=sys.stderr,
|
||||
screen=sys.stderr if self.params.get('quiet') else stdout,
|
||||
console=next(filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
|
||||
)
|
||||
|
||||
if self.params.get('no_color'):
|
||||
if self.params.get('color') is not None:
|
||||
self.params.setdefault('_warnings', []).append(
|
||||
|
@ -956,21 +967,22 @@ def to_stderr(self, message, only_once=False):
|
|||
self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
|
||||
|
||||
def _send_console_code(self, code):
|
||||
if os.name == 'nt' or not self._out_files.console:
|
||||
return
|
||||
if not supports_terminal_sequences(self._out_files.console):
|
||||
return False
|
||||
self._write_string(code, self._out_files.console)
|
||||
return True
|
||||
|
||||
def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
def to_console_title(self, message=None, progress_state=None, percent=None):
|
||||
if not self.params.get('consoletitle'):
|
||||
return
|
||||
message = remove_terminal_sequences(message)
|
||||
if os.name == 'nt':
|
||||
if ctypes.windll.kernel32.GetConsoleWindow():
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
|
||||
else:
|
||||
self._send_console_code(f'\033]0;{message}\007')
|
||||
|
||||
if message:
|
||||
success = self._send_console_code(f'\033]0;{remove_terminal_sequences(message)}\007')
|
||||
if not success and os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
|
||||
ctypes.windll.kernel32.SetConsoleTitleW(message)
|
||||
|
||||
if isinstance(progress_state, _ProgressState):
|
||||
self._send_console_code(progress_state.get_ansi_escape(percent))
|
||||
|
||||
def save_console_title(self):
|
||||
if not self.params.get('consoletitle') or self.params.get('simulate'):
|
||||
|
@ -984,6 +996,7 @@ def restore_console_title(self):
|
|||
|
||||
def __enter__(self):
|
||||
self.save_console_title()
|
||||
self.to_console_title(progress_state=_ProgressState.INDETERMINATE)
|
||||
return self
|
||||
|
||||
def save_cookies(self):
|
||||
|
@ -992,6 +1005,7 @@ def save_cookies(self):
|
|||
|
||||
def __exit__(self, *args):
|
||||
self.restore_console_title()
|
||||
self.to_console_title(progress_state=_ProgressState.HIDDEN)
|
||||
self.close()
|
||||
|
||||
def close(self):
|
||||
|
@ -3993,15 +4007,6 @@ def print_debug_header(self):
|
|||
if not self.params.get('verbose'):
|
||||
return
|
||||
|
||||
from . import _IN_CLI # Must be delayed import
|
||||
|
||||
# These imports can be slow. So import them only as needed
|
||||
from .extractor.extractors import _LAZY_LOADER
|
||||
from .extractor.extractors import (
|
||||
_PLUGIN_CLASSES as plugin_ies,
|
||||
_PLUGIN_OVERRIDES as plugin_ie_overrides,
|
||||
)
|
||||
|
||||
def get_encoding(stream):
|
||||
ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
|
||||
additional_info = []
|
||||
|
@ -4040,17 +4045,18 @@ def get_encoding(stream):
|
|||
_make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
|
||||
f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
|
||||
'' if source == 'unknown' else f'({source})',
|
||||
'' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
|
||||
'' if IN_CLI.value else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
|
||||
delim=' '))
|
||||
|
||||
if not _IN_CLI:
|
||||
if not IN_CLI.value:
|
||||
write_debug(f'params: {self.params}')
|
||||
|
||||
if not _LAZY_LOADER:
|
||||
if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
|
||||
write_debug('Lazy loading extractors is forcibly disabled')
|
||||
else:
|
||||
import_extractors()
|
||||
lazy_extractors = LAZY_EXTRACTORS.value
|
||||
if lazy_extractors is None:
|
||||
write_debug('Lazy loading extractors is disabled')
|
||||
elif not lazy_extractors:
|
||||
write_debug('Lazy loading extractors is forcibly disabled')
|
||||
if self.params['compat_opts']:
|
||||
write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))
|
||||
|
||||
|
@ -4079,24 +4085,27 @@ def get_encoding(stream):
|
|||
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
if os.environ.get('YTDLP_NO_PLUGINS'):
|
||||
write_debug('Plugins are forcibly disabled')
|
||||
return
|
||||
|
||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||
display_list = ['{}{}'.format(
|
||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||
for name, klass in plugins.items()]
|
||||
for plugin_type, plugins in (('Extractor', plugin_ies), ('Post-Processor', plugin_pps)):
|
||||
display_list = [
|
||||
klass.__name__ if klass.__name__ == name else f'{klass.__name__} as {name}'
|
||||
for name, klass in plugins.value.items()]
|
||||
if plugin_type == 'Extractor':
|
||||
display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
|
||||
for parent, plugins in plugin_ie_overrides.items())
|
||||
for parent, plugins in plugin_ies_overrides.value.items())
|
||||
if not display_list:
|
||||
continue
|
||||
write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
|
||||
|
||||
plugin_dirs = plugin_directories()
|
||||
if plugin_dirs:
|
||||
write_debug(f'Plugin directories: {plugin_dirs}')
|
||||
plugin_dirs_msg = 'none'
|
||||
if not plugin_dirs.value:
|
||||
plugin_dirs_msg = 'none (disabled)'
|
||||
else:
|
||||
found_plugin_directories = plugin_directories()
|
||||
if found_plugin_directories:
|
||||
plugin_dirs_msg = ', '.join(found_plugin_directories)
|
||||
|
||||
write_debug(f'Plugin directories: {plugin_dirs_msg}')
|
||||
|
||||
@functools.cached_property
|
||||
def proxies(self):
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
from .extractor import list_extractor_classes
|
||||
from .extractor.adobepass import MSO_INFO
|
||||
from .networking.impersonate import ImpersonateTarget
|
||||
from .globals import IN_CLI, plugin_dirs
|
||||
from .options import parseOpts
|
||||
from .plugins import load_all_plugins as _load_all_plugins
|
||||
from .postprocessor import (
|
||||
FFmpegExtractAudioPP,
|
||||
FFmpegMergerPP,
|
||||
|
@ -33,7 +35,6 @@
|
|||
)
|
||||
from .update import Updater
|
||||
from .utils import (
|
||||
Config,
|
||||
NO_DEFAULT,
|
||||
POSTPROCESS_WHEN,
|
||||
DateRange,
|
||||
|
@ -66,8 +67,6 @@
|
|||
from .utils._utils import _UnsafeExtensionError
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
_IN_CLI = False
|
||||
|
||||
|
||||
def _exit(status=0, *args):
|
||||
for msg in args:
|
||||
|
@ -295,18 +294,20 @@ def parse_sleep_func(expr):
|
|||
raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
|
||||
|
||||
# Bytes
|
||||
def validate_bytes(name, value):
|
||||
def validate_bytes(name, value, strict_positive=False):
|
||||
if value is None:
|
||||
return None
|
||||
numeric_limit = parse_bytes(value)
|
||||
validate(numeric_limit is not None, 'rate limit', value)
|
||||
validate(numeric_limit is not None, name, value)
|
||||
if strict_positive:
|
||||
validate_positive(name, numeric_limit, True)
|
||||
return numeric_limit
|
||||
|
||||
opts.ratelimit = validate_bytes('rate limit', opts.ratelimit)
|
||||
opts.ratelimit = validate_bytes('rate limit', opts.ratelimit, True)
|
||||
opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit)
|
||||
opts.min_filesize = validate_bytes('min filesize', opts.min_filesize)
|
||||
opts.max_filesize = validate_bytes('max filesize', opts.max_filesize)
|
||||
opts.buffersize = validate_bytes('buffer size', opts.buffersize)
|
||||
opts.buffersize = validate_bytes('buffer size', opts.buffersize, True)
|
||||
opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size)
|
||||
|
||||
# Output templates
|
||||
|
@ -431,6 +432,10 @@ def metadataparser_actions(f):
|
|||
}
|
||||
|
||||
# Other options
|
||||
opts.plugin_dirs = opts.plugin_dirs
|
||||
if opts.plugin_dirs is None:
|
||||
opts.plugin_dirs = ['default']
|
||||
|
||||
if opts.playlist_items is not None:
|
||||
try:
|
||||
tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
|
||||
|
@ -971,11 +976,6 @@ def _real_main(argv=None):
|
|||
|
||||
parser, opts, all_urls, ydl_opts = parse_options(argv)
|
||||
|
||||
# HACK: Set the plugin dirs early on
|
||||
# TODO(coletdjnz): remove when plugin globals system is implemented
|
||||
if opts.plugin_dirs is not None:
|
||||
Config._plugin_dirs = list(map(expand_path, opts.plugin_dirs))
|
||||
|
||||
# Dump user agent
|
||||
if opts.dump_user_agent:
|
||||
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
|
||||
|
@ -990,6 +990,11 @@ def _real_main(argv=None):
|
|||
if opts.ffmpeg_location:
|
||||
FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location)
|
||||
|
||||
# load all plugins into the global lookup
|
||||
plugin_dirs.value = opts.plugin_dirs
|
||||
if plugin_dirs.value:
|
||||
_load_all_plugins()
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
pre_process = opts.update_self or opts.rm_cachedir
|
||||
actual_use = all_urls or opts.load_info_filename
|
||||
|
@ -1089,8 +1094,7 @@ def make_row(target, handler):
|
|||
|
||||
|
||||
def main(argv=None):
|
||||
global _IN_CLI
|
||||
_IN_CLI = True
|
||||
IN_CLI.value = True
|
||||
try:
|
||||
_exit(*variadic(_real_main(argv)))
|
||||
except (CookieLoadError, DownloadError):
|
||||
|
|
|
@ -35,6 +35,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
|
|||
from .rtsp import RtspFD
|
||||
from .websocket import WebSocketFragmentFD
|
||||
from .youtube_live_chat import YoutubeLiveChatFD
|
||||
from .bunnycdn import BunnyCdnFD
|
||||
|
||||
PROTOCOL_MAP = {
|
||||
'rtmp': RtmpFD,
|
||||
|
@ -55,6 +56,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
|
|||
'websocket_frag': WebSocketFragmentFD,
|
||||
'youtube_live_chat': YoutubeLiveChatFD,
|
||||
'youtube_live_chat_replay': YoutubeLiveChatFD,
|
||||
'bunnycdn': BunnyCdnFD,
|
||||
}
|
||||
|
||||
|
||||
|
|
50
yt_dlp/downloader/bunnycdn.py
Normal file
50
yt_dlp/downloader/bunnycdn.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
import hashlib
|
||||
import random
|
||||
import threading
|
||||
|
||||
from .common import FileDownloader
|
||||
from . import HlsFD
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import network_exceptions
|
||||
|
||||
|
||||
class BunnyCdnFD(FileDownloader):
    """
    Downloads from BunnyCDN with required pings
    Note, this is not a part of public API, and will be removed without notice.
    DO NOT USE
    """

    def real_download(self, filename, info_dict):
        # The actual transfer is delegated to HlsFD; this class only adds the
        # background pinger, whose keyword arguments come verbatim from
        # info_dict['_bunnycdn_ping_data'].
        self.to_screen(f'[{self.FD_NAME}] Downloading from BunnyCDN')

        hls_downloader = HlsFD(self.ydl, self.params)

        finished = threading.Event()
        pinger = threading.Thread(
            target=self.ping_thread,
            args=(finished,),
            kwargs=info_dict['_bunnycdn_ping_data'],
        )
        pinger.start()

        try:
            return hls_downloader.real_download(filename, info_dict)
        finally:
            # Signal the pinger to stop whether the download succeeded or raised
            finished.set()

    def ping_thread(self, stop_event, url, headers, secret, context_id):
        # Site sends ping every 4 seconds, but this throttles the download. Pinging every 2 seconds seems to work.
        ping_interval = 2
        # Hard coded resolution as it doesn't seem to matter
        res = 1080
        paused = 'false'
        current_time = 0

        while True:
            # wait() returns True once stop_event is set; otherwise it times
            # out after ping_interval seconds and another ping is sent
            if stop_event.wait(ping_interval):
                break
            current_time += ping_interval

            # Accumulated interval time plus a random sub-second fraction
            time = current_time + round(random.random(), 6)
            signature = f'{secret}_{context_id}_{time}_{paused}_{res}'.encode()
            md5_hash = hashlib.md5(signature).hexdigest()
            ping_url = f'{url}?hash={md5_hash}&time={time}&paused={paused}&resolution={res}'

            try:
                response = self.ydl.urlopen(Request(ping_url, headers=headers))
                response.read()
            except network_exceptions as e:
                # A failed ping is reported but does not stop the loop
                self.to_screen(f'[{self.FD_NAME}] Ping failed: {e}')
|
|
@ -31,6 +31,7 @@
|
|||
timetuple_from_msec,
|
||||
try_call,
|
||||
)
|
||||
from ..utils._utils import _ProgressState
|
||||
|
||||
|
||||
class FileDownloader:
|
||||
|
@ -333,7 +334,7 @@ def _report_progress_status(self, s, default_template):
|
|||
progress_dict), s.get('progress_idx') or 0)
|
||||
self.to_console_title(self.ydl.evaluate_outtmpl(
|
||||
progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
|
||||
progress_dict))
|
||||
progress_dict), _ProgressState.from_dict(s), s.get('_percent'))
|
||||
|
||||
def _format_progress(self, *args, **kwargs):
|
||||
return self.ydl._format_text(
|
||||
|
@ -357,6 +358,7 @@ def with_fields(*tups, default=''):
|
|||
'_speed_str': self.format_speed(speed).strip(),
|
||||
'_total_bytes_str': _format_bytes('total_bytes'),
|
||||
'_elapsed_str': self.format_seconds(s.get('elapsed')),
|
||||
'_percent': 100.0,
|
||||
'_percent_str': self.format_percent(100),
|
||||
})
|
||||
self._report_progress_status(s, join_nonempty(
|
||||
|
@ -375,13 +377,15 @@ def with_fields(*tups, default=''):
|
|||
return
|
||||
self._progress_delta_time += update_delta
|
||||
|
||||
progress = try_call(
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
|
||||
lambda: s['downloaded_bytes'] == 0 and 0)
|
||||
s.update({
|
||||
'_eta_str': self.format_eta(s.get('eta')).strip(),
|
||||
'_speed_str': self.format_speed(s.get('speed')),
|
||||
'_percent_str': self.format_percent(try_call(
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
|
||||
lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
|
||||
lambda: s['downloaded_bytes'] == 0 and 0)),
|
||||
'_percent': progress,
|
||||
'_percent_str': self.format_percent(progress),
|
||||
'_total_bytes_str': _format_bytes('total_bytes'),
|
||||
'_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
|
||||
'_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
update_url_query,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils._utils import _request_dump_filename
|
||||
|
||||
|
||||
class HlsFD(FragmentFD):
|
||||
|
@ -80,7 +81,15 @@ def real_download(self, filename, info_dict):
|
|||
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
s = urlh.read().decode('utf-8', 'ignore')
|
||||
s_bytes = urlh.read()
|
||||
if self.params.get('write_pages'):
|
||||
dump_filename = _request_dump_filename(
|
||||
man_url, info_dict['id'], None,
|
||||
trim_length=self.params.get('trim_file_name'))
|
||||
self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}')
|
||||
with open(dump_filename, 'wb') as outf:
|
||||
outf.write(s_bytes)
|
||||
s = s_bytes.decode('utf-8', 'ignore')
|
||||
|
||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||
if can_download:
|
||||
|
|
|
@ -1,16 +1,25 @@
|
|||
from ..compat.compat_utils import passthrough_module
|
||||
from ..globals import extractors as _extractors_context
|
||||
from ..globals import plugin_ies as _plugin_ies_context
|
||||
from ..plugins import PluginSpec, register_plugin_spec
|
||||
|
||||
passthrough_module(__name__, '.extractors')
|
||||
del passthrough_module
|
||||
|
||||
register_plugin_spec(PluginSpec(
|
||||
module_name='extractor',
|
||||
suffix='IE',
|
||||
destination=_extractors_context,
|
||||
plugin_destination=_plugin_ies_context,
|
||||
))
|
||||
|
||||
|
||||
def gen_extractor_classes():
|
||||
""" Return a list of supported extractors.
|
||||
The order does matter; the first extractor matched is the one handling the URL.
|
||||
"""
|
||||
from .extractors import _ALL_CLASSES
|
||||
|
||||
return _ALL_CLASSES
|
||||
import_extractors()
|
||||
return list(_extractors_context.value.values())
|
||||
|
||||
|
||||
def gen_extractors():
|
||||
|
@ -37,6 +46,9 @@ def list_extractors(age_limit=None):
|
|||
|
||||
def get_info_extractor(ie_name):
|
||||
"""Returns the info extractor class with the given ie_name"""
|
||||
from . import extractors
|
||||
import_extractors()
|
||||
return _extractors_context.value[f'{ie_name}IE']
|
||||
|
||||
return getattr(extractors, f'{ie_name}IE')
|
||||
|
||||
def import_extractors():
|
||||
from . import extractors # noqa: F401
|
||||
|
|
|
@ -312,6 +312,7 @@
|
|||
)
|
||||
from .bundesliga import BundesligaIE
|
||||
from .bundestag import BundestagIE
|
||||
from .bunnycdn import BunnyCdnIE
|
||||
from .businessinsider import BusinessInsiderIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
from .byutv import BYUtvIE
|
||||
|
|
178
yt_dlp/extractor/bunnycdn.py
Normal file
178
yt_dlp/extractor/bunnycdn.py
Normal file
|
@ -0,0 +1,178 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class BunnyCdnIE(InfoExtractor):
    _VALID_URL = r'https?://(?:iframe\.mediadelivery\.net|video\.bunnycdn\.com)/(?:embed|play)/(?P<library_id>\d+)/(?P<id>[\da-f-]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL}[^\'"]*)[\'"]']
    _TESTS = [{
        'url': 'https://iframe.mediadelivery.net/embed/113933/e73edec1-e381-4c8b-ae73-717a140e0924',
        'info_dict': {
            'id': 'e73edec1-e381-4c8b-ae73-717a140e0924',
            'ext': 'mp4',
            'title': 'mistress morgana (3).mp4',
            'description': '',
            'timestamp': 1693251673,
            'thumbnail': r're:^https?://.*\.b-cdn\.net/e73edec1-e381-4c8b-ae73-717a140e0924/thumbnail\.jpg',
            'duration': 7.0,
            'upload_date': '20230828',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://iframe.mediadelivery.net/play/136145/32e34c4b-0d72-437c-9abb-05e67657da34',
        'info_dict': {
            'id': '32e34c4b-0d72-437c-9abb-05e67657da34',
            'ext': 'mp4',
            'timestamp': 1691145748,
            'thumbnail': r're:^https?://.*\.b-cdn\.net/32e34c4b-0d72-437c-9abb-05e67657da34/thumbnail_9172dc16\.jpg',
            'duration': 106.0,
            'description': 'md5:981a3e899a5c78352b21ed8b2f1efd81',
            'upload_date': '20230804',
            'title': 'Sanela ist Teil der #arbeitsmarktkraft',
        },
        'params': {'skip_download': True},
    }, {
        # Stream requires activation and pings
        'url': 'https://iframe.mediadelivery.net/embed/200867/2e8545ec-509d-4571-b855-4cf0235ccd75',
        'info_dict': {
            'id': '2e8545ec-509d-4571-b855-4cf0235ccd75',
            'ext': 'mp4',
            'timestamp': 1708497752,
            'title': 'netflix part 1',
            'duration': 3959.0,
            'description': '',
            'upload_date': '20240221',
            'thumbnail': r're:^https?://.*\.b-cdn\.net/2e8545ec-509d-4571-b855-4cf0235ccd75/thumbnail\.jpg',
        },
        'params': {'skip_download': True},
    }]
    _WEBPAGE_TESTS = [{
        # Stream requires Referer
        'url': 'https://conword.io/',
        'info_dict': {
            'id': '3a5d863e-9cd6-447e-b6ef-e289af50b349',
            'ext': 'mp4',
            'title': 'Conword bei der Stadt Köln und Stadt Dortmund',
            'description': '',
            'upload_date': '20231031',
            'duration': 31.0,
            'thumbnail': 'https://video.watchuh.com/3a5d863e-9cd6-447e-b6ef-e289af50b349/thumbnail.jpg',
            'timestamp': 1698783879,
        },
        'params': {'skip_download': True},
    }, {
        # URL requires token and expires
        'url': 'https://www.stockphotos.com/video/moscow-subway-the-train-is-arriving-at-the-park-kultury-station-10017830',
        'info_dict': {
            'id': '0b02fa20-4e8c-4140-8f87-f64d820a3386',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.b-cdn\.net/0b02fa20-4e8c-4140-8f87-f64d820a3386/thumbnail\.jpg',
            'title': 'Moscow subway. The train is arriving at the Park Kultury station.',
            'upload_date': '20240531',
            'duration': 18.0,
            'timestamp': 1717152269,
            'description': '',
        },
        'params': {'skip_download': True},
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # Smuggle the embedding page's URL along so that _real_extract can
        # send it as the Referer when fetching the player page
        for embed_url in super()._extract_embed_urls(url, webpage):
            yield smuggle_url(embed_url, {'Referer': url})

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        video_id, library_id = self._match_valid_url(url).group('id', 'library_id')
        webpage = self._download_webpage(
            f'https://iframe.mediadelivery.net/embed/{library_id}/{video_id}', video_id,
            headers=traverse_obj(smuggled_data, {'Referer': 'Referer'}),
            query=traverse_obj(parse_qs(url), {'token': 'token', 'expires': 'expires'}))

        # The title must be bound *before* comparing: `x := f() == '403'` binds
        # the comparison result (a bool) to x, which made the 404 branch unreachable
        html_title = self._html_extract_title(webpage, default=None)
        if html_title == '403':
            raise ExtractorError(
                'This video is inaccessible. Setting a Referer header '
                'might be required to access the video', expected=True)
        elif html_title == '404':
            raise ExtractorError('This video does not exist', expected=True)

        headers = {'Referer': url}

        info = traverse_obj(self._parse_html5_media_entries(url, webpage, video_id, _headers=headers), 0) or {}
        formats = info.get('formats') or []
        subtitles = info.get('subtitles') or {}

        original_url = self._search_regex(
            r'(?:var|const|let)\s+originalUrl\s*=\s*["\']([^"\']+)["\']', webpage, 'original url', default=None)
        if url_or_none(original_url):
            # Only offer the original file as a format if a HEAD request confirms it is reachable
            urlh = self._request_webpage(
                HEADRequest(original_url), video_id=video_id, note='Checking original',
                headers=headers, fatal=False, expected_status=(403, 404))
            if urlh and urlh.status == 200:
                formats.append({
                    'url': original_url,
                    'format_id': 'source',
                    'quality': 1,
                    'http_headers': headers,
                    'ext': urlhandle_detect_ext(urlh, default='mp4'),
                    'filesize': int_or_none(urlh.get_header('Content-Length')),
                })

        # MediaCage Streams require activation and pings
        src_url = self._search_regex(
            r'\.setAttribute\([\'"]src[\'"],\s*[\'"]([^\'"]+)[\'"]\)', webpage, 'src url', default=None)
        activation_url = self._search_regex(
            r'loadUrl\([\'"]([^\'"]+/activate)[\'"]', webpage, 'activation url', default=None)
        ping_url = self._search_regex(
            r'loadUrl\([\'"]([^\'"]+/ping)[\'"]', webpage, 'ping url', default=None)
        src_query = parse_qs(src_url)
        secret = traverse_obj(src_query, ('secret', 0))
        context_id = traverse_obj(src_query, ('contextId', 0))
        ping_data = {}
        if src_url and activation_url and ping_url and secret and context_id:
            self._download_webpage(
                activation_url, video_id, headers=headers, note='Downloading activation data')

            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                src_url, video_id, 'mp4', headers=headers, m3u8_id='hls', fatal=False)
            for fmt in fmts:
                # Mark these formats with the 'bunnycdn' protocol so the
                # matching downloader is selected for them
                fmt.update({
                    'protocol': 'bunnycdn',
                    'http_headers': headers,
                })
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

            ping_data = {
                '_bunnycdn_ping_data': {
                    'url': ping_url,
                    'headers': headers,
                    'secret': secret,
                    'context_id': context_id,
                },
            }

        # Later entries take precedence: JSON-LD metadata overrides the
        # attributes scraped from the player element
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(webpage, ({find_element(id='main-video', html=True)}, {extract_attributes}, {
                'title': ('data-plyr-config', {json.loads}, 'title', {str}),
                'thumbnail': ('data-poster', {url_or_none}),
            })),
            **ping_data,
            **self._search_json_ld(webpage, video_id, fatal=False),
        }
|
|
@ -14,16 +14,18 @@
|
|||
js_to_json,
|
||||
mimetype2ext,
|
||||
orderedSet,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
replace_extension,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj, trim_str
|
||||
|
||||
|
||||
class CBCIE(InfoExtractor):
|
||||
|
@ -516,9 +518,43 @@ def entries():
|
|||
return self.playlist_result(entries(), playlist_id)
|
||||
|
||||
|
||||
class CBCGemIE(InfoExtractor):
|
||||
class CBCGemBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'cbcgem'
    _GEO_COUNTRIES = ['CA']

    def _call_show_api(self, item_id, display_id=None):
        # Fetch the catalog JSON for a show item; display_id, when given,
        # is used in place of item_id as the id passed to the download call
        api_url = f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}'
        return self._download_json(
            api_url, display_id or item_id, query={'device': 'web'})

    def _extract_item_info(self, item_info):
        # Titles of the form "<number>. <title>" are split into an episode
        # number and the bare title; other titles are used unchanged
        title = traverse_obj(item_info, ('title', {str}))
        numbered_title = re.match(r'(?P<episode>\d+)\. (?P<title>.+)', title) if title else None

        episode_number = None
        if numbered_title:
            episode_number = int_or_none(numbered_title.group('episode'))
            title = numbered_title.group('title')

        # The number parsed from the title is only a fallback: an explicit
        # 'episodeNumber' in the metadata overrides it below
        info = {'episode_number': episode_number}
        info.update(traverse_obj(item_info, {
            'id': ('url', {str}),
            'episode_id': ('url', {str}),
            'description': ('description', {str}),
            'thumbnail': ('images', 'card', 'url', {url_or_none}, {update_url(query=None)}),
            'episode_number': ('episodeNumber', {int_or_none}),
            'duration': ('metadata', 'duration', {int_or_none}),
            'release_timestamp': ('metadata', 'airDate', {unified_timestamp}),
            'timestamp': ('metadata', 'availabilityDate', {unified_timestamp}),
            'age_limit': ('metadata', 'rating', {trim_str(start='C')}, {parse_age_limit}),
        }))
        info['episode'] = title
        info['title'] = title
        return info
|
||||
|
||||
|
||||
class CBCGemIE(CBCGemBaseIE):
|
||||
IE_NAME = 'gem.cbc.ca'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)'
|
||||
_TESTS = [{
|
||||
# This is a normal, public, TV show video
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
||||
|
@ -529,7 +565,7 @@ class CBCGemIE(InfoExtractor):
|
|||
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
||||
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
||||
'duration': 1324,
|
||||
'categories': ['comedy'],
|
||||
'genres': ['Comédie et humour'],
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season': 'Season 6',
|
||||
'season_number': 6,
|
||||
|
@ -537,9 +573,10 @@ class CBCGemIE(InfoExtractor):
|
|||
'episode_number': 1,
|
||||
'episode_id': 'schitts-creek/s06e01',
|
||||
'upload_date': '20210618',
|
||||
'timestamp': 1623988800,
|
||||
'timestamp': 1623974400,
|
||||
'release_date': '20200107',
|
||||
'release_timestamp': 1578427200,
|
||||
'release_timestamp': 1578355200,
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
|
@ -557,12 +594,13 @@ class CBCGemIE(InfoExtractor):
|
|||
'episode_number': 1,
|
||||
'episode': 'The Cup Runneth Over',
|
||||
'episode_id': 'schitts-creek/s01e01',
|
||||
'duration': 1309,
|
||||
'categories': ['comedy'],
|
||||
'duration': 1308,
|
||||
'genres': ['Comédie et humour'],
|
||||
'upload_date': '20210617',
|
||||
'timestamp': 1623902400,
|
||||
'release_date': '20151124',
|
||||
'release_timestamp': 1448323200,
|
||||
'timestamp': 1623888000,
|
||||
'release_date': '20151123',
|
||||
'release_timestamp': 1448236800,
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
|
@ -570,9 +608,7 @@ class CBCGemIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GEO_COUNTRIES = ['CA']
|
||||
_TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
|
||||
_NETRC_MACHINE = 'cbcgem'
|
||||
_claims_token = None
|
||||
|
||||
def _new_claims_token(self, email, password):
|
||||
|
@ -634,10 +670,12 @@ def _real_initialize(self):
|
|||
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_info = self._download_json(
|
||||
f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
|
||||
video_id, expected_status=426)
|
||||
video_id, season_number = self._match_valid_url(url).group('id', 'season')
|
||||
video_info = self._call_show_api(video_id)
|
||||
item_info = traverse_obj(video_info, (
|
||||
'content', ..., 'lineups', ..., 'items',
|
||||
lambda _, v: v['url'] == video_id, any, {require('item info')}))
|
||||
media_id = item_info['idMedia']
|
||||
|
||||
email, password = self._get_login_info()
|
||||
if email and password:
|
||||
|
@ -645,7 +683,20 @@ def _real_extract(self, url):
|
|||
headers = {'x-claims-token': claims_token}
|
||||
else:
|
||||
headers = {}
|
||||
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
||||
|
||||
m3u8_info = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/',
|
||||
video_id, headers=headers, query={
|
||||
'appCode': 'gem',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestVersion': '2',
|
||||
'manifestType': 'desktop',
|
||||
'idMedia': media_id,
|
||||
})
|
||||
|
||||
if m3u8_info.get('errorCode') == 1:
|
||||
self.raise_geo_restricted(countries=['CA'])
|
||||
|
@ -671,26 +722,20 @@ def _real_extract(self, url):
|
|||
fmt['preference'] = -2
|
||||
|
||||
return {
|
||||
'season_number': int_or_none(season_number),
|
||||
**traverse_obj(video_info, {
|
||||
'series': ('title', {str}),
|
||||
'season_number': ('structuredMetadata', 'partofSeason', 'seasonNumber', {int_or_none}),
|
||||
'genres': ('structuredMetadata', 'genre', ..., {str}),
|
||||
}),
|
||||
**self._extract_item_info(item_info),
|
||||
'id': video_id,
|
||||
'episode_id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('image', {url_or_none}),
|
||||
'series': ('series', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'categories': ('category', {str}, all),
|
||||
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
||||
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CBCGemPlaylistIE(InfoExtractor):
|
||||
class CBCGemPlaylistIE(CBCGemBaseIE):
|
||||
IE_NAME = 'gem.cbc.ca:playlist'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
|
@ -700,70 +745,35 @@ class CBCGemPlaylistIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': 'schitts-creek/s06',
|
||||
'title': 'Season 6',
|
||||
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 6,
|
||||
'season': 'Season 6',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
|
||||
|
||||
def _entries(self, season_info):
    """Yield one URL result per season item that passes the ``v['url']`` filter."""
    episodes = traverse_obj(season_info, ('items', lambda _, v: v['url']))
    for episode in episodes:
        episode_url = f'https://gem.cbc.ca/media/{episode["url"]}'
        # Metadata extracted from the listing is forwarded as keyword arguments
        metadata = self._extract_item_info(episode)
        yield self.url_result(episode_url, CBCGemIE, **metadata)
|
||||
|
||||
def _real_extract(self, url):
|
||||
match = self._match_valid_url(url)
|
||||
season_id = match.group('id')
|
||||
show = match.group('show')
|
||||
show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426)
|
||||
season = int(match.group('season'))
|
||||
season_id, show, season = self._match_valid_url(url).group('id', 'show', 'season')
|
||||
show_info = self._call_show_api(show, display_id=season_id)
|
||||
season_info = traverse_obj(show_info, (
|
||||
'content', ..., 'lineups',
|
||||
lambda _, v: v['seasonNumber'] == int(season), any, {require('season info')}))
|
||||
|
||||
season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)
|
||||
|
||||
if season_info is None:
|
||||
raise ExtractorError(f'Couldn\'t find season {season} of {show}')
|
||||
|
||||
episodes = []
|
||||
for episode in season_info['assets']:
|
||||
episodes.append({
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'CBCGem',
|
||||
'url': 'https://gem.cbc.ca/media/' + episode['id'],
|
||||
'id': episode['id'],
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'thumbnail': episode.get('image'),
|
||||
'series': episode.get('series'),
|
||||
'season_number': episode.get('season'),
|
||||
'season': season_info['title'],
|
||||
'season_id': season_info.get('id'),
|
||||
'episode_number': episode.get('episode'),
|
||||
'episode': episode.get('title'),
|
||||
'episode_id': episode['id'],
|
||||
'duration': episode.get('duration'),
|
||||
'categories': [episode.get('category')],
|
||||
})
|
||||
|
||||
thumbnail = None
|
||||
tn_uri = season_info.get('image')
|
||||
# the-national was observed to use a "data:image/png;base64"
|
||||
# URI for their 'image' value. The image was 1x1, and is
|
||||
# probably just a placeholder, so it is ignored.
|
||||
if tn_uri is not None and not tn_uri.startswith('data:'):
|
||||
thumbnail = tn_uri
|
||||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': episodes,
|
||||
'id': season_id,
|
||||
'title': season_info['title'],
|
||||
'description': season_info.get('description'),
|
||||
'thumbnail': thumbnail,
|
||||
'series': show_info.get('title'),
|
||||
'season_number': season_info.get('season'),
|
||||
'season': season_info['title'],
|
||||
}
|
||||
return self.playlist_result(
|
||||
self._entries(season_info), season_id,
|
||||
**traverse_obj(season_info, {
|
||||
'title': ('title', {str}),
|
||||
'season': ('title', {str}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
}), series=traverse_obj(show_info, ('title', {str})))
|
||||
|
||||
|
||||
class CBCGemLiveIE(InfoExtractor):
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
import collections
|
||||
import functools
|
||||
import getpass
|
||||
import hashlib
|
||||
import http.client
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
|
@ -30,6 +29,7 @@
|
|||
from ..cookies import LenientSimpleCookie
|
||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||
from ..downloader.hls import HlsFD
|
||||
from ..globals import plugin_ies_overrides
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
|
@ -78,7 +78,6 @@
|
|||
parse_iso8601,
|
||||
parse_m3u8_attributes,
|
||||
parse_resolution,
|
||||
sanitize_filename,
|
||||
sanitize_url,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
|
@ -100,6 +99,7 @@
|
|||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from ..utils._utils import _request_dump_filename
|
||||
|
||||
|
||||
class InfoExtractor:
|
||||
|
@ -1022,23 +1022,6 @@ def __check_blocked(self, content):
|
|||
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||
expected=True)
|
||||
|
||||
def _request_dump_filename(self, url, video_id, data=None):
    """Return the filename used to dump or load the response for a request.

    The name is joined from ``video_id``, an MD5 hex digest of ``data`` (when
    given) and ``url``, trimmed to the ``trim_file_name`` param (240 when that
    is unset or falsy) and passed through ``sanitize_filename``.
    """
    digest = None if data is None else hashlib.md5(data).hexdigest()
    stem = join_nonempty(video_id, digest, url, delim='_')

    limit = self.get_param('trim_file_name') or 240
    if len(stem) > limit:
        # Replace the overflow with a hash of the full name
        suffix = '___' + hashlib.md5(stem.encode()).hexdigest()
        stem = stem[:limit - len(suffix)] + suffix

    dump_name = sanitize_filename(f'{stem}.dump', restricted=True)
    if os.name != 'nt':
        return dump_name

    # Working around MAX_PATH limitation on Windows (see
    # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
    full_path = os.path.abspath(dump_name)
    if len(full_path) > 259:
        return fR'\\?\{full_path}'
    return dump_name
|
||||
|
||||
def __decode_webpage(self, webpage_bytes, encoding, headers):
|
||||
if not encoding:
|
||||
encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
|
||||
|
@ -1067,7 +1050,9 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
|
|||
if self.get_param('write_pages'):
|
||||
if isinstance(url_or_request, Request):
|
||||
data = self._create_request(url_or_request, data).data
|
||||
filename = self._request_dump_filename(urlh.url, video_id, data)
|
||||
filename = _request_dump_filename(
|
||||
urlh.url, video_id, data,
|
||||
trim_length=self.get_param('trim_file_name'))
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
|
@ -1128,7 +1113,9 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
|
|||
impersonate=None, require_impersonation=False):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
|
||||
filename = _request_dump_filename(
|
||||
url_or_request.url, video_id, url_or_request.data,
|
||||
trim_length=self.get_param('trim_file_name'))
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
|
@ -3968,14 +3955,18 @@ def _extract_url(cls, webpage): # TODO: Remove
|
|||
def __init_subclass__(cls, *, plugin_name=None, **kwargs):
|
||||
if plugin_name:
|
||||
mro = inspect.getmro(cls)
|
||||
super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
|
||||
cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key
|
||||
cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}'
|
||||
next_mro_class = super_class = mro[mro.index(cls) + 1]
|
||||
|
||||
while getattr(super_class, '__wrapped__', None):
|
||||
super_class = super_class.__wrapped__
|
||||
setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
|
||||
_PLUGIN_OVERRIDES[super_class].append(cls)
|
||||
|
||||
if not any(override.PLUGIN_NAME == plugin_name for override in plugin_ies_overrides.value[super_class]):
|
||||
cls.__wrapped__ = next_mro_class
|
||||
cls.PLUGIN_NAME, cls.ie_key = plugin_name, next_mro_class.ie_key
|
||||
cls.IE_NAME = f'{next_mro_class.IE_NAME}+{plugin_name}'
|
||||
|
||||
setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
|
||||
plugin_ies_overrides.value[super_class].append(cls)
|
||||
return super().__init_subclass__(**kwargs)
|
||||
|
||||
|
||||
|
@ -4031,6 +4022,3 @@ class UnsupportedURLIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
    """Always fail by raising ``UnsupportedError`` for ``url``."""
    unsupported = UnsupportedError(url)
    raise unsupported
|
||||
|
||||
|
||||
_PLUGIN_OVERRIDES = collections.defaultdict(list)
|
||||
|
|
|
@ -1,10 +1,24 @@
|
|||
from .zdf import ZDFIE
|
||||
from .zdf import ZDFBaseIE
|
||||
|
||||
|
||||
class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
||||
class DreiSatIE(ZDFBaseIE):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html',
|
||||
'info_dict': {
|
||||
'id': '231124_traumziele_philippinen_und_vietnam_dokreise',
|
||||
'ext': 'mp4',
|
||||
'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'description': 'md5:26329ce5197775b596773b939354079d',
|
||||
'duration': 2625.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148',
|
||||
'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95',
|
||||
'timestamp': 1738593000,
|
||||
'upload_date': '20250203',
|
||||
},
|
||||
}, {
|
||||
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
|
||||
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
|
||||
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||
|
@ -17,6 +31,7 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
|||
'timestamp': 1608604200,
|
||||
'upload_date': '20201222',
|
||||
},
|
||||
'skip': '410 Gone',
|
||||
}, {
|
||||
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
|
||||
'info_dict': {
|
||||
|
@ -30,6 +45,7 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
|
||||
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
|
||||
|
@ -39,3 +55,14 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE
|
|||
'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
if webpage:
|
||||
player = self._extract_player(webpage, url, fatal=False)
|
||||
if player:
|
||||
return self._extract_regular(url, player, video_id)
|
||||
|
||||
return self._extract_mobile(video_id)
|
||||
|
|
|
@ -1,28 +1,35 @@
|
|||
import contextlib
|
||||
import inspect
|
||||
import os
|
||||
|
||||
from ..plugins import load_plugins
|
||||
from ..globals import LAZY_EXTRACTORS
|
||||
from ..globals import extractors as _extractors_context
|
||||
|
||||
# NB: Must be before other imports so that plugins can be correctly injected
|
||||
_PLUGIN_CLASSES = load_plugins('extractor', 'IE')
|
||||
|
||||
_LAZY_LOADER = False
|
||||
_CLASS_LOOKUP = None
|
||||
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
|
||||
with contextlib.suppress(ImportError):
|
||||
from .lazy_extractors import * # noqa: F403
|
||||
from .lazy_extractors import _ALL_CLASSES
|
||||
_LAZY_LOADER = True
|
||||
try:
|
||||
from .lazy_extractors import _CLASS_LOOKUP
|
||||
LAZY_EXTRACTORS.value = True
|
||||
except ImportError:
|
||||
LAZY_EXTRACTORS.value = False
|
||||
|
||||
if not _LAZY_LOADER:
|
||||
from ._extractors import * # noqa: F403
|
||||
_ALL_CLASSES = [ # noqa: F811
|
||||
klass
|
||||
for name, klass in globals().items()
|
||||
if not _CLASS_LOOKUP:
|
||||
from . import _extractors
|
||||
|
||||
_CLASS_LOOKUP = {
|
||||
name: value
|
||||
for name, value in inspect.getmembers(_extractors)
|
||||
if name.endswith('IE') and name != 'GenericIE'
|
||||
]
|
||||
_ALL_CLASSES.append(GenericIE) # noqa: F405
|
||||
}
|
||||
_CLASS_LOOKUP['GenericIE'] = _extractors.GenericIE
|
||||
|
||||
globals().update(_PLUGIN_CLASSES)
|
||||
_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
|
||||
# We want to append to the main lookup
|
||||
_current = _extractors_context.value
|
||||
for name, ie in _CLASS_LOOKUP.items():
|
||||
_current.setdefault(name, ie)
|
||||
|
||||
from .common import _PLUGIN_OVERRIDES # noqa: F401
|
||||
|
||||
def __getattr__(name):
    """Module-level attribute hook: return the ``_CLASS_LOOKUP`` entry for ``name``.

    Raises ``AttributeError`` when the name is missing or maps to a falsy value.
    """
    found = _CLASS_LOOKUP.get(name)
    if found:
        return found
    raise AttributeError(f'module {__name__} has no attribute {name}')
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
filter_dict,
|
||||
format_field,
|
||||
int_or_none,
|
||||
|
@ -18,7 +19,7 @@
|
|||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
|
@ -460,11 +461,16 @@ def _real_extract(self, url):
|
|||
self.url_result(dailymotion_url, DailymotionIE.ie_key())
|
||||
for dailymotion_url in dailymotion_urls])
|
||||
|
||||
video_id = self._search_regex(
|
||||
video_id = (
|
||||
traverse_obj(webpage, (
|
||||
{find_element(tag='button', attr='data-cy', value='francetv-player-wrapper', html=True)},
|
||||
{extract_attributes}, 'id'))
|
||||
or self._search_regex(
|
||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||
r'id-video=([^@]+@[^"]+)',
|
||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||
webpage, 'video id')
|
||||
)
|
||||
|
||||
return self._make_url_result(video_id, url=url)
|
||||
|
|
|
@ -695,7 +695,7 @@ def _query_vars_for(data):
|
|||
|
||||
|
||||
class InstagramStoryIE(InstagramBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/?#]+)(?:/(?P<id>\d+))?'
|
||||
IE_NAME = 'instagram:story'
|
||||
|
||||
_TESTS = [{
|
||||
|
@ -705,25 +705,38 @@ class InstagramStoryIE(InstagramBaseIE):
|
|||
'title': 'Rare',
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/stories/fruits_zipper/3570766765028588805/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.instagram.com/stories/fruits_zipper',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
username, story_id = self._match_valid_url(url).groups()
|
||||
story_info = self._download_webpage(url, story_id)
|
||||
user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False)
|
||||
username, story_id = self._match_valid_url(url).group('user', 'id')
|
||||
if username == 'highlights' and not story_id: # story id is only mandatory for highlights
|
||||
raise ExtractorError('Input URL is missing a highlight ID', expected=True)
|
||||
display_id = story_id or username
|
||||
story_info = self._download_webpage(url, display_id)
|
||||
user_info = self._search_json(r'"user":', story_info, 'user info', display_id, fatal=False)
|
||||
if not user_info:
|
||||
self.raise_login_required('This content is unreachable')
|
||||
|
||||
user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str)
|
||||
story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
|
||||
if not story_info_url: # user id is only mandatory for non-highlights
|
||||
if username == 'highlights':
|
||||
story_info_url = f'highlight:{story_id}'
|
||||
else:
|
||||
if not user_id: # user id is only mandatory for non-highlights
|
||||
raise ExtractorError('Unable to extract user id')
|
||||
story_info_url = user_id
|
||||
|
||||
videos = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}',
|
||||
story_id, errnote=False, fatal=False, headers=self._api_headers), 'reels')
|
||||
display_id, errnote=False, fatal=False, headers=self._api_headers), 'reels')
|
||||
if not videos:
|
||||
self.raise_login_required('You need to log in to access this content')
|
||||
user_info = traverse_obj(videos, (user_id, 'user', {dict})) or {}
|
||||
|
||||
full_name = traverse_obj(videos, (f'highlight:{story_id}', 'user', 'full_name'), (user_id, 'user', 'full_name'))
|
||||
story_title = traverse_obj(videos, (f'highlight:{story_id}', 'title'))
|
||||
|
@ -733,6 +746,7 @@ def _real_extract(self, url):
|
|||
highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items'))
|
||||
info_data = []
|
||||
for highlight in highlights:
|
||||
highlight.setdefault('user', {}).update(user_info)
|
||||
highlight_data = self._extract_product(highlight)
|
||||
if highlight_data.get('formats'):
|
||||
info_data.append({
|
||||
|
@ -740,4 +754,7 @@ def _real_extract(self, url):
|
|||
'uploader_id': user_id,
|
||||
**filter_dict(highlight_data),
|
||||
})
|
||||
if username != 'highlights' and story_id and not self._yes_playlist(username, story_id):
|
||||
return traverse_obj(info_data, (lambda _, v: v['id'] == _pk_to_id(story_id), any))
|
||||
|
||||
return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)
|
||||
|
|
|
@ -13,11 +13,13 @@
|
|||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
remove_start,
|
||||
|
@ -1033,6 +1035,7 @@ def _real_extract(self, url):
|
|||
thumbnails.append({
|
||||
'id': f'{name}_{width}x{height}',
|
||||
'url': img_url,
|
||||
'ext': traverse_obj(parse_qs(img_url), ('image', 0, {determine_ext(default_ext='jpg')})),
|
||||
**res,
|
||||
})
|
||||
|
||||
|
|
|
@ -501,7 +501,7 @@ def _extract_webpage(self, url):
|
|||
r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed
|
||||
r'class="coveplayerid">([^<]+)<', # coveplayer
|
||||
r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/
|
||||
r'\bclass="passportcoveplayer"[^>]+\bdata-media="(\d+)', # https://www.thirteen.org/programs/the-woodwrights-shop/who-wrote-the-book-of-sloyd-fggvvq/
|
||||
r'\sclass="passportcoveplayer"[^>]*\sdata-media="(\d+)', # https://www.thirteen.org/programs/the-woodwrights-shop/who-wrote-the-book-of-sloyd-fggvvq/
|
||||
r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer
|
||||
r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',",
|
||||
r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/
|
||||
|
|
|
@ -52,7 +52,8 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
|
||||
_HEADERS = {}
|
||||
|
||||
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
|
||||
_IMAGE_REPL_RE = r'-[0-9a-z]+\.(?P<ext>jpg|png)'
|
||||
_TAGS_RE = re.compile(r'"([^"]+)"|([^ ]+)')
|
||||
|
||||
_ARTWORK_MAP = {
|
||||
'mini': 16,
|
||||
|
@ -331,12 +332,14 @@ def invalid_url(url):
|
|||
thumbnails = []
|
||||
artwork_url = info.get('artwork_url')
|
||||
thumbnail = artwork_url or user.get('avatar_url')
|
||||
if isinstance(thumbnail, str):
|
||||
if re.search(self._IMAGE_REPL_RE, thumbnail):
|
||||
if url_or_none(thumbnail):
|
||||
if mobj := re.search(self._IMAGE_REPL_RE, thumbnail):
|
||||
for image_id, size in self._ARTWORK_MAP.items():
|
||||
# Soundcloud serves JPEG regardless of URL's ext *except* for "original" thumb
|
||||
ext = mobj.group('ext') if image_id == 'original' else 'jpg'
|
||||
i = {
|
||||
'id': image_id,
|
||||
'url': re.sub(self._IMAGE_REPL_RE, f'-{image_id}.jpg', thumbnail),
|
||||
'url': re.sub(self._IMAGE_REPL_RE, f'-{image_id}.{ext}', thumbnail),
|
||||
}
|
||||
if image_id == 'tiny' and not artwork_url:
|
||||
size = 18
|
||||
|
@ -372,6 +375,7 @@ def extract_count(key):
|
|||
'comment_count': extract_count('comment'),
|
||||
'repost_count': extract_count('reposts'),
|
||||
'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)),
|
||||
'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)),
|
||||
'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)),
|
||||
'formats': formats if not extract_flat else None,
|
||||
}
|
||||
|
@ -425,6 +429,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'repost_count': int,
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||
'tags': 'count:14',
|
||||
},
|
||||
},
|
||||
# geo-restricted
|
||||
|
@ -440,7 +445,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'uploader_id': '9615865',
|
||||
'timestamp': 1337635207,
|
||||
'upload_date': '20120521',
|
||||
'duration': 227.155,
|
||||
'duration': 227.103,
|
||||
'license': 'all-rights-reserved',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
|
@ -450,6 +455,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||
'genres': ['Alternative'],
|
||||
'artists': ['The Royal Concept'],
|
||||
'tags': [],
|
||||
},
|
||||
},
|
||||
# private link
|
||||
|
@ -475,6 +481,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||
'genres': ['youtubedl'],
|
||||
'tags': [],
|
||||
},
|
||||
},
|
||||
# private link (alt format)
|
||||
|
@ -500,15 +507,16 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||
'genres': ['youtubedl'],
|
||||
'tags': [],
|
||||
},
|
||||
},
|
||||
# downloadable song
|
||||
{
|
||||
'url': 'https://soundcloud.com/the80m/the-following',
|
||||
'md5': '9ffcddb08c87d74fb5808a3c183a1d04',
|
||||
'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04'
|
||||
'info_dict': {
|
||||
'id': '343609555',
|
||||
'ext': 'wav',
|
||||
'ext': 'opus', # wav original available with auth
|
||||
'title': 'The Following',
|
||||
'track': 'The Following',
|
||||
'description': '',
|
||||
|
@ -526,15 +534,18 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'view_count': int,
|
||||
'genres': ['Dance & EDM'],
|
||||
'artists': ['80M'],
|
||||
'tags': ['80M', 'EDM', 'Dance', 'Music'],
|
||||
},
|
||||
'expected_warnings': ['Original download format is only available for registered users'],
|
||||
},
|
||||
# private link, downloadable format
|
||||
# tags with spaces (e.g. "Uplifting Trance", "Ori Uplift")
|
||||
{
|
||||
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
|
||||
'md5': '64a60b16e617d41d0bef032b7f55441e',
|
||||
'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e'
|
||||
'info_dict': {
|
||||
'id': '340344461',
|
||||
'ext': 'wav',
|
||||
'ext': 'opus', # wav original available with auth
|
||||
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
|
||||
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
|
||||
|
@ -552,7 +563,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||
'genres': ['Trance'],
|
||||
'artists': ['Ori Uplift'],
|
||||
'tags': ['Orchestral', 'Emotional', 'Uplifting Trance', 'Trance', 'Ori Uplift', 'UpOnly'],
|
||||
},
|
||||
'expected_warnings': ['Original download format is only available for registered users'],
|
||||
},
|
||||
# no album art, use avatar pic for thumbnail
|
||||
{
|
||||
|
@ -577,6 +590,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'repost_count': int,
|
||||
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||
'artists': ['MadReal'],
|
||||
'tags': [],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -604,8 +618,47 @@ class SoundcloudIE(SoundcloudBaseIE):
|
|||
'repost_count': int,
|
||||
'genres': ['Piano'],
|
||||
'uploader_url': 'https://soundcloud.com/giovannisarani',
|
||||
'tags': 'count:10',
|
||||
},
|
||||
},
|
||||
# .png "original" artwork, 160kbps m4a HLS format
|
||||
{
|
||||
'url': 'https://soundcloud.com/skorxh/audio-dealer',
|
||||
'info_dict': {
|
||||
'id': '2011421339',
|
||||
'ext': 'm4a',
|
||||
'title': 'audio dealer',
|
||||
'description': '',
|
||||
'uploader': '$KORCH',
|
||||
'uploader_id': '150292288',
|
||||
'uploader_url': 'https://soundcloud.com/skorxh',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'duration': 213.469,
|
||||
'tags': [],
|
||||
'artists': ['$KORXH'],
|
||||
'track': 'audio dealer',
|
||||
'timestamp': 1737143201,
|
||||
'upload_date': '20250117',
|
||||
'license': 'all-rights-reserved',
|
||||
'thumbnail': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-original.png',
|
||||
'thumbnails': [
|
||||
{'id': 'mini', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-mini.jpg'},
|
||||
{'id': 'tiny', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-tiny.jpg'},
|
||||
{'id': 'small', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-small.jpg'},
|
||||
{'id': 'badge', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-badge.jpg'},
|
||||
{'id': 't67x67', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t67x67.jpg'},
|
||||
{'id': 'large', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-large.jpg'},
|
||||
{'id': 't300x300', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t300x300.jpg'},
|
||||
{'id': 'crop', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-crop.jpg'},
|
||||
{'id': 't500x500', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t500x500.jpg'},
|
||||
{'id': 'original', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-original.png'},
|
||||
],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
|
||||
},
|
||||
{
|
||||
# AAC HQ format available (account with active subscription needed)
|
||||
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from .bunnycdn import BunnyCdnIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import try_get, unified_timestamp
|
||||
from ..utils import make_archive_id, try_get, unified_timestamp
|
||||
|
||||
|
||||
class SovietsClosetBaseIE(InfoExtractor):
|
||||
|
@ -43,7 +44,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
|||
'url': 'https://sovietscloset.com/video/1337',
|
||||
'md5': 'bd012b04b261725510ca5383074cdd55',
|
||||
'info_dict': {
|
||||
'id': '1337',
|
||||
'id': '2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Witcher #13',
|
||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
|
||||
|
@ -55,20 +56,23 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
|||
'upload_date': '20170413',
|
||||
'uploader_id': 'SovietWomble',
|
||||
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
||||
'duration': 7007,
|
||||
'duration': 7008,
|
||||
'was_live': True,
|
||||
'availability': 'public',
|
||||
'series': 'The Witcher',
|
||||
'season': 'Misc',
|
||||
'episode_number': 13,
|
||||
'episode': 'Episode 13',
|
||||
'creators': ['SovietWomble'],
|
||||
'description': '',
|
||||
'_old_archive_ids': ['sovietscloset 1337'],
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://sovietscloset.com/video/1105',
|
||||
'md5': '89fa928f183893cb65a0b7be846d8a90',
|
||||
'info_dict': {
|
||||
'id': '1105',
|
||||
'id': 'c0e5e76f-3a93-40b4-bf01-12343c2eec5d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Arma 3 - Zeus Games #5',
|
||||
'uploader': 'SovietWomble',
|
||||
|
@ -80,39 +84,20 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
|||
'upload_date': '20160420',
|
||||
'uploader_id': 'SovietWomble',
|
||||
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
||||
'duration': 8804,
|
||||
'duration': 8805,
|
||||
'was_live': True,
|
||||
'availability': 'public',
|
||||
'series': 'Arma 3',
|
||||
'season': 'Zeus Games',
|
||||
'episode_number': 5,
|
||||
'episode': 'Episode 5',
|
||||
'creators': ['SovietWomble'],
|
||||
'description': '',
|
||||
'_old_archive_ids': ['sovietscloset 1105'],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
|
||||
iframe = self._download_webpage(
|
||||
f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
|
||||
video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER)
|
||||
|
||||
m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url')
|
||||
thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
|
||||
|
||||
m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
|
||||
|
||||
if not m3u8_formats:
|
||||
duration = None
|
||||
else:
|
||||
duration = self._extract_m3u8_vod_duration(
|
||||
m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER)
|
||||
|
||||
return {
|
||||
'formats': m3u8_formats,
|
||||
'thumbnail': thumbnail_url,
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
@ -122,13 +107,13 @@ def _real_extract(self, url):
|
|||
|
||||
stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
|
||||
|
||||
return {
|
||||
return self.url_result(
|
||||
f'https://iframe.mediadelivery.net/embed/5105/{stream["bunnyId"]}', ie=BunnyCdnIE, url_transparent=True,
|
||||
**self.video_meta(
|
||||
video_id=video_id, game_name=stream['game']['name'],
|
||||
category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
|
||||
episode_number=stream.get('number'), stream_date=stream.get('date')),
|
||||
**self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
|
||||
}
|
||||
_old_archive_ids=[make_archive_id(self, video_id)])
|
||||
|
||||
|
||||
class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
|
||||
|
|
|
@ -249,6 +249,12 @@ def _extract_web_data_and_status(self, url, video_id, fatal=True):
|
|||
elif fatal:
|
||||
raise ExtractorError('Unable to extract webpage video data')
|
||||
|
||||
if not traverse_obj(video_data, ('video', {dict})) and traverse_obj(video_data, ('isContentClassified', {bool})):
|
||||
message = 'This post may not be comfortable for some audiences. Log in for access'
|
||||
if fatal:
|
||||
self.raise_login_required(message)
|
||||
self.report_warning(f'{message}. {self._login_hint()}', video_id=video_id)
|
||||
|
||||
return video_data, status
|
||||
|
||||
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
||||
|
@ -895,8 +901,12 @@ def _real_extract(self, url):
|
|||
|
||||
if video_data and status == 0:
|
||||
return self._parse_aweme_video_web(video_data, url, video_id)
|
||||
elif status == 10216:
|
||||
raise ExtractorError('This video is private', expected=True)
|
||||
elif status in (10216, 10222):
|
||||
# 10216: private post; 10222: private account
|
||||
self.raise_login_required(
|
||||
'You do not have permission to view this post. Log into an account that has access')
|
||||
elif status == 10204:
|
||||
raise ExtractorError('Your IP address is blocked from accessing this post', expected=True)
|
||||
raise ExtractorError(f'Video not available, status code {status}', video_id=video_id)
|
||||
|
||||
|
||||
|
|
|
@ -100,8 +100,8 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class WSJArticleIE(InfoExtractor):
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/(?:articles|opinion)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
|
||||
'info_dict': {
|
||||
'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
|
||||
|
@ -110,11 +110,20 @@ class WSJArticleIE(InfoExtractor):
|
|||
'uploader_id': 'ralcaraz',
|
||||
'title': 'Bao Bao the Panda Leaves for China',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.wsj.com/opinion/hamas-hostages-caskets-bibas-family-israel-gaza-29da083b',
|
||||
'info_dict': {
|
||||
'id': 'CE68D629-8DB8-4CD3-B30A-92112C102054',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241007',
|
||||
'uploader_id': 'Tinnes, David',
|
||||
'title': 'WSJ Opinion: "Get the Jew": The Crown Heights Riot Revisited',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, article_id)
|
||||
webpage = self._download_webpage(url, article_id, impersonate=True)
|
||||
video_id = self._search_regex(
|
||||
r'(?:id=["\']video|video-|iframe\.html\?guid=|data-src=["\'])([a-fA-F0-9-]{36})',
|
||||
webpage, 'video id')
|
||||
|
|
|
@ -857,6 +857,18 @@ def generate_api_headers(
|
|||
}
|
||||
return filter_dict(headers)
|
||||
|
||||
def _download_webpage_with_retries(self, *args, retry_fatal=False, retry_on_status=None, **kwargs):
|
||||
for retry in self.RetryManager(fatal=retry_fatal):
|
||||
try:
|
||||
return self._download_webpage(*args, **kwargs)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, network_exceptions):
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status not in (retry_on_status or (403, 429)):
|
||||
retry.error = e
|
||||
continue
|
||||
self._error_or_warning(e, fatal=retry_fatal)
|
||||
break
|
||||
|
||||
def _download_ytcfg(self, client, video_id):
|
||||
url = {
|
||||
'web': 'https://www.youtube.com',
|
||||
|
@ -866,8 +878,8 @@ def _download_ytcfg(self, client, video_id):
|
|||
}.get(client)
|
||||
if not url:
|
||||
return {}
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config',
|
||||
webpage = self._download_webpage_with_retries(
|
||||
url, video_id, note=f'Downloading {client.replace("_", " ").strip()} client config',
|
||||
headers=traverse_obj(self._get_default_ytcfg(client), {
|
||||
'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}),
|
||||
}))
|
||||
|
@ -3135,15 +3147,22 @@ def _extract_player_url(self, *ytcfgs, webpage=None):
|
|||
get_all=False, expected_type=str)
|
||||
if not player_url:
|
||||
return
|
||||
# TODO: Add proper support for the 'tce' variant players
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/12398
|
||||
if '/player_ias_tce.vflset/' in player_url:
|
||||
self.write_debug(f'Modifying tce player URL: {player_url}')
|
||||
player_url = player_url.replace('/player_ias_tce.vflset/', '/player_ias.vflset/')
|
||||
return urljoin('https://www.youtube.com', player_url)
|
||||
|
||||
def _download_player_url(self, video_id, fatal=False):
|
||||
res = self._download_webpage(
|
||||
iframe_webpage = self._download_webpage_with_retries(
|
||||
'https://www.youtube.com/iframe_api',
|
||||
note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
|
||||
if res:
|
||||
note='Downloading iframe API JS',
|
||||
video_id=video_id, retry_fatal=fatal)
|
||||
|
||||
if iframe_webpage:
|
||||
player_version = self._search_regex(
|
||||
r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
|
||||
r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_webpage, 'player version', fatal=fatal)
|
||||
if player_version:
|
||||
return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
|
||||
|
||||
|
@ -3369,7 +3388,7 @@ def _fixup_n_function_code(self, argnames, code):
|
|||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
|
||||
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.02.19')
|
||||
jscode = func_code or self._load_player(video_id, player_url)
|
||||
jsi = JSInterpreter(jscode)
|
||||
|
||||
|
@ -4556,8 +4575,7 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
|
|||
pp = self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||
if pp:
|
||||
query['pp'] = pp
|
||||
webpage = self._download_webpage(
|
||||
webpage_url, video_id, fatal=False, query=query)
|
||||
webpage = self._download_webpage_with_retries(webpage_url, video_id, query=query)
|
||||
|
||||
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
|
||||
|
||||
|
|
|
@ -137,6 +137,116 @@ def _extract_player(self, webpage, video_id, fatal=True):
|
|||
group='json'),
|
||||
video_id)
|
||||
|
||||
def _extract_entry(self, url, player, content, video_id):
|
||||
title = content.get('title') or content['teaserHeadline']
|
||||
|
||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||
ptmd_path = traverse_obj(t, (
|
||||
(('streams', 'default'), None),
|
||||
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
|
||||
), get_all=False)
|
||||
if not ptmd_path:
|
||||
raise ExtractorError('Could not extract ptmd_path')
|
||||
|
||||
info = self._extract_ptmd(
|
||||
urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url)
|
||||
|
||||
thumbnails = []
|
||||
layouts = try_get(
|
||||
content, lambda x: x['teaserImageRef']['layouts'], dict)
|
||||
if layouts:
|
||||
for layout_key, layout_url in layouts.items():
|
||||
layout_url = url_or_none(layout_url)
|
||||
if not layout_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': layout_url,
|
||||
'format_id': layout_key,
|
||||
}
|
||||
mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
|
||||
if mobj:
|
||||
thumbnail.update({
|
||||
'width': int(mobj.group('width')),
|
||||
'height': int(mobj.group('height')),
|
||||
})
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
chapter_marks = t.get('streamAnchorTag') or []
|
||||
chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))})
|
||||
chapters = [{
|
||||
'start_time': chap.get('anchorOffset'),
|
||||
'end_time': next_chap.get('anchorOffset'),
|
||||
'title': chap.get('anchorLabel'),
|
||||
} for chap, next_chap in zip(chapter_marks, chapter_marks[1:])]
|
||||
|
||||
return merge_dicts(info, {
|
||||
'title': title,
|
||||
'description': content.get('leadParagraph') or content.get('teasertext'),
|
||||
'duration': int_or_none(t.get('duration')),
|
||||
'timestamp': unified_timestamp(content.get('editorialDate')),
|
||||
'thumbnails': thumbnails,
|
||||
'chapters': chapters or None,
|
||||
'episode': title,
|
||||
**traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', {
|
||||
'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}),
|
||||
'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}),
|
||||
'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}),
|
||||
'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}),
|
||||
'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode_id': ('contentId', {str}),
|
||||
})),
|
||||
})
|
||||
|
||||
def _extract_regular(self, url, player, video_id, query=None):
|
||||
player_url = player['content']
|
||||
|
||||
content = self._call_api(
|
||||
update_url_query(player_url, query),
|
||||
video_id, 'content', player['apiToken'], url)
|
||||
|
||||
return self._extract_entry(player_url, player, content, video_id)
|
||||
|
||||
def _extract_mobile(self, video_id):
|
||||
video = self._download_v2_doc(video_id)
|
||||
|
||||
formats = []
|
||||
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
||||
document = formitaeten and video['document']
|
||||
if formitaeten:
|
||||
title = document['titel']
|
||||
content_id = document['basename']
|
||||
|
||||
format_urls = set()
|
||||
for f in formitaeten or []:
|
||||
self._extract_format(content_id, formats, format_urls, f)
|
||||
|
||||
thumbnails = []
|
||||
teaser_bild = document.get('teaserBild')
|
||||
if isinstance(teaser_bild, dict):
|
||||
for thumbnail_key, thumbnail in teaser_bild.items():
|
||||
thumbnail_url = try_get(
|
||||
thumbnail, lambda x: x['url'], str)
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'id': thumbnail_key,
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': document.get('beschreibung'),
|
||||
'duration': int_or_none(document.get('length')),
|
||||
'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
|
||||
try_get(video, lambda x: x['meta']['editorialDate'], str)),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': self._extract_subtitles(document),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class ZDFIE(ZDFBaseIE):
|
||||
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
|
@ -306,121 +416,6 @@ class ZDFIE(ZDFBaseIE):
|
|||
},
|
||||
}]
|
||||
|
||||
def _extract_entry(self, url, player, content, video_id):
|
||||
title = content.get('title') or content['teaserHeadline']
|
||||
|
||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||
ptmd_path = traverse_obj(t, (
|
||||
(('streams', 'default'), None),
|
||||
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
|
||||
), get_all=False)
|
||||
if not ptmd_path:
|
||||
raise ExtractorError('Could not extract ptmd_path')
|
||||
|
||||
info = self._extract_ptmd(
|
||||
urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url)
|
||||
|
||||
thumbnails = []
|
||||
layouts = try_get(
|
||||
content, lambda x: x['teaserImageRef']['layouts'], dict)
|
||||
if layouts:
|
||||
for layout_key, layout_url in layouts.items():
|
||||
layout_url = url_or_none(layout_url)
|
||||
if not layout_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': layout_url,
|
||||
'format_id': layout_key,
|
||||
}
|
||||
mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
|
||||
if mobj:
|
||||
thumbnail.update({
|
||||
'width': int(mobj.group('width')),
|
||||
'height': int(mobj.group('height')),
|
||||
})
|
||||
thumbnails.append(thumbnail)
|
||||
|
||||
chapter_marks = t.get('streamAnchorTag') or []
|
||||
chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))})
|
||||
chapters = [{
|
||||
'start_time': chap.get('anchorOffset'),
|
||||
'end_time': next_chap.get('anchorOffset'),
|
||||
'title': chap.get('anchorLabel'),
|
||||
} for chap, next_chap in zip(chapter_marks, chapter_marks[1:])]
|
||||
|
||||
return merge_dicts(info, {
|
||||
'title': title,
|
||||
'description': content.get('leadParagraph') or content.get('teasertext'),
|
||||
'duration': int_or_none(t.get('duration')),
|
||||
'timestamp': unified_timestamp(content.get('editorialDate')),
|
||||
'thumbnails': thumbnails,
|
||||
'chapters': chapters or None,
|
||||
'episode': title,
|
||||
**traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', {
|
||||
'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}),
|
||||
'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}),
|
||||
'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}),
|
||||
'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}),
|
||||
'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode_id': ('contentId', {str}),
|
||||
})),
|
||||
})
|
||||
|
||||
def _extract_regular(self, url, player, video_id):
|
||||
player_url = player['content']
|
||||
|
||||
try:
|
||||
content = self._call_api(
|
||||
update_url_query(player_url, {'profile': 'player-3'}),
|
||||
video_id, 'content', player['apiToken'], url)
|
||||
except ExtractorError as e:
|
||||
self.report_warning(f'{video_id}: {e.orig_msg}; retrying with v2 profile')
|
||||
content = self._call_api(
|
||||
player_url, video_id, 'content', player['apiToken'], url)
|
||||
|
||||
return self._extract_entry(player_url, player, content, video_id)
|
||||
|
||||
def _extract_mobile(self, video_id):
|
||||
video = self._download_v2_doc(video_id)
|
||||
|
||||
formats = []
|
||||
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
||||
document = formitaeten and video['document']
|
||||
if formitaeten:
|
||||
title = document['titel']
|
||||
content_id = document['basename']
|
||||
|
||||
format_urls = set()
|
||||
for f in formitaeten or []:
|
||||
self._extract_format(content_id, formats, format_urls, f)
|
||||
|
||||
thumbnails = []
|
||||
teaser_bild = document.get('teaserBild')
|
||||
if isinstance(teaser_bild, dict):
|
||||
for thumbnail_key, thumbnail in teaser_bild.items():
|
||||
thumbnail_url = try_get(
|
||||
thumbnail, lambda x: x['url'], str)
|
||||
if thumbnail_url:
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'id': thumbnail_key,
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
'title': title,
|
||||
'description': document.get('beschreibung'),
|
||||
'duration': int_or_none(document.get('length')),
|
||||
'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
|
||||
try_get(video, lambda x: x['meta']['editorialDate'], str)),
|
||||
'thumbnails': thumbnails,
|
||||
'subtitles': self._extract_subtitles(document),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
|
@ -428,7 +423,7 @@ def _real_extract(self, url):
|
|||
if webpage:
|
||||
player = self._extract_player(webpage, url, fatal=False)
|
||||
if player:
|
||||
return self._extract_regular(url, player, video_id)
|
||||
return self._extract_regular(url, player, video_id, query={'profile': 'player-3'})
|
||||
|
||||
return self._extract_mobile(video_id)
|
||||
|
||||
|
@ -474,7 +469,8 @@ def _extract_entry(self, entry):
|
|||
'title': ('titel', {str}),
|
||||
'description': ('beschreibung', {str}),
|
||||
'duration': ('length', {float_or_none}),
|
||||
# TODO: seasonNumber and episodeNumber can be extracted but need to also be in ZDFIE
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
}))
|
||||
|
||||
def _entries(self, data, document_id):
|
||||
|
|
30
yt_dlp/globals.py
Normal file
30
yt_dlp/globals.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
from collections import defaultdict
|
||||
|
||||
# Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system,
|
||||
# no backwards compatibility is guaranteed for the plugin system API.
|
||||
# However, we will still try our best.
|
||||
|
||||
|
||||
class Indirect:
|
||||
def __init__(self, initial, /):
|
||||
self.value = initial
|
||||
|
||||
def __repr__(self, /):
|
||||
return f'{type(self).__name__}({self.value!r})'
|
||||
|
||||
|
||||
postprocessors = Indirect({})
|
||||
extractors = Indirect({})
|
||||
|
||||
# Plugins
|
||||
all_plugins_loaded = Indirect(False)
|
||||
plugin_specs = Indirect({})
|
||||
plugin_dirs = Indirect(['default'])
|
||||
|
||||
plugin_ies = Indirect({})
|
||||
plugin_pps = Indirect({})
|
||||
plugin_ies_overrides = Indirect(defaultdict(list))
|
||||
|
||||
# Misc
|
||||
IN_CLI = Indirect(False)
|
||||
LAZY_EXTRACTORS = Indirect(False) # `False`=force, `None`=disabled, `True`=enabled
|
|
@ -398,7 +398,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
'(Alias: --no-config)'))
|
||||
general.add_option(
|
||||
'--no-config-locations',
|
||||
action='store_const', dest='config_locations', const=[],
|
||||
action='store_const', dest='config_locations', const=None,
|
||||
help=(
|
||||
'Do not load any custom configuration files (default). When given inside a '
|
||||
'configuration file, ignore all previous --config-locations defined in the current file'))
|
||||
|
@ -410,12 +410,21 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
'("-" for stdin). Can be used multiple times and inside other configuration files'))
|
||||
general.add_option(
|
||||
'--plugin-dirs',
|
||||
dest='plugin_dirs', metavar='PATH', action='append',
|
||||
metavar='PATH',
|
||||
dest='plugin_dirs',
|
||||
action='callback',
|
||||
callback=_list_from_options_callback,
|
||||
type='str',
|
||||
callback_kwargs={'delim': None},
|
||||
default=['default'],
|
||||
help=(
|
||||
'Path to an additional directory to search for plugins. '
|
||||
'This option can be used multiple times to add multiple directories. '
|
||||
'Note that this currently only works for extractor plugins; '
|
||||
'postprocessor plugins can only be loaded from the default plugin directories'))
|
||||
'Use "default" to search the default plugin directories (default)'))
|
||||
general.add_option(
|
||||
'--no-plugin-dirs',
|
||||
dest='plugin_dirs', action='store_const', const=[],
|
||||
help='Clear plugin directories to search, including defaults and those provided by previous --plugin-dirs')
|
||||
general.add_option(
|
||||
'--flat-playlist',
|
||||
action='store_const', dest='extract_flat', const='in_playlist', default=False,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import contextlib
|
||||
import dataclasses
|
||||
import functools
|
||||
import importlib
|
||||
import importlib.abc
|
||||
|
@ -14,17 +15,48 @@
|
|||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
|
||||
from .globals import (
|
||||
Indirect,
|
||||
plugin_dirs,
|
||||
all_plugins_loaded,
|
||||
plugin_specs,
|
||||
)
|
||||
|
||||
from .utils import (
|
||||
Config,
|
||||
get_executable_path,
|
||||
get_system_config_dirs,
|
||||
get_user_config_dirs,
|
||||
merge_dicts,
|
||||
orderedSet,
|
||||
write_string,
|
||||
)
|
||||
|
||||
PACKAGE_NAME = 'yt_dlp_plugins'
|
||||
COMPAT_PACKAGE_NAME = 'ytdlp_plugins'
|
||||
_BASE_PACKAGE_PATH = Path(__file__).parent
|
||||
|
||||
|
||||
# Please Note: Due to necessary changes and the complex nature involved,
|
||||
# no backwards compatibility is guaranteed for the plugin system API.
|
||||
# However, we will still try our best.
|
||||
|
||||
__all__ = [
|
||||
'COMPAT_PACKAGE_NAME',
|
||||
'PACKAGE_NAME',
|
||||
'PluginSpec',
|
||||
'directories',
|
||||
'load_all_plugins',
|
||||
'load_plugins',
|
||||
'register_plugin_spec',
|
||||
]
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PluginSpec:
|
||||
module_name: str
|
||||
suffix: str
|
||||
destination: Indirect
|
||||
plugin_destination: Indirect
|
||||
|
||||
|
||||
class PluginLoader(importlib.abc.Loader):
|
||||
|
@ -44,7 +76,42 @@ def dirs_in_zip(archive):
|
|||
pass
|
||||
except Exception as e:
|
||||
write_string(f'WARNING: Could not read zip file {archive}: {e}\n')
|
||||
return set()
|
||||
return ()
|
||||
|
||||
|
||||
def default_plugin_paths():
|
||||
def _get_package_paths(*root_paths, containing_folder):
|
||||
for config_dir in orderedSet(map(Path, root_paths), lazy=True):
|
||||
# We need to filter the base path added when running __main__.py directly
|
||||
if config_dir == _BASE_PACKAGE_PATH:
|
||||
continue
|
||||
with contextlib.suppress(OSError):
|
||||
yield from (config_dir / containing_folder).iterdir()
|
||||
|
||||
# Load from yt-dlp config folders
|
||||
yield from _get_package_paths(
|
||||
*get_user_config_dirs('yt-dlp'),
|
||||
*get_system_config_dirs('yt-dlp'),
|
||||
containing_folder='plugins',
|
||||
)
|
||||
|
||||
# Load from yt-dlp-plugins folders
|
||||
yield from _get_package_paths(
|
||||
get_executable_path(),
|
||||
*get_user_config_dirs(''),
|
||||
*get_system_config_dirs(''),
|
||||
containing_folder='yt-dlp-plugins',
|
||||
)
|
||||
|
||||
# Load from PYTHONPATH directories
|
||||
yield from (path for path in map(Path, sys.path) if path != _BASE_PACKAGE_PATH)
|
||||
|
||||
|
||||
def candidate_plugin_paths(candidate):
|
||||
candidate_path = Path(candidate)
|
||||
if not candidate_path.is_dir():
|
||||
raise ValueError(f'Invalid plugin directory: {candidate_path}')
|
||||
yield from candidate_path.iterdir()
|
||||
|
||||
|
||||
class PluginFinder(importlib.abc.MetaPathFinder):
|
||||
|
@ -56,40 +123,16 @@ class PluginFinder(importlib.abc.MetaPathFinder):
|
|||
|
||||
def __init__(self, *packages):
|
||||
self._zip_content_cache = {}
|
||||
self.packages = set(itertools.chain.from_iterable(
|
||||
self.packages = set(
|
||||
itertools.chain.from_iterable(
|
||||
itertools.accumulate(name.split('.'), lambda a, b: '.'.join((a, b)))
|
||||
for name in packages))
|
||||
|
||||
def search_locations(self, fullname):
|
||||
candidate_locations = []
|
||||
|
||||
def _get_package_paths(*root_paths, containing_folder='plugins'):
|
||||
for config_dir in orderedSet(map(Path, root_paths), lazy=True):
|
||||
with contextlib.suppress(OSError):
|
||||
yield from (config_dir / containing_folder).iterdir()
|
||||
|
||||
# Load from yt-dlp config folders
|
||||
candidate_locations.extend(_get_package_paths(
|
||||
*get_user_config_dirs('yt-dlp'),
|
||||
*get_system_config_dirs('yt-dlp'),
|
||||
containing_folder='plugins'))
|
||||
|
||||
# Load from yt-dlp-plugins folders
|
||||
candidate_locations.extend(_get_package_paths(
|
||||
get_executable_path(),
|
||||
*get_user_config_dirs(''),
|
||||
*get_system_config_dirs(''),
|
||||
containing_folder='yt-dlp-plugins'))
|
||||
|
||||
candidate_locations.extend(map(Path, sys.path)) # PYTHONPATH
|
||||
with contextlib.suppress(ValueError): # Added when running __main__.py directly
|
||||
candidate_locations.remove(Path(__file__).parent)
|
||||
|
||||
# TODO(coletdjnz): remove when plugin globals system is implemented
|
||||
if Config._plugin_dirs:
|
||||
candidate_locations.extend(_get_package_paths(
|
||||
*Config._plugin_dirs,
|
||||
containing_folder=''))
|
||||
candidate_locations = itertools.chain.from_iterable(
|
||||
default_plugin_paths() if candidate == 'default' else candidate_plugin_paths(candidate)
|
||||
for candidate in plugin_dirs.value
|
||||
)
|
||||
|
||||
parts = Path(*fullname.split('.'))
|
||||
for path in orderedSet(candidate_locations, lazy=True):
|
||||
|
@ -109,7 +152,8 @@ def find_spec(self, fullname, path=None, target=None):
|
|||
|
||||
search_locations = list(map(str, self.search_locations(fullname)))
|
||||
if not search_locations:
|
||||
return None
|
||||
# Prevent using built-in meta finders for searching plugins.
|
||||
raise ModuleNotFoundError(fullname)
|
||||
|
||||
spec = importlib.machinery.ModuleSpec(fullname, PluginLoader(), is_package=True)
|
||||
spec.submodule_search_locations = search_locations
|
||||
|
@ -123,8 +167,10 @@ def invalidate_caches(self):
|
|||
|
||||
|
||||
def directories():
|
||||
spec = importlib.util.find_spec(PACKAGE_NAME)
|
||||
return spec.submodule_search_locations if spec else []
|
||||
with contextlib.suppress(ModuleNotFoundError):
|
||||
if spec := importlib.util.find_spec(PACKAGE_NAME):
|
||||
return list(spec.submodule_search_locations)
|
||||
return []
|
||||
|
||||
|
||||
def iter_modules(subpackage):
|
||||
|
@ -134,19 +180,23 @@ def iter_modules(subpackage):
|
|||
yield from pkgutil.iter_modules(path=pkg.__path__, prefix=f'{fullname}.')
|
||||
|
||||
|
||||
def load_module(module, module_name, suffix):
|
||||
def get_regular_classes(module, module_name, suffix):
|
||||
# Find standard public plugin classes (not overrides)
|
||||
return inspect.getmembers(module, lambda obj: (
|
||||
inspect.isclass(obj)
|
||||
and obj.__name__.endswith(suffix)
|
||||
and obj.__module__.startswith(module_name)
|
||||
and not obj.__name__.startswith('_')
|
||||
and obj.__name__ in getattr(module, '__all__', [obj.__name__])))
|
||||
and obj.__name__ in getattr(module, '__all__', [obj.__name__])
|
||||
and getattr(obj, 'PLUGIN_NAME', None) is None
|
||||
))
|
||||
|
||||
|
||||
def load_plugins(name, suffix):
|
||||
classes = {}
|
||||
if os.environ.get('YTDLP_NO_PLUGINS'):
|
||||
return classes
|
||||
def load_plugins(plugin_spec: PluginSpec):
|
||||
name, suffix = plugin_spec.module_name, plugin_spec.suffix
|
||||
regular_classes = {}
|
||||
if os.environ.get('YTDLP_NO_PLUGINS') or not plugin_dirs.value:
|
||||
return regular_classes
|
||||
|
||||
for finder, module_name, _ in iter_modules(name):
|
||||
if any(x.startswith('_') for x in module_name.split('.')):
|
||||
|
@ -163,24 +213,42 @@ def load_plugins(name, suffix):
|
|||
sys.modules[module_name] = module
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
write_string(f'Error while importing module {module_name!r}\n{traceback.format_exc(limit=-1)}')
|
||||
write_string(
|
||||
f'Error while importing module {module_name!r}\n{traceback.format_exc(limit=-1)}',
|
||||
)
|
||||
continue
|
||||
classes.update(load_module(module, module_name, suffix))
|
||||
regular_classes.update(get_regular_classes(module, module_name, suffix))
|
||||
|
||||
# Compat: old plugin system using __init__.py
|
||||
# Note: plugins imported this way do not show up in directories()
|
||||
# nor are considered part of the yt_dlp_plugins namespace package
|
||||
if 'default' in plugin_dirs.value:
|
||||
with contextlib.suppress(FileNotFoundError):
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
name, Path(get_executable_path(), COMPAT_PACKAGE_NAME, name, '__init__.py'))
|
||||
name,
|
||||
Path(get_executable_path(), COMPAT_PACKAGE_NAME, name, '__init__.py'),
|
||||
)
|
||||
plugins = importlib.util.module_from_spec(spec)
|
||||
sys.modules[spec.name] = plugins
|
||||
spec.loader.exec_module(plugins)
|
||||
classes.update(load_module(plugins, spec.name, suffix))
|
||||
regular_classes.update(get_regular_classes(plugins, spec.name, suffix))
|
||||
|
||||
return classes
|
||||
# Add the classes into the global plugin lookup for that type
|
||||
plugin_spec.plugin_destination.value = regular_classes
|
||||
# We want to prepend to the main lookup for that type
|
||||
plugin_spec.destination.value = merge_dicts(regular_classes, plugin_spec.destination.value)
|
||||
|
||||
return regular_classes
|
||||
|
||||
|
||||
sys.meta_path.insert(0, PluginFinder(f'{PACKAGE_NAME}.extractor', f'{PACKAGE_NAME}.postprocessor'))
|
||||
def load_all_plugins():
    """Run `load_plugins` for every registered plugin spec, then flag loading as done.

    Iterates the values of `plugin_specs.value` and finally sets
    `all_plugins_loaded.value` to True.
    """
    for registered_spec in plugin_specs.value.values():
        load_plugins(registered_spec)

    all_plugins_loaded.value = True
|
||||
|
||||
__all__ = ['COMPAT_PACKAGE_NAME', 'PACKAGE_NAME', 'directories', 'load_plugins']
|
||||
|
||||
def register_plugin_spec(plugin_spec: PluginSpec):
    """Register `plugin_spec` under its `module_name`, unless one is already registered.

    On first registration the spec is stored in `plugin_specs.value` and a
    `PluginFinder` for `{PACKAGE_NAME}.{module_name}` is placed at the front
    of `sys.meta_path`.
    """
    module_name = plugin_spec.module_name

    # A spec that is already registered for this module is never added again
    if module_name in plugin_specs.value:
        return

    plugin_specs.value[module_name] = plugin_spec
    sys.meta_path.insert(0, PluginFinder(f'{PACKAGE_NAME}.{module_name}'))
|
||||
|
|
|
@ -33,15 +33,38 @@
|
|||
from .sponskrub import SponSkrubPP
|
||||
from .sponsorblock import SponsorBlockPP
|
||||
from .xattrpp import XAttrMetadataPP
|
||||
from ..plugins import load_plugins
|
||||
from ..globals import plugin_pps, postprocessors
|
||||
from ..plugins import PACKAGE_NAME, register_plugin_spec, PluginSpec
|
||||
from ..utils import deprecation_warning
|
||||
|
||||
_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP')
|
||||
|
||||
def __getattr__(name):
    """Module-level attribute fallback for plugin post-processors.

    If `name` is a key of `plugin_pps.value`, a deprecation warning is issued
    and the mapped object is returned; otherwise AttributeError is raised.
    """
    plugin_lookup = plugin_pps.value
    if name not in plugin_lookup:
        raise AttributeError(f'module {__name__!r} has no attribute {name!r}')

    deprecation_warning(
        f'Importing a plugin Post-Processor from {__name__} is deprecated. '
        f'Please import {PACKAGE_NAME}.postprocessor.{name} instead.')
    return plugin_lookup[name]
|
||||
|
||||
|
||||
def get_postprocessor(key):
|
||||
return globals()[key + 'PP']
|
||||
return postprocessors.value[key + 'PP']
|
||||
|
||||
|
||||
globals().update(_PLUGIN_CLASSES)
|
||||
__all__ = [name for name in globals() if name.endswith('PP')]
|
||||
__all__.extend(('FFmpegPostProcessor', 'PostProcessor'))
|
||||
# Register the post-processor plugin type: classes whose names end in 'PP'
# are collected into `postprocessors`, plugin-provided ones into `plugin_pps`.
register_plugin_spec(PluginSpec(
    module_name='postprocessor',
    suffix='PP',
    destination=postprocessors,
    plugin_destination=plugin_pps,
))

# Snapshot of the post-processor objects currently present in this module's
# namespace, keyed by their attribute name
_default_pps = {
    name: value
    for name, value in globals().items()
    if name.endswith('PP') or name in ('FFmpegPostProcessor', 'PostProcessor')
}
postprocessors.value.update(_default_pps)

# `__all__` must contain attribute *names* (strings). Listing the class
# objects themselves makes `from <module> import *` fail with TypeError.
__all__ = list(_default_pps)
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
_configuration_args,
|
||||
deprecation_warning,
|
||||
)
|
||||
from ..utils._utils import _ProgressState
|
||||
|
||||
|
||||
class PostProcessorMetaClass(type):
|
||||
|
@ -189,7 +190,7 @@ def report_progress(self, s):
|
|||
|
||||
self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
|
||||
progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',
|
||||
progress_dict))
|
||||
progress_dict), _ProgressState.from_dict(s), s.get('_percent'))
|
||||
|
||||
def _retry_download(self, err, count, retries):
|
||||
# While this is not an extractor, it behaves similar to one and
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
import datetime as dt
|
||||
import email.header
|
||||
import email.utils
|
||||
import enum
|
||||
import errno
|
||||
import functools
|
||||
import hashlib
|
||||
|
@ -51,6 +52,7 @@
|
|||
compat_HTMLParseError,
|
||||
)
|
||||
from ..dependencies import xattr
|
||||
from ..globals import IN_CLI
|
||||
|
||||
__name__ = __name__.rsplit('.', 1)[0] # noqa: A001: Pretend to be the parent module
|
||||
|
||||
|
@ -1486,8 +1488,7 @@ def write_string(s, out=None, encoding=None):
|
|||
|
||||
# TODO: Use global logger
|
||||
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
|
||||
from .. import _IN_CLI
|
||||
if _IN_CLI:
|
||||
if IN_CLI.value:
|
||||
if msg in deprecation_warning._cache:
|
||||
return
|
||||
deprecation_warning._cache.add(msg)
|
||||
|
@ -4890,10 +4891,6 @@ class Config:
|
|||
filename = None
|
||||
__initialized = False
|
||||
|
||||
# Internal only, do not use! Hack to enable --plugin-dirs
|
||||
# TODO(coletdjnz): remove when plugin globals system is implemented
|
||||
_plugin_dirs = None
|
||||
|
||||
def __init__(self, parser, label=None):
|
||||
self.parser, self.label = parser, label
|
||||
self._loaded_paths, self.configs = set(), []
|
||||
|
@ -5631,6 +5628,24 @@ def filesize_from_tbr(tbr, duration):
|
|||
return int(duration * tbr * (1000 / 8))
|
||||
|
||||
|
||||
def _request_dump_filename(url, video_id, data=None, trim_length=None):
    """Build the file name used for dumping a request.

    The base name joins `video_id`, the MD5 hex digest of `data` (when given)
    and `url` with underscores. If it exceeds `trim_length` (240 when falsy),
    it is cut and terminated with '___' plus the MD5 hex digest of the full
    base name. The result gets a '.dump' extension and is passed through
    `sanitize_filename(..., restricted=True)`.
    """
    data_digest = None if data is None else hashlib.md5(data).hexdigest()
    stem = join_nonempty(video_id, data_digest, url, delim='_')

    limit = trim_length or 240
    if len(stem) > limit:
        digest_suffix = '___' + hashlib.md5(stem.encode()).hexdigest()
        stem = stem[:limit - len(digest_suffix)] + digest_suffix

    filename = sanitize_filename(f'{stem}.dump', restricted=True)
    if os.name != 'nt':
        return filename

    # Working around MAX_PATH limitation on Windows (see
    # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
    absfilepath = os.path.abspath(filename)
    if len(absfilepath) > 259:
        return fR'\\?\{absfilepath}'
    return filename
|
||||
|
||||
|
||||
# XXX: Temporary
|
||||
class _YDLLogger:
|
||||
def __init__(self, ydl=None):
|
||||
|
@ -5659,3 +5674,32 @@ def stdout(self, message):
|
|||
def stderr(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.to_stderr(message)
|
||||
|
||||
|
||||
class _ProgressState(enum.Enum):
    """
    Progress bar state; each member's value is the state number emitted
    in the escape sequence built by `get_ansi_escape`.

    See: https://conemu.github.io/en/AnsiEscapeCodes.html#ConEmu_specific_OSC
    """

    HIDDEN = 0
    INDETERMINATE = 3
    VISIBLE = 1
    WARNING = 4
    ERROR = 2

    @classmethod
    def from_dict(cls, s, /):
        """Pick the state matching the progress dict `s` (requires a 'status' key)."""
        status = s['status']
        if status == 'finished':
            return cls.INDETERMINATE

        # Not currently used
        if status == 'error':
            return cls.ERROR

        if s.get('_percent') is None:
            return cls.INDETERMINATE
        return cls.VISIBLE

    def get_ansi_escape(self, /, percent=None):
        """Return the OSC 9;4 escape string for this state; `percent` is truncated to int (0 if None)."""
        percent = int(percent) if percent is not None else 0
        return f'\033]9;4;{self.value};{percent}\007'
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2025.01.26'
|
||||
__version__ = '2025.02.19'
|
||||
|
||||
RELEASE_GIT_HEAD = '3b4531934465580be22937fecbb6e1a3a9e2334f'
|
||||
RELEASE_GIT_HEAD = '4985a4041770eaa0016271809a1fd950dc809a55'
|
||||
|
||||
VARIANT = None
|
||||
|
||||
|
@ -12,4 +12,4 @@
|
|||
|
||||
ORIGIN = 'yt-dlp/yt-dlp'
|
||||
|
||||
_pkg_version = '2025.01.26'
|
||||
_pkg_version = '2025.02.19'
|
||||
|
|
Loading…
Reference in a new issue