From ec7250f145c5ce7fdab666be7f3929ebdc0c4473 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 1 Jan 2025 16:04:42 -0500 Subject: [PATCH 1/3] Implement nested --playlist-items --- yt_dlp/YoutubeDL.py | 6 +++- yt_dlp/__init__.py | 2 +- yt_dlp/utils/_utils.py | 78 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 764baf3a0..0e9a9cd6c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -635,6 +635,7 @@ def __init__(self, params=None, auto_init=True): self._num_downloads = 0 self._num_videos = 0 self._playlist_level = 0 + self._nested_playlist_index = () self._playlist_urls = set() self.cache = Cache(self) self.__header_cookies = [] @@ -1987,7 +1988,7 @@ def __process_playlist(self, ie_result, download): self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}') all_entries = PlaylistEntries(self, ie_result) - entries = orderedSet(all_entries.get_requested_items(), lazy=True) + entries = orderedSet(all_entries.get_requested_items(self._nested_playlist_index), lazy=True) lazy = self.params.get('lazy_playlist') if lazy: @@ -2064,10 +2065,13 @@ def __process_playlist(self, ie_result, download): f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} ' f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}') + self._nested_playlist_index = (*self._nested_playlist_index, playlist_index) entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, }, extra)) + self._nested_playlist_index = self._nested_playlist_index[:-1] + if not entry_result: failures += 1 if failures >= max_failures: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 20111175b..bbceb661d 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -431,7 +431,7 @@ def metadataparser_actions(f): # Other options if opts.playlist_items is not None: try: - tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items)) + tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items, ())) except Exception as err: raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}') diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 699bf1e7f..67e9f48ec 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2390,6 +2390,23 @@ def _getslice(self, start, end): yield from page_results +def index_in_slice_inclusive(idx: int, slice_: slice): + start, step, stop = slice_.start, slice_.step, slice_.stop + if start is None: + start = 0 + if step is None: + step = 1 + if stop is None or stop == math.inf or stop == -math.inf: + if (idx - start) % step != 0: + return False + if step > 0: + return idx >= start and stop != -math.inf + else: + return idx <= start and stop != math.inf + else: + return idx in range(start, int(stop) + 1, step) + + class PlaylistEntries: MissingEntry = object() is_exhausted = False @@ -2423,20 +2440,61 @@ def __init__(self, ydl, info_dict): (?::(?P[+-]?\d+))? )?''') + NESTED_PLAYLIST_RE = re.compile(r'''(?x) + (?:\[ + (?:[+-]?\d+)? + (?:[:-] + (?:[+-]?\d+|inf(?:inite)?)? + (?::(?:[+-]?\d+))? + )? + \])+''') + + NESTED_PLAYLIST_SEGMENT_RE = re.compile(r'''(?x) + \[ + (?P[+-]?\d+)? + (?P[:-] + (?P[+-]?\d+|inf(?:inite)?)? + (?::(?P[+-]?\d+))? + )? + \]''') + @classmethod - def parse_playlist_items(cls, string): + def parse_playlist_items(cls, string, playlist_index): for segment in string.split(','): if not segment: - raise ValueError('There is two or more consecutive commas') + raise ValueError('There are two or more consecutive commas') mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment) - if not mobj: - raise ValueError(f'{segment!r} is not a valid specification') - start, end, step, has_range = mobj.group('start', 'end', 'step', 'range') - if int_or_none(step) == 0: - raise ValueError(f'Step in {segment!r} cannot be zero') - yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start) + if mobj: + start, end, step, has_range = mobj.group('start', 'end', 'step', 'range') + if int_or_none(step) == 0: + raise ValueError(f'Step in {segment!r} cannot be zero') + yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start) + continue - def get_requested_items(self): + if not cls.NESTED_PLAYLIST_RE.fullmatch(segment): + raise ValueError(f'{segment!r} is not a valid specification') + + for depth, mobj in enumerate(cls.NESTED_PLAYLIST_SEGMENT_RE.finditer(segment)): + start, end, step, has_range = mobj.group('start', 'end', 'step', 'range') + if int_or_none(step) == 0: + raise ValueError(f'Step in {segment!r} cannot be zero') + + slice_ = ( + slice(int_or_none(start), float_or_none(end), int_or_none(step)) + if has_range + else slice(int(start), int(start)) + ) + + if depth == len(playlist_index): + yield slice_ + break + + if not index_in_slice_inclusive(playlist_index[depth], slice_): + break + else: + yield slice(None) + + def get_requested_items(self, playlist_index): playlist_items = self.ydl.params.get('playlist_items') playlist_start = self.ydl.params.get('playliststart', 1) playlist_end = self.ydl.params.get('playlistend') @@ -2448,7 +2506,7 @@ def get_requested_items(self): elif playlist_start != 1 or playlist_end: self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True) - for index in self.parse_playlist_items(playlist_items): + for index in self.parse_playlist_items(playlist_items, playlist_index): for i, entry in self[index]: yield i, entry if not entry: From 460ff550145a3984957d1053ce54c19412fb9275 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 1 Jan 2025 17:47:30 -0500 Subject: [PATCH 2/3] Reduce code dup --- yt_dlp/utils/_utils.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 67e9f48ec..c9f809570 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2460,31 +2460,30 @@ def __init__(self, ydl, info_dict): @classmethod def parse_playlist_items(cls, string, playlist_index): + + def slice_from_mobj(segment, mobj): + start, end, step, has_range = mobj.group('start', 'end', 'step', 'range') + if int_or_none(step) == 0: + raise ValueError(f'Step in {segment!r} cannot be zero') + return ( + slice(int_or_none(start), float_or_none(end), int_or_none(step)) + if has_range + else slice(int(start), int(start)) + ) + for segment in string.split(','): if not segment: raise ValueError('There are two or more consecutive commas') mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment) if mobj: - start, end, step, has_range = mobj.group('start', 'end', 'step', 'range') - if int_or_none(step) == 0: - raise ValueError(f'Step in {segment!r} cannot be zero') - yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start) + yield slice_from_mobj(segment, mobj) continue if not cls.NESTED_PLAYLIST_RE.fullmatch(segment): raise ValueError(f'{segment!r} is not a valid specification') for depth, mobj in enumerate(cls.NESTED_PLAYLIST_SEGMENT_RE.finditer(segment)): - start, end, step, has_range = mobj.group('start', 'end', 'step', 'range') - if int_or_none(step) == 0: - raise ValueError(f'Step in {segment!r} cannot be zero') - - slice_ = ( - slice(int_or_none(start), float_or_none(end), int_or_none(step)) - if has_range - else slice(int(start), int(start)) - ) - + slice_ = slice_from_mobj(segment, mobj) if depth == len(playlist_index): yield slice_ break From 5358fd1bbcd1821647bb2999237711c255371695 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Wed, 1 Jan 2025 19:56:49 -0500 Subject: [PATCH 3/3] Add tests --- test/test_YoutubeDL.py | 58 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 6b022a7ea..7dfc0eef8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1166,6 +1166,64 @@ def test_selection(params, expected_ids, evaluate_all=False): test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True) test_selection({'playlist_items': '-15::15'}, [], True) + def test_playlist_items_selection_nested(self): + ENTRIES_PER_PLAYLIST = 5 + ALL_IDS = [f'{i + 1}-{j + 1}' for i in range(ENTRIES_PER_PLAYLIST) for j in range(ENTRIES_PER_PLAYLIST)] + + def get_downloaded_info_dicts(params, entries): + ydl = YDL(params) + ydl.process_ie_result({ + '_type': 'playlist', + 'id': 'test', + 'extractor': 'test:playlist', + 'extractor_key': 'test:playlist', + 'webpage_url': 'http://example.com', + 'entries': entries, + }) + return ydl.downloaded_info_dicts + + def generate_entries(depth, cur_index=()): + entries = [] + for i in range(1, ENTRIES_PER_PLAYLIST + 1): + idx = (*cur_index, i) + name = '-'.join(map(str, idx)) + if depth == 1: + entries.append({ + 'id': name, + 'title': name, + 'url': TEST_URL, + }) + else: + entries.append({ + '_type': 'playlist', + 'id': name, + 'extractor': 'test:playlist', + 'extractor_key': 'test:playlist', + 'webpage_url': f'http://example.com/{name}', + 'entries': generate_entries(depth - 1, idx), + }) + return entries + + def test_selection(selection_str, expected_ids, depth=2): + entries = generate_entries(depth) + ret = get_downloaded_info_dicts({'playlist_items': selection_str}, entries) + self.assertEqual([info['id'] for info in ret], expected_ids) + + test_selection('[:]', ALL_IDS) + test_selection('[:][:]', ALL_IDS) + test_selection('[:][1]', ['1-1', '2-1', '3-1', '4-1', '5-1']) + test_selection('[2][:]', ['2-1', '2-2', '2-3', '2-4', '2-5']) + test_selection('[3][5]', ['3-5']) + test_selection('[1][2],[3][1]', ['1-2', '3-1']) + test_selection('[1::2][3::-2]', ['1-3', '1-1', '3-3', '3-1', '5-3', '5-1']) + test_selection('[6]', []) + test_selection('[:][6]', []) + test_selection('[6][:]', []) + test_selection('[1:2][1],[1:2][4]', ['1-1', '1-4', '2-1', '2-4']) + test_selection('[1][1]', ['1-1-1', '1-1-2', '1-1-3', '1-1-4', '1-1-5'], 3) + test_selection('[1][:][1]', ['1-1-1', '1-2-1', '1-3-1', '1-4-1', '1-5-1'], 3) + test_selection('[:][:][6]', [], 3) + def test_do_not_override_ie_key_in_url_transparent(self): ydl = YDL()