diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 708a04f92..833b92ebb 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1193,6 +1193,67 @@ def test_selection(params, expected_ids, evaluate_all=False):
         test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True)
         test_selection({'playlist_items': '-15::15'}, [], True)
 
+    def test_playlist_items_selection_nested(self):
+        ENTRIES_PER_PLAYLIST = 5
+        ALL_IDS = [f'{i + 1}-{j + 1}' for i in range(ENTRIES_PER_PLAYLIST) for j in range(ENTRIES_PER_PLAYLIST)]
+
+        def get_downloaded_info_dicts(params, entries):
+            ydl = YDL(params)
+            ydl.process_ie_result({
+                '_type': 'playlist',
+                'id': 'test',
+                'extractor': 'test:playlist',
+                'extractor_key': 'test:playlist',
+                'webpage_url': 'http://example.com',
+                'entries': entries,
+            })
+            return ydl.downloaded_info_dicts
+
+        def generate_entries(depth, cur_index=()):
+            entries = []
+            for i in range(1, ENTRIES_PER_PLAYLIST + 1):
+                idx = (*cur_index, i)
+                name = '-'.join(map(str, idx))
+                if depth == 1:
+                    entries.append({
+                        'id': name,
+                        'title': name,
+                        'url': TEST_URL,
+                    })
+                else:
+                    entries.append({
+                        '_type': 'playlist',
+                        'id': name,
+                        'extractor': 'test:playlist',
+                        'extractor_key': 'test:playlist',
+                        'webpage_url': f'http://example.com/{name}',
+                        'entries': generate_entries(depth - 1, idx),
+                    })
+            return entries
+
+        def test_selection(selection_str, expected_ids, depth=2):
+            entries = generate_entries(depth)
+            ret = get_downloaded_info_dicts({'playlist_items': selection_str}, entries)
+            self.assertEqual([info['id'] for info in ret], expected_ids)
+
+        test_selection('[:]', ALL_IDS)
+        test_selection('[:][:]', ALL_IDS)
+        test_selection('[:][1]', ['1-1', '2-1', '3-1', '4-1', '5-1'])
+        test_selection('[2][:]', ['2-1', '2-2', '2-3', '2-4', '2-5'])
+        test_selection('[3][5]', ['3-5'])
+        test_selection('[1][2],[3][1]', ['1-2', '3-1'])
+        test_selection('[1::2][3::-2]', ['1-3', '1-1', '3-3', '3-1', '5-3', '5-1'])
+        test_selection('[::2][1]', ['1-1', '3-1', '5-1'])
+        test_selection('[5:3:-1][1]', ['5-1', '4-1', '3-1'])
+        test_selection('[::-1][2]', ['5-2', '4-2', '3-2', '2-2', '1-2'])
+        test_selection('[6]', [])
+        test_selection('[:][6]', [])
+        test_selection('[6][:]', [])
+        test_selection('[1:2][1],[1:2][4]', ['1-1', '1-4', '2-1', '2-4'])
+        test_selection('[1][1]', ['1-1-1', '1-1-2', '1-1-3', '1-1-4', '1-1-5'], 3)
+        test_selection('[1][:][1]', ['1-1-1', '1-2-1', '1-3-1', '1-4-1', '1-5-1'], 3)
+        test_selection('[:][:][6]', [], 3)
+
     def test_do_not_override_ie_key_in_url_transparent(self):
         ydl = YDL()
 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 8790b326b..859ee4b26 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -646,6 +646,7 @@ def __init__(self, params=None, auto_init=True):
         self._num_downloads = 0
         self._num_videos = 0
         self._playlist_level = 0
+        self._nested_playlist_index = ()
         self._playlist_urls = set()
         self.cache = Cache(self)
         self.__header_cookies = []
@@ -2004,7 +2005,7 @@ def __process_playlist(self, ie_result, download):
         self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
 
         all_entries = PlaylistEntries(self, ie_result)
-        entries = orderedSet(all_entries.get_requested_items(), lazy=True)
+        entries = orderedSet(all_entries.get_requested_items(self._nested_playlist_index), lazy=True)
 
         lazy = self.params.get('lazy_playlist')
         if lazy:
@@ -2081,10 +2082,16 @@ def __process_playlist(self, ie_result, download):
                 f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
                 f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
 
-            entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
-                'playlist_index': playlist_index,
-                'playlist_autonumber': i + 1,
-            }, extra))
+            self._nested_playlist_index = (*self._nested_playlist_index, playlist_index)
+            try:
+                entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
+                    'playlist_index': playlist_index,
+                    'playlist_autonumber': i + 1,
+                }, extra))
+            finally:
+                # Restore the parent's index even if processing the entry raises
+                self._nested_playlist_index = self._nested_playlist_index[:-1]
+
             if not entry_result:
                 failures += 1
             if failures >= max_failures:
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 7d8f10047..51c8c80cd 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -438,7 +438,7 @@ def metadataparser_actions(f):
 
     if opts.playlist_items is not None:
         try:
-            tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
+            tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items, ()))
         except Exception as err:
             raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
 
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 4093c238c..b05e3a6b1 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -2394,6 +2394,37 @@ def _getslice(self, start, end):
             yield from page_results
 
 
+def index_in_slice_inclusive(idx: int, slice_: slice) -> bool:
+    """Check whether the 1-based playlist index `idx` is selected by `slice_`
+
+    `slice_` is expected to come from PlaylistEntries.parse_playlist_items:
+    both bounds are inclusive, a missing step means 1 and a missing or
+    infinite stop leaves the slice open-ended.
+    NB: Bounds counted from the end of the playlist (negative values) need the
+    playlist length, which is not known here; such slices are not matched reliably
+    """
+    start, stop = slice_.start, slice_.stop
+    step = 1 if slice_.step is None else slice_.step
+    forward = step > 0
+    if stop == (-math.inf if forward else math.inf):
+        # Stepping away from an infinite stop selects nothing
+        return False
+    open_ended = stop is None or abs(stop) == math.inf
+
+    if start is None and not forward:
+        # Counting down from the last entry, whose index is unknown here:
+        # only a step of -1 is decidable, as it visits every index not below the stop
+        return step == -1 and (open_ended or 0 <= stop <= idx)
+    if start is None:
+        start = 1  # Playlist indices are 1-based
+
+    if open_ended:
+        if (idx - start) % step != 0:
+            return False
+        return idx >= start if forward else idx <= start
+    return idx in range(start, int(stop) + (1 if forward else -1), step)
+
+
 class PlaylistEntries:
     MissingEntry = object()
     is_exhausted = False
@@ -2427,20 +2458,71 @@ def __init__(self, ydl, info_dict):
             (?::(?P<step>[+-]?\d+))?
         )?''')
 
+    NESTED_PLAYLIST_RE = re.compile(r'''(?x)
+        (?:\[
+        (?:[+-]?\d+)?
+        (?:[:-]
+            (?:[+-]?\d+|inf(?:inite)?)?
+            (?::(?:[+-]?\d+))?
+        )?
+        \])+''')
+
+    NESTED_PLAYLIST_SEGMENT_RE = re.compile(r'''(?x)
+        \[
+        (?P<start>[+-]?\d+)?
+        (?P<range>[:-]
+            (?P<end>[+-]?\d+|inf(?:inite)?)?
+            (?::(?P<step>[+-]?\d+))?
+        )?
+        \]''')
+
     @classmethod
-    def parse_playlist_items(cls, string):
-        for segment in string.split(','):
-            if not segment:
-                raise ValueError('There is two or more consecutive commas')
-            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
-            if not mobj:
-                raise ValueError(f'{segment!r} is not a valid specification')
+    def parse_playlist_items(cls, string, playlist_index=()):
+        """Yield the slices of the playlist items specification that apply at the current depth
+
+        @param string           Comma-separated specification. A segment is either a plain
+                                "START:END:STEP" range/index, which is yielded at any depth,
+                                or a sequence of "[...]" ranges, one per nesting level
+        @param playlist_index   Indices of the enclosing playlists (outermost first)
+                                of the playlist being processed; empty at the top level
+        @raises ValueError      if the specification is malformed
+        """
+
+        def slice_from_mobj(segment, mobj):
             start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
             if int_or_none(step) == 0:
                 raise ValueError(f'Step in {segment!r} cannot be zero')
-            yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
+            if has_range:
+                return slice(int_or_none(start), float_or_none(end), int_or_none(step))
+            if start is None:
+                # "[]" fits the nested syntax but names neither an index nor a range
+                raise ValueError(f'{segment!r} is not a valid specification')
+            return slice(int(start), int(start))
 
-    def get_requested_items(self):
+        for segment in string.split(','):
+            if not segment:
+                raise ValueError('There are two or more consecutive commas')
+            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
+            if mobj:
+                yield slice_from_mobj(segment, mobj)
+                continue
+
+            if not cls.NESTED_PLAYLIST_RE.fullmatch(segment):
+                raise ValueError(f'{segment!r} is not a valid specification')
+
+            # Parse all levels up front so that a bad level is reported at any depth
+            levels = [
+                slice_from_mobj(segment, mobj)
+                for mobj in cls.NESTED_PLAYLIST_SEGMENT_RE.finditer(segment)]
+            if not all(map(index_in_slice_inclusive, playlist_index, levels)):
+                continue  # An enclosing playlist is not selected by this segment
+            if len(playlist_index) < len(levels):
+                yield levels[len(playlist_index)]
+            else:
+                # Nested deeper than the specification: select everything
+                yield slice(None)
+
+    def get_requested_items(self, playlist_index=()):
         playlist_items = self.ydl.params.get('playlist_items')
         playlist_start = self.ydl.params.get('playliststart', 1)
         playlist_end = self.ydl.params.get('playlistend')
@@ -2452,7 +2534,7 @@ def get_requested_items(self):
         elif playlist_start != 1 or playlist_end:
             self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
 
-        for index in self.parse_playlist_items(playlist_items):
+        for index in self.parse_playlist_items(playlist_items, playlist_index):
             for i, entry in self[index]:
                 yield i, entry
                 if not entry: