diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 3cd70d0c7..fc8921d88 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -689,7 +689,7 @@ def _query_vars_for(data): class InstagramStoryIE(InstagramBaseIE): - _VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)(?:/(?P<id>\d+))?' IE_NAME = 'instagram:story' _TESTS = [{ @@ -699,23 +699,34 @@ class InstagramStoryIE(InstagramBaseIE): 'title': 'Rare', }, 'playlist_mincount': 50, + }, { + 'url': 'https://www.instagram.com/stories/fruits_zipper/3570766765028588805/', + 'only_matching': True, + }, { + 'url': 'https://www.instagram.com/stories/fruits_zipper', + 'only_matching': True, }] def _real_extract(self, url): username, story_id = self._match_valid_url(url).groups() - story_info = self._download_webpage(url, story_id) - user_info = self._search_json(r'"user":', story_info, 'user info', story_id, fatal=False) + story_info = self._download_webpage(url, story_id or username) + user_info = self._search_json(r'"user":', story_info, 'user info', story_id or username, fatal=False) if not user_info: self.raise_login_required('This content is unreachable') user_id = traverse_obj(user_info, 'pk', 'id', expected_type=str) - story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}' - if not story_info_url: # user id is only mandatory for non-highlights - raise ExtractorError('Unable to extract user id') + if username == 'highlights': + story_info_url = f'highlight:{story_id}' + if not story_id: # story id is only mandatory for highlights + raise ExtractorError('Missing highlight id') + else: + if not user_id: # user id is only mandatory for non-highlights + raise ExtractorError('Unable to extract user id') + story_info_url = user_id videos = traverse_obj(self._download_json( f'{self._API_BASE_URL}/feed/reels_media/?reel_ids={story_info_url}', - story_id,
errnote=False, fatal=False, headers=self._API_HEADERS), 'reels') + story_id or username, errnote=False, fatal=False, headers=self._API_HEADERS), 'reels') if not videos: self.raise_login_required('You need to log in to access this content') @@ -724,7 +735,7 @@ def _real_extract(self, url): if not story_title: story_title = f'Story by {username}' - highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items', lambda _, v: v['pk'] == story_id)) + highlights = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (user_id, 'items')) info_data = [] for highlight in highlights: highlight_data = self._extract_product(highlight) @@ -734,4 +745,7 @@ def _real_extract(self, url): 'uploader_id': user_id, **filter_dict(highlight_data), }) + if username != 'highlights' and story_id and not self._yes_playlist(story_title, story_id): + return traverse_obj(info_data, (lambda _, v: v['id'] == _pk_to_id(story_id), any)) + return self.playlist_result(info_data, playlist_id=story_id, playlist_title=story_title)