diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 5e9a4d0dc..c3ffa15ed 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -70,7 +70,6 @@ class FacebookIE(InfoExtractor): IE_NAME = 'facebook' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' - _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' _TESTS = [{ 'url': 'https://www.facebook.com/radiokicksfm/videos/3676516585958356/', @@ -238,7 +237,7 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '1569199726448814', 'ext': 'mp4', - 'title': 'Pence MUST GO!', + 'title': 'Trump/Musk & Vance MUST GO!', 'description': 'Vickie Gentry shared a memory.', 'timestamp': 1511548260, 'upload_date': '20171124', @@ -413,6 +412,13 @@ class FacebookIE(InfoExtractor): }, { 'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl', 'only_matching': True, + }, { + 'url': 'https://www.facebook.com/WatchESLOne/videos/297860117405429/', + 'info_dict': { + 'id': '297860117405429', + }, + 'playlist_count': 1, + 'skip': 'URL that previously required tahoe player, but currently not working. More info: https://github.com/ytdl-org/youtube-dl/issues/15441', }] _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)' _api_config = { @@ -673,15 +679,15 @@ def _extract_from_jsmods_instances(self, js_data): return self._extract_video_data(try_get( js_data, lambda x: x['jsmods']['instances'], list) or []) - def _yield_all_relay_data(self, _filter, video_id, webpage): + def _yield_all_relay_data(self, _filter, webpage): for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})', webpage): - yield self._parse_json(relay_data, video_id, fatal=False) or {} + yield self._parse_json(relay_data, None, fatal=False) or {} - def _extract_relay_prefetched_data(self, _filter, video_id, webpage, target_keys=None): + def _extract_relay_prefetched_data(self, _filter, webpage, target_keys=None): path = 'data' if target_keys is not None: path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys)) - return traverse_obj(self, self._yield_all_relay_data(_filter, video_id, webpage), ( + return traverse_obj(self._yield_all_relay_data(_filter, webpage), ( ..., 'require', (None, (..., ..., ..., '__bbox', 'require')), lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v), ..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {} @@ -709,7 +715,6 @@ def _extract_from_url(self, url, video_id): if not video_data: data = self._extract_relay_prefetched_data( r'"(?:dash_manifest|playable_url(?:_quality_hd)?)', - video_id, webpage, target_keys=('video', 'event', 'nodes', 'node', 'mediaset')) if data: @@ -781,13 +786,13 @@ def parse_attachment(attachment, key='media'): }), } - prefetched_data = self._extract_relay_prefetched_data(r'"login_data"\s*:\s*{', video_id, webpage) + prefetched_data = self._extract_relay_prefetched_data(r'"login_data"\s*:\s*{', webpage) if prefetched_data: lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict) if lsd: post_data[lsd['name']] = lsd['value'] - relay_data = next(filter(None, self._yield_all_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,', video_id, webpage)), {}) + relay_data = next(filter(None, self._yield_all_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,', webpage)), {}) for define in (relay_data.get('define') or []): if define[0] == 'RelayAPIConfigDefaults': @@ -810,33 +815,6 @@ def parse_attachment(attachment, key='media'): return self.playlist_result(entries, video_id) - if not video_data: - # Video info not in first request, do a secondary request using - # tahoe player specific URL - tahoe_data = self._download_webpage( - self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id, - data=urlencode_postdata({ - '__a': 1, - '__pc': self._search_regex( - r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage, - 'pkg cohort', default='PHASED:DEFAULT'), - '__rev': self._search_regex( - r'client_revision["\']\s*:\s*(\d+),', webpage, - 'client revision', default='3944515'), - 'fb_dtsg': self._search_regex( - r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', - webpage, 'dtsg token', default=''), - }), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - tahoe_js_data = self._parse_json( - self._search_regex( - r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data, - 'tahoe js data', default='{}'), - video_id, fatal=False) - video_data = self._extract_from_jsmods_instances(tahoe_js_data) - if not video_data: raise ExtractorError('Cannot parse data')