1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00

Facebook: move the nested 'extract_metadata' helper out of '_extract_from_url' into a standalone '_extract_metadata' method

This commit is contained in:
Patrick Robertson 2025-02-04 14:07:08 +01:00
parent 03c3d70577
commit 2d02bdceee

View file

@ -478,11 +478,7 @@ def _perform_login(self, username, password):
self.report_warning(f'unable to log in: {err}')
return
def _extract_from_url(self, url, video_id):
webpage = self._download_webpage(
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
def extract_metadata(webpage):
def _extract_metadata(self, webpage, video_id):
post_data = [self._parse_json(j, video_id, fatal=False) for j in re.findall(
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
post = traverse_obj(post_data, (
@ -543,6 +539,10 @@ def extract_metadata(webpage):
or (description or '').replace('\n', ' ') or f'Facebook video #{video_id}')
return merge_dicts(info_json_ld, info_dict)
def _extract_from_url(self, url, video_id):
webpage = self._download_webpage(
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
video_data = None
def extract_video_data(instances):
@ -753,7 +753,7 @@ def parse_attachment(attachment, key='media'):
return self.playlist_result(entries, video_id)
video_info = entries[0] if entries else {'id': video_id}
webpage_info = extract_metadata(webpage)
webpage_info = self._extract_metadata(webpage, video_id)
# honor precise duration in video info
if video_info.get('duration'):
webpage_info['duration'] = video_info['duration']
@ -885,7 +885,7 @@ def parse_attachment(attachment, key='media'):
'subtitles': subtitles,
}
process_formats(info_dict)
info_dict.update(extract_metadata(webpage))
info_dict.update(self._extract_metadata(webpage, video_id))
return info_dict