1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00

[extractor] Add MSN extractor

Added a new extractor for MSN video content, supporting direct MP4 downloads and embedded video handling.
This commit is contained in:
thedenv 2025-03-02 02:07:18 +00:00
parent feb8ffdd46
commit e02fde72da

View file

@@ -75,6 +75,8 @@ def _real_extract(self, url):
json_data = self._download_json(
json_url, page_id, note='Downloading video metadata', errnote='Unable to fetch video metadata'
)
# Optional debug: Uncomment to inspect JSON response
# self._downloader.to_screen(f"JSON data: {json_data}")
except ExtractorError as e:
self.report_warning(f'JSON metadata fetch failed: {str(e)}. Falling back to webpage parsing.')
json_data = {}
@@ -153,12 +155,14 @@ def _extract_embedded_urls(self, webpage, video_id):
"""Extract URLs of embedded videos (e.g., YouTube, Dailymotion) from the webpage."""
embed_urls = []
# Use re.findall to extract all iframe src attributes
iframe_matches = re.findall(r'<iframe[^>]+src=["\'](.*?)["\']', webpage)
for iframe_src in iframe_matches:
embed_url = url_or_none(iframe_src)
if embed_url and any(host in embed_url for host in ('youtube.com', 'dailymotion.com', 'nbcsports.com')):
embed_urls.append(embed_url)
return embed_urls
iframe_matches = re.findall(r'<iframe[^>]+src=["\'](.*?)["\']', webpage)
for iframe_src in iframe_matches:
embed_url = url_or_none(iframe_src)
if embed_url and any(host in embed_url for host in ('youtube.com', 'dailymotion.com', 'nbcsports.com')):
embed_urls.append(embed_url)
# Optional debug: Uncomment to inspect found URLs
# self._downloader.to_screen(f"Found embedded URLs: {embed_urls}")
return embed_urls
# Optional: Add to yt-dlp's extractor list if this is a standalone file