mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
[extractor] Add MSN extractor
Added a new extractor for MSN video content, supporting direct MP4 downloads and embedded video handling.
This commit is contained in:
parent
feb8ffdd46
commit
e02fde72da
1 changed file with 10 additions and 6 deletions
|
@ -75,6 +75,8 @@ def _real_extract(self, url):
|
|||
json_data = self._download_json(
|
||||
json_url, page_id, note='Downloading video metadata', errnote='Unable to fetch video metadata'
|
||||
)
|
||||
# Optional debug: Uncomment to inspect JSON response
|
||||
# self._downloader.to_screen(f"JSON data: {json_data}")
|
||||
except ExtractorError as e:
|
||||
self.report_warning(f'JSON metadata fetch failed: {str(e)}. Falling back to webpage parsing.')
|
||||
json_data = {}
|
||||
|
@ -153,12 +155,14 @@ def _extract_embedded_urls(self, webpage, video_id):
|
|||
"""Extract URLs of embedded videos (e.g., YouTube, Dailymotion) from the webpage."""
|
||||
embed_urls = []
|
||||
# Use re.findall to extract all iframe src attributes
|
||||
iframe_matches = re.findall(r'<iframe[^>]+src=["\'](.*?)["\']', webpage)
|
||||
for iframe_src in iframe_matches:
|
||||
embed_url = url_or_none(iframe_src)
|
||||
if embed_url and any(host in embed_url for host in ('youtube.com', 'dailymotion.com', 'nbcsports.com')):
|
||||
embed_urls.append(embed_url)
|
||||
return embed_urls
|
||||
iframe_matches = re.findall(r'<iframe[^>]+src=["\'](.*?)["\']', webpage)
|
||||
for iframe_src in iframe_matches:
|
||||
embed_url = url_or_none(iframe_src)
|
||||
if embed_url and any(host in embed_url for host in ('youtube.com', 'dailymotion.com', 'nbcsports.com')):
|
||||
embed_urls.append(embed_url)
|
||||
# Optional debug: Uncomment to inspect found URLs
|
||||
# self._downloader.to_screen(f"Found embedded URLs: {embed_urls}")
|
||||
return embed_urls
|
||||
|
||||
|
||||
# Optional: Add to yt-dlp's extractor list if this is a standalone file
|
||||
|
|
Loading…
Reference in a new issue