diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3ab0f5efa..89665381b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -668,6 +668,9 @@ from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE +from .fosdem import ( + FosdemIE +) from .fourtube import ( FourTubeIE, FuxIE, diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py new file mode 100644 index 000000000..7eca96984 --- /dev/null +++ b/yt_dlp/extractor/fosdem.py @@ -0,0 +1,104 @@ +from .common import InfoExtractor +import re + + +class FosdemIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:archive\.)?fosdem\.org/(?P\d{4})/schedule/(?Ptrack|event)/(?P[\w.-]+)' + _TESTS = [ + { + 'url': 'https://archive.fosdem.org/2022/schedule/event/firmware_updates_for_opnsense_and_pfsense/', + 'info_dict': { + 'id': 'firmware_updates_for_opnsense_and_pfsense', + 'ext': 'webm', + 'title': 'Firmware updates for OPNsense and pfSense with fwupd/LVFS', + 'thumbnail': None, + 'release_date': '2022', + 'cast': ['Norbert Kamiński'], + 'uploader': 'FOSDEM', + 'description': 'md5:06a533c1dd130b9b9aa75a8c50c2625f', + } + }, + { + 'url': 'https://fosdem.org/2023/schedule/event/microkernel2023/', + 'info_dict': { + 'id': 'microkernel2023', + 'ext': 'webm', + 'title': 'The Microkernel Landscape in 2023', + 'thumbnail': None, + 'release_date': '2023', + 'uploader': 'FOSDEM', + 'cast': ['Martin Děcký'], + 'description': 'md5:dd38c1219fe9cc4aa18b2ef51f70f24c' + } + }, + { + 'url': 'https://fosdem.org/2023/schedule/event/hwacceluk/', + 'info_dict': { + 'id': 'hwacceluk', + 'ext': 'webm', + 'title': 'Hardware acceleration for Unikernels', + 'thumbnail': None, + 'release_date': '2023', + 'cast': ['Anastassios Nanos', 'Charalampos Mainas'], + 'uploader': 'FOSDEM', + 'description': 'md5:0e4d502d9aadd42d844407b49fab276c' + } + }, + { + 'url': 'https://fosdem.org/2023/schedule/track/microkernel_and_component_based_os/', + 'playlist_count': 11, + 'info_dict': { + 'id': 'microkernel_and_component_based_os', + 'title': 'Microkernel and Component-based OS devroom', + } + } + ] + + def _real_extract(self, url): + video_id, url_type, year = self._match_valid_url(url).group('id', 'type', 'year') + webpage = self._download_webpage(url, video_id) + title_rgx = r'
\n\s+

(.+?)

' + title = self._html_search_regex(title_rgx, webpage, 'title') \ + or self._og_search_title(webpage) + if url_type == 'event': + evnt_blurb_rgx = r'
\n*(?P(
(

(.+?)

\n*)+
)+\n*(
(

(.+?)

\n*)*
))+\n*
' + evnt_blurb = self._html_search_regex(evnt_blurb_rgx, + webpage, + 'event blurb', + group='blurb', + flags=re.DOTALL, + fatal=False) + description = evnt_blurb + video_url_rgx = r'
  • ' + video_url = self._html_search_regex(video_url_rgx, + webpage, + 'video url') + cast_rgx = r'(?P\w+ \w+)' + cast = re.findall(cast_rgx, webpage, flags=re.UNICODE) or [] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': 'FOSDEM', + 'url': video_url, + 'thumbnail': None, + 'release_date': year, + 'cast': cast, + 'webpage_url': url, + } + elif url_type == 'track': + events_rgx = r'/[0-9]+/schedule/event/[a-z0-9]+/)' + events_slugs = re.findall(events_rgx, webpage) or [] + + if len(events_slugs) > 0: + events_urls = ['https://fosdem.org' + slug for slug in events_slugs] + entries = [] + for event_url in events_urls: + entries.append(self.url_result(event_url, 'Fosdem')) + return self.playlist_result(entries, + playlist_id=video_id, + playlist_title=title, + playlist_description=None) + else: + print(f'The {url_type} is not supported')