From 33aac01e309d79f4bb858307ddbccf5b0c404d00 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sat, 29 Apr 2023 10:26:48 -0400 Subject: [PATCH 01/14] [extractor/fosdem] Added FOSDEM extractor --- yt_dlp/extractor/_extractors.py | 3 ++ yt_dlp/extractor/fosdem.py | 68 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 yt_dlp/extractor/fosdem.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 09903423d..d2d1a8ff2 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -603,6 +603,9 @@ from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE +from .fosdem import ( + FosdemIE +) from .fourtube import ( FourTubeIE, PornTubeIE, diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py new file mode 100644 index 000000000..1e4c5cf2e --- /dev/null +++ b/yt_dlp/extractor/fosdem.py @@ -0,0 +1,68 @@ +from .common import InfoExtractor +import pdb + +class FosdemIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:archive\.)?fosdem\.org/[0-9]{4}/schedule/(?Ptrack|event)/(?P[\w\.-_]+)/' + _TESTS = [ + { + 'url': 'https://archive.fosdem.org/2022/schedule/event/firmware_updates_for_opnsense_and_pfsense/', + 'info_dict': { + 'id': 'firmware_updates_for_opnsense_and_pfsense', + 'ext': 'webm', + 'title': 'Firmware updates for OPNsense and pfSense with fwupd/LVFS', + 'thumbnail': None, + 'uploader': 'FOSDEM', + 'description': "This presentation will describe the results of the proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS for OPNsense and pfSense. It will explain the challenges connected with the implementation of firmware update systems for BSD-based firewall and routing software. It will show basic concepts connected to the fwupd and LVFS. The security of the whole system is not determined only by the software it runs, but also by the firmware. Firmware is a piece of software inseparable from the hardware. It is responsible for proper hardware initialization as well as its security features. That means that the safety of the machine strongly depends on the mitigations of vulnerabilities provided by firmware (like microcode updates, bug/exploit fixes). For these particular reasons, the firmware should be kept up-to-date.\nRouters are highly popular attack vectors, therefore they must be appropriately secured. pfSense and OPNsense are well known secure firewall and routing software, but they do not have any firmware update methods. Therefore to secure hardware initialization of the routers, in this presentation we will present proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS.\nNowadays, this is one of the most popular firmware update software. fwupd is a daemon that manages firmware updates of each of your hardware components that have some kind of firmware. What is more fwupd is open source, which makes it more trustworthy than proprietary applications delivered by hardware vendors designed for (only) their devices.", + } + }, + { + 'url': 'https://fosdem.org/2023/schedule/event/microkernel2023/', + 'info_dict': { + 'id': 'microkernel2023', + 'ext': 'webm', + 'title': 'The Microkernel Landscape in 2023', + 'thumbnail': None, + 'uploader': 'FOSDEM', + 'description': 'The idea of the microkernel OS architecture is more that 50 years old and the term itself is just a few years younger. Over the years, it has been implemented in countless variants and modifications, it has served as a basis for intriguing OS experiments, it has gained strong position in the mission-critical and safety-critical areas and while it is still not the dominant architecture in the general-purpose desktop OS domain, it has had major influence on the "mainstream" operating systems as well.\nThis talk, however, is not about the history. Instead, we describe where are the microkernel-based operating systems today, who works on them and why, who uses them in production and why, where they aim for the future. The purpose of this talk is also to present the basic practical experiences with the existing microkernel-based operating systems — not to compare them, but to provide the potential users and contributors with an initial sorted list of operating systems they should look into in more detail depending on their needs.' + } + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + groups = self._match_valid_url(url).groupdict() + webpage = self._download_webpage(url, video_id) + if groups['url_type'] == 'event': + print("This is an event url") + elif groups['url_type'] == 'track': + print("This is a track") + # Download all videos on this page + else: + print("how did you get here?") + title_rgx = r"
\n\s+

(.+?)

" + title = self._html_search_regex(title_rgx, webpage, 'title') + print(f'TITLE: {title}') + evnt_blurb_rgx = r"
\n*(?P(
(

(.+?)

\n*)+
)+\n*(
(

(.+?)

\n*)*
))+\n*
" + evnt_blurb = self._html_search_regex(evnt_blurb_rgx, + webpage, + 'event blurb', + group='blurb') + description = evnt_blurb + print(f"DESCRIPTION: {description}") + video_url_rgx = r"
  • " + video_url = self._html_search_regex(video_url_rgx, + webpage, + 'video url') + print(f"VIDEO URL: {video_url}") + print('\n\n___________________________') + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': 'FOSDEM', + 'url': video_url, + 'thumbnail': None, + # TODO more properties (see yt_dlp/extractor/common.py) + # 'release_date': release_date, + # 'presenter/author + } From 36a1f6294c2352421b7080573627dea24d659f49 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sun, 30 Apr 2023 09:09:22 -0400 Subject: [PATCH 02/14] [extractor/fosdem] Use re.DOTALL for html search for description --- yt_dlp/extractor/fosdem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 1e4c5cf2e..4c96cd6d8 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +import re import pdb class FosdemIE(InfoExtractor): @@ -46,7 +47,7 @@ def _real_extract(self, url): evnt_blurb = self._html_search_regex(evnt_blurb_rgx, webpage, 'event blurb', - group='blurb') + group='blurb', flags=re.DOTALL) description = evnt_blurb print(f"DESCRIPTION: {description}") video_url_rgx = r"
  • " From 771fd0f0cc61c963eceeafab1b84d65922b0ecc7 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sun, 30 Apr 2023 09:09:52 -0400 Subject: [PATCH 03/14] [extractor/fosdem] Include year --- yt_dlp/extractor/fosdem.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 4c96cd6d8..bf57ca600 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -3,7 +3,7 @@ import pdb class FosdemIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:archive\.)?fosdem\.org/[0-9]{4}/schedule/(?Ptrack|event)/(?P[\w\.-_]+)/' + _VALID_URL = r'https?://(?:www\.)?(?:archive\.)?fosdem\.org/(?P[0-9]{4})/schedule/(?Ptrack|event)/(?P[\w\.-_]+)/' _TESTS = [ { 'url': 'https://archive.fosdem.org/2022/schedule/event/firmware_updates_for_opnsense_and_pfsense/', @@ -40,6 +40,7 @@ def _real_extract(self, url): # Download all videos on this page else: print("how did you get here?") + year = groups['year'] title_rgx = r"
    \n\s+

    (.+?)

    " title = self._html_search_regex(title_rgx, webpage, 'title') print(f'TITLE: {title}') @@ -64,6 +65,6 @@ def _real_extract(self, url): 'url': video_url, 'thumbnail': None, # TODO more properties (see yt_dlp/extractor/common.py) - # 'release_date': release_date, + 'release_date': year, # 'presenter/author } From a1a330cd9c3f6d46d5cc22994e17870acecf2297 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sun, 30 Apr 2023 09:10:02 -0400 Subject: [PATCH 04/14] [extractor/fosdem] Add test that needs the re.DOTALL regex for description --- yt_dlp/extractor/fosdem.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index bf57ca600..6c710ddce 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -26,6 +26,17 @@ class FosdemIE(InfoExtractor): 'uploader': 'FOSDEM', 'description': 'The idea of the microkernel OS architecture is more that 50 years old and the term itself is just a few years younger. Over the years, it has been implemented in countless variants and modifications, it has served as a basis for intriguing OS experiments, it has gained strong position in the mission-critical and safety-critical areas and while it is still not the dominant architecture in the general-purpose desktop OS domain, it has had major influence on the "mainstream" operating systems as well.\nThis talk, however, is not about the history. Instead, we describe where are the microkernel-based operating systems today, who works on them and why, who uses them in production and why, where they aim for the future. The purpose of this talk is also to present the basic practical experiences with the existing microkernel-based operating systems — not to compare them, but to provide the potential users and contributors with an initial sorted list of operating systems they should look into in more detail depending on their needs.' } + }, + { + 'url':'https://fosdem.org/2023/schedule/event/hwacceluk/', + 'info_dict':{ + 'id': 'hwacceluk', + 'ext': 'webm', + 'title': 'Hardware acceleration for Unikernels', + 'thumbnail': None, + 'uploader': 'FOSDEM', + 'description': 'Unikernels promise fast boot times, small memory footprint and stronger security but lack in terms of manageability. Moreover, unikernels provide a non-generic environment for applications, with limited or no support for widely used libraries and OS features. This issue is even more apparent in the case of hardware acceleration. Acceleration libraries are often dynamically linked and have numerous dependencies, which directly contradict the statically linked notion of unikernels. Hardware acceleration functionality is almost non-existent in unikernel frameworks, mainly due to the absence of suitable virtualization solutions for such devices. ​ In this talk, we present an update on the vAccel framework we have built that can expose hardware acceleration semantics to workloads running on isolated sandboxes. We go through the components that comprise the framework and elaborate on the challenges in building such a software stack: we first present an overview of vAccel and how it works; then we focus on the porting effort of vAccel in various unikernel frameworks. Finally, we present a hardware acceleration abstraction that expose semantic acceleration functionality to workloads running as unikernels. ​ We will present a short demo of some popular algorithms running on top of Unikraft and vAccel show-casing the merits and trade-offs of this approach.' + } } ] From 03e4ca498a33db0f5a3287eab6df4b048a0143ff Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sat, 6 May 2023 06:52:13 -0400 Subject: [PATCH 05/14] [extractor/fosdem] Move parsing logic --- yt_dlp/extractor/fosdem.py | 51 ++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 6c710ddce..3d166af0e 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -44,8 +44,6 @@ def _real_extract(self, url): video_id = self._match_id(url) groups = self._match_valid_url(url).groupdict() webpage = self._download_webpage(url, video_id) - if groups['url_type'] == 'event': - print("This is an event url") elif groups['url_type'] == 'track': print("This is a track") # Download all videos on this page @@ -54,28 +52,27 @@ def _real_extract(self, url): year = groups['year'] title_rgx = r"
    \n\s+

    (.+?)

    " title = self._html_search_regex(title_rgx, webpage, 'title') - print(f'TITLE: {title}') - evnt_blurb_rgx = r"
    \n*(?P(
    (

    (.+?)

    \n*)+
    )+\n*(
    (

    (.+?)

    \n*)*
    ))+\n*
    " - evnt_blurb = self._html_search_regex(evnt_blurb_rgx, - webpage, - 'event blurb', - group='blurb', flags=re.DOTALL) - description = evnt_blurb - print(f"DESCRIPTION: {description}") - video_url_rgx = r"
  • " - video_url = self._html_search_regex(video_url_rgx, - webpage, - 'video url') - print(f"VIDEO URL: {video_url}") - print('\n\n___________________________') - return { - 'id': video_id, - 'title': title, - 'description': description, - 'uploader': 'FOSDEM', - 'url': video_url, - 'thumbnail': None, - # TODO more properties (see yt_dlp/extractor/common.py) - 'release_date': year, - # 'presenter/author - } + if groups['url_type'] == 'event': + evnt_blurb_rgx = r"
    \n*(?P(
    (

    (.+?)

    \n*)+
    )+\n*(
    (

    (.+?)

    \n*)*
    ))+\n*
    " + evnt_blurb = self._html_search_regex(evnt_blurb_rgx, + webpage, + 'event blurb', + group='blurb', flags=re.DOTALL) + description = evnt_blurb + video_url_rgx = r"
  • " + video_url = self._html_search_regex(video_url_rgx, + webpage, + 'video url') + cast_rgx = r"(?P\w+ \w+)" + cast = re.findall(cast_rgx, webpage, flags=re.UNICODE) + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': 'FOSDEM', + 'url': video_url, + 'thumbnail': None, + 'release_date': year, + 'cast': cast, + 'webpage_url': url, + } From e15cbfb217b052bbb0df49f51eb832ee617673d4 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sat, 6 May 2023 06:53:02 -0400 Subject: [PATCH 06/14] [extractor/fosdem] Added release date and cast to tests --- yt_dlp/extractor/fosdem.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 3d166af0e..8d9bd1ad3 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -12,6 +12,8 @@ class FosdemIE(InfoExtractor): 'ext': 'webm', 'title': 'Firmware updates for OPNsense and pfSense with fwupd/LVFS', 'thumbnail': None, + 'release_date': '2022', + 'cast': ['Norbert Kamiński'], 'uploader': 'FOSDEM', 'description': "This presentation will describe the results of the proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS for OPNsense and pfSense. It will explain the challenges connected with the implementation of firmware update systems for BSD-based firewall and routing software. It will show basic concepts connected to the fwupd and LVFS. The security of the whole system is not determined only by the software it runs, but also by the firmware. Firmware is a piece of software inseparable from the hardware. It is responsible for proper hardware initialization as well as its security features. That means that the safety of the machine strongly depends on the mitigations of vulnerabilities provided by firmware (like microcode updates, bug/exploit fixes). For these particular reasons, the firmware should be kept up-to-date.\nRouters are highly popular attack vectors, therefore they must be appropriately secured. pfSense and OPNsense are well known secure firewall and routing software, but they do not have any firmware update methods. Therefore to secure hardware initialization of the routers, in this presentation we will present proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS.\nNowadays, this is one of the most popular firmware update software. fwupd is a daemon that manages firmware updates of each of your hardware components that have some kind of firmware. What is more fwupd is open source, which makes it more trustworthy than proprietary applications delivered by hardware vendors designed for (only) their devices.", } @@ -23,7 +25,9 @@ class FosdemIE(InfoExtractor): 'ext': 'webm', 'title': 'The Microkernel Landscape in 2023', 'thumbnail': None, + 'release_date': '2023', 'uploader': 'FOSDEM', + 'cast': ['Martin Děcký'], 'description': 'The idea of the microkernel OS architecture is more that 50 years old and the term itself is just a few years younger. Over the years, it has been implemented in countless variants and modifications, it has served as a basis for intriguing OS experiments, it has gained strong position in the mission-critical and safety-critical areas and while it is still not the dominant architecture in the general-purpose desktop OS domain, it has had major influence on the "mainstream" operating systems as well.\nThis talk, however, is not about the history. Instead, we describe where are the microkernel-based operating systems today, who works on them and why, who uses them in production and why, where they aim for the future. The purpose of this talk is also to present the basic practical experiences with the existing microkernel-based operating systems — not to compare them, but to provide the potential users and contributors with an initial sorted list of operating systems they should look into in more detail depending on their needs.' } }, @@ -34,6 +38,8 @@ class FosdemIE(InfoExtractor): 'ext': 'webm', 'title': 'Hardware acceleration for Unikernels', 'thumbnail': None, + 'release_date': '2023', + 'cast': ['Anastassios Nanos', 'Charalampos Mainas'], 'uploader': 'FOSDEM', 'description': 'Unikernels promise fast boot times, small memory footprint and stronger security but lack in terms of manageability. Moreover, unikernels provide a non-generic environment for applications, with limited or no support for widely used libraries and OS features. This issue is even more apparent in the case of hardware acceleration. Acceleration libraries are often dynamically linked and have numerous dependencies, which directly contradict the statically linked notion of unikernels. Hardware acceleration functionality is almost non-existent in unikernel frameworks, mainly due to the absence of suitable virtualization solutions for such devices. ​ In this talk, we present an update on the vAccel framework we have built that can expose hardware acceleration semantics to workloads running on isolated sandboxes. We go through the components that comprise the framework and elaborate on the challenges in building such a software stack: we first present an overview of vAccel and how it works; then we focus on the porting effort of vAccel in various unikernel frameworks. Finally, we present a hardware acceleration abstraction that expose semantic acceleration functionality to workloads running as unikernels. ​ We will present a short demo of some popular algorithms running on top of Unikraft and vAccel show-casing the merits and trade-offs of this approach.' } From 31f9edb502b304e76c0935274634afb0a8e0ec46 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sat, 6 May 2023 06:53:22 -0400 Subject: [PATCH 07/14] [extractor/fosdem] Added playlist support --- yt_dlp/extractor/fosdem.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 8d9bd1ad3..fe5bc8d0e 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -43,18 +43,21 @@ class FosdemIE(InfoExtractor): 'uploader': 'FOSDEM', 'description': 'Unikernels promise fast boot times, small memory footprint and stronger security but lack in terms of manageability. Moreover, unikernels provide a non-generic environment for applications, with limited or no support for widely used libraries and OS features. This issue is even more apparent in the case of hardware acceleration. Acceleration libraries are often dynamically linked and have numerous dependencies, which directly contradict the statically linked notion of unikernels. Hardware acceleration functionality is almost non-existent in unikernel frameworks, mainly due to the absence of suitable virtualization solutions for such devices. ​ In this talk, we present an update on the vAccel framework we have built that can expose hardware acceleration semantics to workloads running on isolated sandboxes. We go through the components that comprise the framework and elaborate on the challenges in building such a software stack: we first present an overview of vAccel and how it works; then we focus on the porting effort of vAccel in various unikernel frameworks. Finally, we present a hardware acceleration abstraction that expose semantic acceleration functionality to workloads running as unikernels. ​ We will present a short demo of some popular algorithms running on top of Unikraft and vAccel show-casing the merits and trade-offs of this approach.' } - } + }, + { + 'url': 'https://fosdem.org/2023/schedule/track/microkernel_and_component_based_os/', + 'playlist_count': 11, + 'info_dict':{ + 'id': 'microkernel_and_component_based_os', + 'title': 'Microkernel and Component-based OS devroom', + } + } ] def _real_extract(self, url): video_id = self._match_id(url) groups = self._match_valid_url(url).groupdict() webpage = self._download_webpage(url, video_id) - elif groups['url_type'] == 'track': - print("This is a track") - # Download all videos on this page - else: - print("how did you get here?") year = groups['year'] title_rgx = r"
    \n\s+

    (.+?)

    " title = self._html_search_regex(title_rgx, webpage, 'title') @@ -82,3 +85,16 @@ def _real_extract(self, url): 'cast': cast, 'webpage_url': url, } + elif groups['url_type'] == 'track': + events_rgx = r"/[0-9]+/schedule/event/[a-z0-9]+/)" + events_slugs = re.findall(events_rgx, webpage) + events_urls = ['https://fosdem.org'+slug for slug in events_slugs] + entries = [] + for event_url in events_urls: + entries.append(self.url_result(event_url, 'Fosdem')) + return self.playlist_result(entries, + playlist_id=video_id, + playlist_title=title, + playlist_description=None) + else: + print(f"The {event_type} is not supported") From e4e7312e8e803fa93bae298c918852401ec8d33b Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sat, 6 May 2023 06:53:34 -0400 Subject: [PATCH 08/14] [extractor/fosdem] Added Fosdem to supported sites --- supportedsites.md | 1 + 1 file changed, 1 insertion(+) diff --git a/supportedsites.md b/supportedsites.md index f5c8c3829..43c73dd11 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -434,6 +434,7 @@ # Supported sites - **FoodNetwork** - **FootyRoom** - **Formula1** + - **Fosdem** - **FOX** - **FOX9** - **FOX9News** From 9a3f1a2a4dc84708c4d71a70c27252035460b632 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sat, 6 May 2023 07:40:52 -0400 Subject: [PATCH 09/14] [extractors/fosdem] Made style changes --- yt_dlp/extractor/fosdem.py | 41 ++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index fe5bc8d0e..80418654a 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -15,7 +15,7 @@ class FosdemIE(InfoExtractor): 'release_date': '2022', 'cast': ['Norbert Kamiński'], 'uploader': 'FOSDEM', - 'description': "This presentation will describe the results of the proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS for OPNsense and pfSense. It will explain the challenges connected with the implementation of firmware update systems for BSD-based firewall and routing software. It will show basic concepts connected to the fwupd and LVFS. The security of the whole system is not determined only by the software it runs, but also by the firmware. Firmware is a piece of software inseparable from the hardware. It is responsible for proper hardware initialization as well as its security features. That means that the safety of the machine strongly depends on the mitigations of vulnerabilities provided by firmware (like microcode updates, bug/exploit fixes). For these particular reasons, the firmware should be kept up-to-date.\nRouters are highly popular attack vectors, therefore they must be appropriately secured. pfSense and OPNsense are well known secure firewall and routing software, but they do not have any firmware update methods. Therefore to secure hardware initialization of the routers, in this presentation we will present proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS.\nNowadays, this is one of the most popular firmware update software. fwupd is a daemon that manages firmware updates of each of your hardware components that have some kind of firmware. What is more fwupd is open source, which makes it more trustworthy than proprietary applications delivered by hardware vendors designed for (only) their devices.", + 'description': 'This presentation will describe the results of the proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS for OPNsense and pfSense. It will explain the challenges connected with the implementation of firmware update systems for BSD-based firewall and routing software. It will show basic concepts connected to the fwupd and LVFS. The security of the whole system is not determined only by the software it runs, but also by the firmware. Firmware is a piece of software inseparable from the hardware. It is responsible for proper hardware initialization as well as its security features. That means that the safety of the machine strongly depends on the mitigations of vulnerabilities provided by firmware (like microcode updates, bug/exploit fixes). For these particular reasons, the firmware should be kept up-to-date.\nRouters are highly popular attack vectors, therefore they must be appropriately secured. pfSense and OPNsense are well known secure firewall and routing software, but they do not have any firmware update methods. Therefore to secure hardware initialization of the routers, in this presentation we will present proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS.\nNowadays, this is one of the most popular firmware update software. fwupd is a daemon that manages firmware updates of each of your hardware components that have some kind of firmware. What is more fwupd is open source, which makes it more trustworthy than proprietary applications delivered by hardware vendors designed for (only) their devices.', } }, { @@ -58,22 +58,28 @@ def _real_extract(self, url): video_id = self._match_id(url) groups = self._match_valid_url(url).groupdict() webpage = self._download_webpage(url, video_id) - year = groups['year'] - title_rgx = r"
    \n\s+

    (.+?)

    " - title = self._html_search_regex(title_rgx, webpage, 'title') - if groups['url_type'] == 'event': - evnt_blurb_rgx = r"
    \n*(?P(
    (

    (.+?)

    \n*)+
    )+\n*(
    (

    (.+?)

    \n*)*
    ))+\n*
    " + year = groups.get('year') + title_rgx = r'
    \n\s+

    (.+?)

    ' + title = self._html_search_regex(title_rgx, webpage, 'title') \ + or self._og_search_title(webpage) + if groups.get('url_type') == 'event': + evnt_blurb_rgx = r'
    \n*(?P(
    (

    (.+?)

    \n*)+
    )+\n*(
    (

    (.+?)

    \n*)*
    ))+\n*
    ' evnt_blurb = self._html_search_regex(evnt_blurb_rgx, webpage, 'event blurb', - group='blurb', flags=re.DOTALL) + group='blurb', + flags=re.DOTALL, + fatal=False) description = evnt_blurb - video_url_rgx = r"
  • " + video_url_rgx = r'
  • ' video_url = self._html_search_regex(video_url_rgx, webpage, 'video url') - cast_rgx = r"(?P\w+ \w+)" - cast = re.findall(cast_rgx, webpage, flags=re.UNICODE) + cast_rgx = r'(?P\w+ \w+)' + try: + cast = re.findall(cast_rgx, webpage, flags=re.UNICODE) + except: + cast = [] return { 'id': video_id, 'title': title, @@ -85,10 +91,14 @@ def _real_extract(self, url): 'cast': cast, 'webpage_url': url, } - elif groups['url_type'] == 'track': - events_rgx = r"/[0-9]+/schedule/event/[a-z0-9]+/)" - events_slugs = re.findall(events_rgx, webpage) - events_urls = ['https://fosdem.org'+slug for slug in events_slugs] + elif groups.get('url_type') == 'track': + events_rgx = r'/[0-9]+/schedule/event/[a-z0-9]+/)' + try: + events_slugs = re.findall(events_rgx, webpage) + except: + events_slugs = [] + if len(events_slugs) > 0: + events_urls = ['https://fosdem.org'+slug for slug in events_slugs] entries = [] for event_url in events_urls: entries.append(self.url_result(event_url, 'Fosdem')) @@ -97,4 +107,5 @@ def _real_extract(self, url): playlist_title=title, playlist_description=None) else: - print(f"The {event_type} is not supported") + url_type = groups.get('url_type') + print(f'The {url_type} is not supported') From 7187056b6925b6c5c65c2639d5f3f3a6fd250024 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Sun, 7 May 2023 07:59:15 -0400 Subject: [PATCH 10/14] [extractors/fosdem] flake8 cleanup --- yt_dlp/extractor/fosdem.py | 46 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 80418654a..d0aff28af 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -1,23 +1,23 @@ from .common import InfoExtractor import re -import pdb + class FosdemIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:archive\.)?fosdem\.org/(?P[0-9]{4})/schedule/(?Ptrack|event)/(?P[\w\.-_]+)/' _TESTS = [ { 'url': 'https://archive.fosdem.org/2022/schedule/event/firmware_updates_for_opnsense_and_pfsense/', - 'info_dict': { - 'id': 'firmware_updates_for_opnsense_and_pfsense', - 'ext': 'webm', - 'title': 'Firmware updates for OPNsense and pfSense with fwupd/LVFS', - 'thumbnail': None, - 'release_date': '2022', - 'cast': ['Norbert Kamiński'], - 'uploader': 'FOSDEM', - 'description': 'This presentation will describe the results of the proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS for OPNsense and pfSense. It will explain the challenges connected with the implementation of firmware update systems for BSD-based firewall and routing software. It will show basic concepts connected to the fwupd and LVFS. The security of the whole system is not determined only by the software it runs, but also by the firmware. Firmware is a piece of software inseparable from the hardware. It is responsible for proper hardware initialization as well as its security features. That means that the safety of the machine strongly depends on the mitigations of vulnerabilities provided by firmware (like microcode updates, bug/exploit fixes). For these particular reasons, the firmware should be kept up-to-date.\nRouters are highly popular attack vectors, therefore they must be appropriately secured. pfSense and OPNsense are well known secure firewall and routing software, but they do not have any firmware update methods. Therefore to secure hardware initialization of the routers, in this presentation we will present proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS.\nNowadays, this is one of the most popular firmware update software. fwupd is a daemon that manages firmware updates of each of your hardware components that have some kind of firmware. What is more fwupd is open source, which makes it more trustworthy than proprietary applications delivered by hardware vendors designed for (only) their devices.', - } - }, + 'info_dict': { + 'id': 'firmware_updates_for_opnsense_and_pfsense', + 'ext': 'webm', + 'title': 'Firmware updates for OPNsense and pfSense with fwupd/LVFS', + 'thumbnail': None, + 'release_date': '2022', + 'cast': ['Norbert Kamiński'], + 'uploader': 'FOSDEM', + 'description': 'This presentation will describe the results of the proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS for OPNsense and pfSense. It will explain the challenges connected with the implementation of firmware update systems for BSD-based firewall and routing software. It will show basic concepts connected to the fwupd and LVFS. The security of the whole system is not determined only by the software it runs, but also by the firmware. Firmware is a piece of software inseparable from the hardware. It is responsible for proper hardware initialization as well as its security features. That means that the safety of the machine strongly depends on the mitigations of vulnerabilities provided by firmware (like microcode updates, bug/exploit fixes). For these particular reasons, the firmware should be kept up-to-date.\nRouters are highly popular attack vectors, therefore they must be appropriately secured. pfSense and OPNsense are well known secure firewall and routing software, but they do not have any firmware update methods. Therefore to secure hardware initialization of the routers, in this presentation we will present proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS.\nNowadays, this is one of the most popular firmware update software. fwupd is a daemon that manages firmware updates of each of your hardware components that have some kind of firmware. What is more fwupd is open source, which makes it more trustworthy than proprietary applications delivered by hardware vendors designed for (only) their devices.', + } + }, { 'url': 'https://fosdem.org/2023/schedule/event/microkernel2023/', 'info_dict': { @@ -32,8 +32,8 @@ class FosdemIE(InfoExtractor): } }, { - 'url':'https://fosdem.org/2023/schedule/event/hwacceluk/', - 'info_dict':{ + 'url': 'https://fosdem.org/2023/schedule/event/hwacceluk/', + 'info_dict': { 'id': 'hwacceluk', 'ext': 'webm', 'title': 'Hardware acceleration for Unikernels', @@ -47,11 +47,11 @@ class FosdemIE(InfoExtractor): { 'url': 'https://fosdem.org/2023/schedule/track/microkernel_and_component_based_os/', 'playlist_count': 11, - 'info_dict':{ + 'info_dict': { 'id': 'microkernel_and_component_based_os', 'title': 'Microkernel and Component-based OS devroom', - } } + } ] def _real_extract(self, url): @@ -76,10 +76,8 @@ def _real_extract(self, url): webpage, 'video url') cast_rgx = r'(?P\w+ \w+)' - try: - cast = re.findall(cast_rgx, webpage, flags=re.UNICODE) - except: - cast = [] + cast = re.findall(cast_rgx, webpage, flags=re.UNICODE) or [] + return { 'id': video_id, 'title': title, @@ -93,12 +91,10 @@ def _real_extract(self, url): } elif groups.get('url_type') == 'track': events_rgx = r'/[0-9]+/schedule/event/[a-z0-9]+/)' - try: - events_slugs = re.findall(events_rgx, webpage) - except: - events_slugs = [] + events_slugs = re.findall(events_rgx, webpage) or [] + if len(events_slugs) > 0: - events_urls = ['https://fosdem.org'+slug for slug in events_slugs] + events_urls = ['https://fosdem.org' + slug for slug in events_slugs] entries = [] for event_url in events_urls: entries.append(self.url_result(event_url, 'Fosdem')) From 567bd3a83efd05011643c0932747cd5c4b51f554 Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Wed, 7 Feb 2024 13:40:09 -0500 Subject: [PATCH 11/14] Update supportedsites.md Co-authored-by: pukkandan --- supportedsites.md | 1 - 1 file changed, 1 deletion(-) diff --git a/supportedsites.md b/supportedsites.md index 43c73dd11..f5c8c3829 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -434,7 +434,6 @@ # Supported sites - **FoodNetwork** - **FootyRoom** - **Formula1** - - **Fosdem** - **FOX** - **FOX9** - **FOX9News** From f0dada1643aa49a1e348ca9808323ec9b5974d5f Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Wed, 7 Feb 2024 14:10:11 -0500 Subject: [PATCH 12/14] Update yt_dlp/extractor/fosdem.py Co-authored-by: pukkandan --- yt_dlp/extractor/fosdem.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index d0aff28af..603a0313b 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -55,14 +55,12 @@ class FosdemIE(InfoExtractor): ] def _real_extract(self, url): - video_id = self._match_id(url) - groups = self._match_valid_url(url).groupdict() + video_id, url_type, year = self._match_valid_url(url).group('id', 'type', 'year') webpage = self._download_webpage(url, video_id) - year = groups.get('year') title_rgx = r'
    \n\s+

    (.+?)

    ' title = self._html_search_regex(title_rgx, webpage, 'title') \ or self._og_search_title(webpage) - if groups.get('url_type') == 'event': + if url_type == 'event': evnt_blurb_rgx = r'
    \n*(?P(
    (

    (.+?)

    \n*)+
    )+\n*(
    (

    (.+?)

    \n*)*
    ))+\n*
    ' evnt_blurb = self._html_search_regex(evnt_blurb_rgx, webpage, @@ -89,7 +87,7 @@ def _real_extract(self, url): 'cast': cast, 'webpage_url': url, } - elif groups.get('url_type') == 'track': + elif url_type == 'track': events_rgx = r'
    /[0-9]+/schedule/event/[a-z0-9]+/)' events_slugs = re.findall(events_rgx, webpage) or [] @@ -103,5 +101,4 @@ def _real_extract(self, url): playlist_title=title, playlist_description=None) else: - url_type = groups.get('url_type') print(f'The {url_type} is not supported') From 354c1f16ecd382d7fe1aabd5c4ae3c657e2035cb Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Wed, 7 Feb 2024 14:09:24 -0500 Subject: [PATCH 13/14] [extractors/fosdem] Replace descriptions in tests with md5sums --- yt_dlp/extractor/fosdem.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 603a0313b..7e70e127a 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -15,7 +15,7 @@ class FosdemIE(InfoExtractor): 'release_date': '2022', 'cast': ['Norbert Kamiński'], 'uploader': 'FOSDEM', - 'description': 'This presentation will describe the results of the proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS for OPNsense and pfSense. It will explain the challenges connected with the implementation of firmware update systems for BSD-based firewall and routing software. It will show basic concepts connected to the fwupd and LVFS. The security of the whole system is not determined only by the software it runs, but also by the firmware. Firmware is a piece of software inseparable from the hardware. It is responsible for proper hardware initialization as well as its security features. That means that the safety of the machine strongly depends on the mitigations of vulnerabilities provided by firmware (like microcode updates, bug/exploit fixes). For these particular reasons, the firmware should be kept up-to-date.\nRouters are highly popular attack vectors, therefore they must be appropriately secured. pfSense and OPNsense are well known secure firewall and routing software, but they do not have any firmware update methods. Therefore to secure hardware initialization of the routers, in this presentation we will present proof of concept work that takes into consideration integration of firmware update framework - fwupd/LVFS.\nNowadays, this is one of the most popular firmware update software. fwupd is a daemon that manages firmware updates of each of your hardware components that have some kind of firmware. What is more fwupd is open source, which makes it more trustworthy than proprietary applications delivered by hardware vendors designed for (only) their devices.', + 'description': 'md5:06a533c1dd130b9b9aa75a8c50c2625f', } }, { @@ -28,7 +28,7 @@ class FosdemIE(InfoExtractor): 'release_date': '2023', 'uploader': 'FOSDEM', 'cast': ['Martin Děcký'], - 'description': 'The idea of the microkernel OS architecture is more that 50 years old and the term itself is just a few years younger. Over the years, it has been implemented in countless variants and modifications, it has served as a basis for intriguing OS experiments, it has gained strong position in the mission-critical and safety-critical areas and while it is still not the dominant architecture in the general-purpose desktop OS domain, it has had major influence on the "mainstream" operating systems as well.\nThis talk, however, is not about the history. Instead, we describe where are the microkernel-based operating systems today, who works on them and why, who uses them in production and why, where they aim for the future. The purpose of this talk is also to present the basic practical experiences with the existing microkernel-based operating systems — not to compare them, but to provide the potential users and contributors with an initial sorted list of operating systems they should look into in more detail depending on their needs.' + 'description': 'md5:dd38c1219fe9cc4aa18b2ef51f70f24c' } }, { @@ -41,7 +41,7 @@ class FosdemIE(InfoExtractor): 'release_date': '2023', 'cast': ['Anastassios Nanos', 'Charalampos Mainas'], 'uploader': 'FOSDEM', - 'description': 'Unikernels promise fast boot times, small memory footprint and stronger security but lack in terms of manageability. Moreover, unikernels provide a non-generic environment for applications, with limited or no support for widely used libraries and OS features. This issue is even more apparent in the case of hardware acceleration. Acceleration libraries are often dynamically linked and have numerous dependencies, which directly contradict the statically linked notion of unikernels. Hardware acceleration functionality is almost non-existent in unikernel frameworks, mainly due to the absence of suitable virtualization solutions for such devices. ​ In this talk, we present an update on the vAccel framework we have built that can expose hardware acceleration semantics to workloads running on isolated sandboxes. We go through the components that comprise the framework and elaborate on the challenges in building such a software stack: we first present an overview of vAccel and how it works; then we focus on the porting effort of vAccel in various unikernel frameworks. Finally, we present a hardware acceleration abstraction that expose semantic acceleration functionality to workloads running as unikernels. ​ We will present a short demo of some popular algorithms running on top of Unikraft and vAccel show-casing the merits and trade-offs of this approach.' + 'description': 'md5:0e4d502d9aadd42d844407b49fab276c' } }, { From bbdbc7111ad41f55d7bb162e7a7a127a68610e6c Mon Sep 17 00:00:00 2001 From: Jesse Millwood Date: Wed, 7 Feb 2024 14:21:42 -0500 Subject: [PATCH 14/14] Update yt_dlp/extractor/fosdem.py Co-authored-by: pukkandan --- yt_dlp/extractor/fosdem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/fosdem.py b/yt_dlp/extractor/fosdem.py index 7e70e127a..7eca96984 100644 --- a/yt_dlp/extractor/fosdem.py +++ b/yt_dlp/extractor/fosdem.py @@ -3,7 +3,7 @@ class FosdemIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:archive\.)?fosdem\.org/(?P[0-9]{4})/schedule/(?Ptrack|event)/(?P[\w\.-_]+)/' + _VALID_URL = r'https?://(?:www\.)?(?:archive\.)?fosdem\.org/(?P\d{4})/schedule/(?Ptrack|event)/(?P[\w.-]+)' _TESTS = [ { 'url': 'https://archive.fosdem.org/2022/schedule/event/firmware_updates_for_opnsense_and_pfsense/',