From 1a6fdcef9cf606bd343dc3cebb3afa45526c2dd3 Mon Sep 17 00:00:00 2001 From: spookyahell <9724215+spookyahell@users.noreply.github.com> Date: Thu, 16 Mar 2023 00:59:46 +0100 Subject: [PATCH 1/4] Avoid adding the same manifest URL twice (Sometimes I hate it when technology is implemented like this in the first place: a link to an "alternative" that is just the original) --- yt_dlp/extractor/wdr.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index de5dc2666..087abe17e 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -59,6 +59,9 @@ def _real_extract(self, url): formats = [] subtitles = {} + + # list to track the urls and ensure that not a second manifest url with the same value is added + avoid_duplicate_manifest_urls = [] # check if the metadata contains a direct URL to a file for kind, media in media_resource.items(): @@ -74,10 +77,15 @@ def _real_extract(self, url): continue if not isinstance(media, dict): continue - + for tag_name, medium_url in media.items(): if tag_name not in ('videoURL', 'audioURL'): continue + + if medium_url not in avoid_duplicate_manifest_urls: + avoid_duplicate_manifest_urls.append(medium_url) + else: + continue ext = determine_ext(medium_url) if ext == 'm3u8': @@ -164,7 +172,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'ext': 'mp3', 'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100', 'title': 'Schriftstellerin Juli Zeh', - 'alt_title': 'WDR 3 Gespräch am Samstag', + 'alt_title': 'WDR 3 Gespräch am Samstag', 'upload_date': '20160312', 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7', 'is_live': False, @@ -232,7 +240,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'info_dict': { 'id': 'mdb-1556012', 'ext': 'mp4', - 'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"', + 'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze 
ist extrem breit"', 'upload_date': '20180111', }, 'params': { From f1be31fa800a3a62c2d9e72c7f77e4d921f9dada Mon Sep 17 00:00:00 2001 From: spookyahell <9724215+spookyahell@users.noreply.github.com> Date: Thu, 16 Mar 2023 01:18:22 +0100 Subject: [PATCH 2/4] Fixes for flake8 --- yt_dlp/extractor/wdr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index 087abe17e..095de61f2 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -59,7 +59,7 @@ def _real_extract(self, url): formats = [] subtitles = {} - + # list to track the urls and ensure that not a second manifest url with the same value is added avoid_duplicate_manifest_urls = [] @@ -77,11 +77,11 @@ def _real_extract(self, url): continue if not isinstance(media, dict): continue - + for tag_name, medium_url in media.items(): if tag_name not in ('videoURL', 'audioURL'): continue - + if medium_url not in avoid_duplicate_manifest_urls: avoid_duplicate_manifest_urls.append(medium_url) else: From f09bf692517bd190160b00e3881c5cc0d87c05ab Mon Sep 17 00:00:00 2001 From: spookyahell <9724215+spookyahell@users.noreply.github.com> Date: Mon, 3 Apr 2023 01:19:03 +0200 Subject: [PATCH 3/4] Fix incorrectly updated characters (Sorry for that, that was unintentional, some encoding error.) 
--- yt_dlp/extractor/wdr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index 095de61f2..0982237d8 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -172,7 +172,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'ext': 'mp3', 'display_id': 'wdr3-gespraech-am-samstag/audio-schriftstellerin-juli-zeh-100', 'title': 'Schriftstellerin Juli Zeh', - 'alt_title': 'WDR 3 Gespräch am Samstag', + 'alt_title': 'WDR 3 Gespräch am Samstag', 'upload_date': '20160312', 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7', 'is_live': False, @@ -240,7 +240,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'info_dict': { 'id': 'mdb-1556012', 'ext': 'mp4', - 'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"', + 'title': 'DHB-Vizepräsident Bob Hanning - "Die Weltspitze ist extrem breit"', 'upload_date': '20180111', }, 'params': { From 4bce9e61bade8e7e4a47a3d37ff868d7cc076230 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 21 Jun 2023 08:20:04 +0530 Subject: [PATCH 4/4] Use set --- yt_dlp/extractor/wdr.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index 0982237d8..2313c413b 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -59,9 +59,7 @@ def _real_extract(self, url): formats = [] subtitles = {} - - # list to track the urls and ensure that not a second manifest url with the same value is added - avoid_duplicate_manifest_urls = [] + seen_manifest_urls = set() # check if the metadata contains a direct URL to a file for kind, media in media_resource.items(): @@ -82,10 +80,9 @@ def _real_extract(self, url): if tag_name not in ('videoURL', 'audioURL'): continue - if medium_url not in avoid_duplicate_manifest_urls: - avoid_duplicate_manifest_urls.append(medium_url) - else: + if medium_url in seen_manifest_urls: continue + 
seen_manifest_urls.add(medium_url) ext = determine_ext(medium_url) if ext == 'm3u8':