diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py index e56ec63e8..7bab8fd45 100644 --- a/yt_dlp/extractor/digiteka.py +++ b/yt_dlp/extractor/digiteka.py @@ -1,7 +1,6 @@ from .common import InfoExtractor from ..utils import int_or_none - class DigitekaIE(InfoExtractor): _VALID_URL = r'''(?x) https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/ @@ -23,39 +22,11 @@ class DigitekaIE(InfoExtractor): ) /id )/(?P[\d+a-z]+)''' - _EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)'] - _TESTS = [{ - # news - 'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r', - 'md5': '276a0e49de58c7e85d32b057837952a2', - 'info_dict': { - 'id': 's8uk0r', - 'ext': 'mp4', - 'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 74, - 'upload_date': '20150317', - 'timestamp': 1426604939, - 'uploader_id': '3fszv', - }, - }, { - # music - 'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8', - 'md5': '2ea3513813cf230605c7e2ffe7eca61c', - 'info_dict': { - 'id': 'xvpfp8', - 'ext': 'mp4', - 'title': 'Two - C\'est La Vie (clip)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 233, - 'upload_date': '20150224', - 'timestamp': 1424760500, - 'uploader_id': '3rfzk', - }, - }, { - 'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes', - 'only_matching': True, - }] + _EMBED_REGEX = [r'<(?:iframe|script)(?:(?!>)[\s\S])*(?:data-)?src=["\'](?P(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?Pgeneric|musique)(?:/[^/]+)*/(?:src|article)/(?P[\d+a-z]+))'] + _TESTS = [ + {'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01747256/zone/60/src/x8smpxf'}, # direct url + {'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e'} # from an embed + ] def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -68,18 +39,34 @@ def _real_extract(self, url): f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}', video_id) + yt_id = deliver_info.get('yt_id') if yt_id: return self.url_result(yt_id, 'Youtube') jwconf = deliver_info['jwconf'] + formats = [] + for source in jwconf['playlist'][0]['sources']: - formats.append({ - 'url': source['file'], - 'format_id': source.get('label'), - }) + if source['file'] is not False: + formats.append({ + 'url': source['file'], + 'format_id': source.get('label'), + }) + if len(formats) == 0: + # the file urls are not available from the json directly anymore, but + # can be found in the iframe content + iframe_content = self._download_webpage(url, video_id) + IFRAME_REGEX = '' + video_url = self._search_regex(IFRAME_REGEX, iframe_content, 'url') + video_format = video_url.split('.')[-1] + + formats.append({ + 'url': video_url, + 'ext': video_format, + }) title = deliver_info['title'] thumbnail = jwconf.get('image')