1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00

[ie/digiteka] try to fetch url in iframe content

This commit is contained in:
eldonad 2024-12-30 23:09:04 +01:00
parent 0b6b7742c2
commit 1c2cc0dde8

View file

@ -1,7 +1,6 @@
from .common import InfoExtractor
from ..utils import int_or_none
class DigitekaIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
@ -23,39 +22,11 @@ class DigitekaIE(InfoExtractor):
)
/id
)/(?P<id>[\d+a-z]+)'''
_EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
_TESTS = [{
# news
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
'md5': '276a0e49de58c7e85d32b057837952a2',
'info_dict': {
'id': 's8uk0r',
'ext': 'mp4',
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 74,
'upload_date': '20150317',
'timestamp': 1426604939,
'uploader_id': '3fszv',
},
}, {
# music
'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
'md5': '2ea3513813cf230605c7e2ffe7eca61c',
'info_dict': {
'id': 'xvpfp8',
'ext': 'mp4',
'title': 'Two - C\'est La Vie (clip)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 233,
'upload_date': '20150224',
'timestamp': 1424760500,
'uploader_id': '3rfzk',
},
}, {
'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
'only_matching': True,
}]
_EMBED_REGEX = [r'<(?:iframe|script)(?:(?!>)[\s\S])*(?:data-)?src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?P<embed_type>generic|musique)(?:/[^/]+)*/(?:src|article)/(?P<id>[\d+a-z]+))']
_TESTS = [
{'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01747256/zone/60/src/x8smpxf'}, # direct url
{'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e'} # from an embed
]
def _real_extract(self, url):
mobj = self._match_valid_url(url)
@ -68,18 +39,34 @@ def _real_extract(self, url):
f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
video_id)
yt_id = deliver_info.get('yt_id')
if yt_id:
return self.url_result(yt_id, 'Youtube')
jwconf = deliver_info['jwconf']
formats = []
for source in jwconf['playlist'][0]['sources']:
formats.append({
'url': source['file'],
'format_id': source.get('label'),
})
if source['file'] is not False:
formats.append({
'url': source['file'],
'format_id': source.get('label'),
})
if len(formats) == 0:
# the file urls are not available from the json directly anymore, but
# can be found in the iframe content
iframe_content = self._download_webpage(url, video_id)
IFRAME_REGEX = '<meta property="og:video" content="(?P<url>.*)"/>'
video_url = self._search_regex(IFRAME_REGEX, iframe_content, 'url')
video_format = video_url.split('.')[-1]
formats.append({
'url': video_url,
'ext': video_format,
})
title = deliver_info['title']
thumbnail = jwconf.get('image')