mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
[ie/digiteka] try to fetch url in iframe content
This commit is contained in:
parent
0b6b7742c2
commit
1c2cc0dde8
1 changed file with 25 additions and 38 deletions
|
@ -1,7 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class DigitekaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
|
||||
|
@ -23,39 +22,11 @@ class DigitekaIE(InfoExtractor):
|
|||
)
|
||||
/id
|
||||
)/(?P<id>[\d+a-z]+)'''
|
||||
_EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
|
||||
_TESTS = [{
|
||||
# news
|
||||
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
|
||||
'md5': '276a0e49de58c7e85d32b057837952a2',
|
||||
'info_dict': {
|
||||
'id': 's8uk0r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 74,
|
||||
'upload_date': '20150317',
|
||||
'timestamp': 1426604939,
|
||||
'uploader_id': '3fszv',
|
||||
},
|
||||
}, {
|
||||
# music
|
||||
'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
|
||||
'md5': '2ea3513813cf230605c7e2ffe7eca61c',
|
||||
'info_dict': {
|
||||
'id': 'xvpfp8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Two - C\'est La Vie (clip)',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 233,
|
||||
'upload_date': '20150224',
|
||||
'timestamp': 1424760500,
|
||||
'uploader_id': '3rfzk',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_EMBED_REGEX = [r'<(?:iframe|script)(?:(?!>)[\s\S])*(?:data-)?src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?P<embed_type>generic|musique)(?:/[^/]+)*/(?:src|article)/(?P<id>[\d+a-z]+))']
|
||||
_TESTS = [
|
||||
{'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01747256/zone/60/src/x8smpxf'}, # direct url
|
||||
{'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e'} # from an embed
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
|
@ -68,18 +39,34 @@ def _real_extract(self, url):
|
|||
f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
|
||||
video_id)
|
||||
|
||||
|
||||
yt_id = deliver_info.get('yt_id')
|
||||
if yt_id:
|
||||
return self.url_result(yt_id, 'Youtube')
|
||||
|
||||
jwconf = deliver_info['jwconf']
|
||||
|
||||
|
||||
formats = []
|
||||
|
||||
for source in jwconf['playlist'][0]['sources']:
|
||||
if source['file'] is not False:
|
||||
formats.append({
|
||||
'url': source['file'],
|
||||
'format_id': source.get('label'),
|
||||
})
|
||||
if len(formats) == 0:
|
||||
# the file urls are not available from the json directly anymore, but
|
||||
# can be found in the iframe content
|
||||
iframe_content = self._download_webpage(url, video_id)
|
||||
IFRAME_REGEX = '<meta property="og:video" content="(?P<url>.*)"/>'
|
||||
video_url = self._search_regex(IFRAME_REGEX, iframe_content, 'url')
|
||||
video_format = video_url.split('.')[-1]
|
||||
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': video_format,
|
||||
})
|
||||
|
||||
title = deliver_info['title']
|
||||
thumbnail = jwconf.get('image')
|
||||
|
|
Loading…
Reference in a new issue