mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
[ie/digiteka] Try to fetch the video URL from the iframe content
This commit is contained in:
parent
0b6b7742c2
commit
1c2cc0dde8
1 changed file with 25 additions and 38 deletions
|
@ -1,7 +1,6 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
class DigitekaIE(InfoExtractor):
|
class DigitekaIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
|
https?://(?:www\.)?(?:digiteka\.net|ultimedia\.com)/
|
||||||
|
@ -23,39 +22,11 @@ class DigitekaIE(InfoExtractor):
|
||||||
)
|
)
|
||||||
/id
|
/id
|
||||||
)/(?P<id>[\d+a-z]+)'''
|
)/(?P<id>[\d+a-z]+)'''
|
||||||
_EMBED_REGEX = [r'<(?:iframe|script)[^>]+src=["\'](?P<url>(?:https?:)?//(?:www\.)?ultimedia\.com/deliver/(?:generic|musique)(?:/[^/]+)*/(?:src|article)/[\d+a-z]+)']
|
_EMBED_REGEX = [r'<(?:iframe|script)(?:(?!>)[\s\S])*(?:data-)?src=["\'](?P<url>(?:https?:)?//(?:www\.)?(?:digiteka\.net|ultimedia\.com)/deliver/(?P<embed_type>generic|musique)(?:/[^/]+)*/(?:src|article)/(?P<id>[\d+a-z]+))']
|
||||||
_TESTS = [{
|
_TESTS = [
|
||||||
# news
|
{'url': 'https://www.ultimedia.com/deliver/generic/iframe/mdtk/01747256/zone/60/src/x8smpxf'}, # direct url
|
||||||
'url': 'https://www.ultimedia.com/default/index/videogeneric/id/s8uk0r',
|
{'url': 'https://www.boursorama.com/bourse/actualites/le-retour-des-taux-negatifs-est-il-possible-169e3e0cf337df132285b41e124dc98e'} # from an embed
|
||||||
'md5': '276a0e49de58c7e85d32b057837952a2',
|
]
|
||||||
'info_dict': {
|
|
||||||
'id': 's8uk0r',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Loi sur la fin de vie: le texte prévoit un renforcement des directives anticipées',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
'duration': 74,
|
|
||||||
'upload_date': '20150317',
|
|
||||||
'timestamp': 1426604939,
|
|
||||||
'uploader_id': '3fszv',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# music
|
|
||||||
'url': 'https://www.ultimedia.com/default/index/videomusic/id/xvpfp8',
|
|
||||||
'md5': '2ea3513813cf230605c7e2ffe7eca61c',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'xvpfp8',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Two - C\'est La Vie (clip)',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
'duration': 233,
|
|
||||||
'upload_date': '20150224',
|
|
||||||
'timestamp': 1424760500,
|
|
||||||
'uploader_id': '3rfzk',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.digiteka.net/deliver/generic/iframe/mdtk/01637594/src/lqm3kl/zone/1/showtitle/1/autoplay/yes',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
mobj = self._match_valid_url(url)
|
||||||
|
@ -68,18 +39,34 @@ def _real_extract(self, url):
|
||||||
f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
|
f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}',
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
|
|
||||||
yt_id = deliver_info.get('yt_id')
|
yt_id = deliver_info.get('yt_id')
|
||||||
if yt_id:
|
if yt_id:
|
||||||
return self.url_result(yt_id, 'Youtube')
|
return self.url_result(yt_id, 'Youtube')
|
||||||
|
|
||||||
jwconf = deliver_info['jwconf']
|
jwconf = deliver_info['jwconf']
|
||||||
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for source in jwconf['playlist'][0]['sources']:
|
for source in jwconf['playlist'][0]['sources']:
|
||||||
formats.append({
|
if source['file'] is not False:
|
||||||
'url': source['file'],
|
formats.append({
|
||||||
'format_id': source.get('label'),
|
'url': source['file'],
|
||||||
})
|
'format_id': source.get('label'),
|
||||||
|
})
|
||||||
|
if len(formats) == 0:
|
||||||
|
# the file urls are not available from the json directly anymore, but
|
||||||
|
# can be found in the iframe content
|
||||||
|
iframe_content = self._download_webpage(url, video_id)
|
||||||
|
IFRAME_REGEX = '<meta property="og:video" content="(?P<url>.*)"/>'
|
||||||
|
video_url = self._search_regex(IFRAME_REGEX, iframe_content, 'url')
|
||||||
|
video_format = video_url.split('.')[-1]
|
||||||
|
|
||||||
|
formats.append({
|
||||||
|
'url': video_url,
|
||||||
|
'ext': video_format,
|
||||||
|
})
|
||||||
|
|
||||||
title = deliver_info['title']
|
title = deliver_info['title']
|
||||||
thumbnail = jwconf.get('image')
|
thumbnail = jwconf.get('image')
|
||||||
|
|
Loading…
Reference in a new issue