From 3bb739f188f801a6ec585110ffb80ecfdabdb41d Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 1 Sep 2024 16:17:45 +0000 Subject: [PATCH 01/12] [ie/vidio:live] Add DASH support; use new API --- yt_dlp/extractor/vidio.py | 112 ++++++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 36 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 955a11647..6ee5dbe1e 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,18 +1,27 @@ +import base64 +import hashlib +import hmac +import time + from .common import InfoExtractor +from ..aes import aes_cbc_encrypt from ..utils import ( ExtractorError, clean_html, format_field, get_element_by_class, int_or_none, + join_nonempty, parse_iso8601, smuggle_url, str_or_none, strip_or_none, try_get, unsmuggle_url, + url_or_none, urlencode_postdata, ) +from ..utils.traversal import traverse_obj class VidioBaseIE(InfoExtractor): @@ -58,6 +67,7 @@ def is_logged_in(): def _initialize_pre_login(self): self._api_key = self._download_json( 'https://www.vidio.com/auth', None, data=b'')['api_key'] + self._ua = self.get_param('http_headers')['User-Agent'] def _call_api(self, url, video_id, note=None): return self._download_json(url, video_id, note=note, headers={ @@ -234,10 +244,37 @@ class VidioLiveIE(VidioBaseIE): 'url': 'https://www.vidio.com/live/204-sctv', 'info_dict': { 'id': '204', - 'title': 'SCTV', + 'ext': 'mp4', + 'title': r're:SCTV \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'display_id': 'sctv', 'uploader': 'SCTV', 'uploader_id': 'sctv', + 'uploader_url': 'https://www.vidio.com/@sctv', 'thumbnail': r're:^https?://.*\.jpg$', + 'live_status': 'is_live', + 'description': r're:^SCTV merupakan stasiun televisi nasional terkemuka di Indonesia.+', + 'like_count': int, + 'dislike_count': int, + 'timestamp': 1461258000, + 'upload_date': '20160421', + }, + }, { + 'url': 'https://vidio.com/live/733-trans-tv', + 'info_dict': { + 'id': '733', + 'ext': 'mp4', + 'title': r're:TRANS TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'display_id': 'trans-tv', + 'uploader': 'Trans TV', + 'uploader_id': 'transtv', + 'uploader_url': 'https://www.vidio.com/@transtv', + 'thumbnail': r're:^https?://.*\.jpg$', + 'live_status': 'is_live', + 'description': r're:^Trans TV adalah stasiun televisi swasta Indonesia.+', + 'like_count': int, + 'dislike_count': int, + 'timestamp': 1461355080, + 'upload_date': '20160422', }, }, { # Premier-exclusive livestream @@ -251,46 +288,18 @@ class VidioLiveIE(VidioBaseIE): def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() - stream_data = self._call_api( - f'https://www.vidio.com/api/livestreamings/{video_id}/detail', display_id) - stream_meta = stream_data['livestreamings'][0] - user = stream_data.get('users', [{}])[0] + stream_detail = self._call_api( + f'https://www.vidio.com/api/livestreamings/{video_id}/detail', video_id) + stream_meta = traverse_obj(stream_detail, ('livestreamings', 0, {dict}), default={}) + user = traverse_obj(stream_detail, ('users', 0, {dict}), default={}) title = stream_meta.get('title') username = user.get('username') - formats = [] - if stream_meta.get('is_drm'): + stream_data = self._get_stream_data(video_id) + if traverse_obj(stream_data, ('data', 'attributes', 'is_drm', {bool})): if not self.get_param('allow_unplayable_formats'): self.report_drm(video_id) - if stream_meta.get('is_premium'): - sources = self._download_json( - f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=livestreamings', - display_id, note='Downloading premier API JSON') - if not (sources.get('source') or sources.get('source_dash')): - self.raise_login_required('This video is only available for registered users with the appropriate subscription') - - if str_or_none(sources.get('source')): - token_json = self._download_json( - f'https://www.vidio.com/live/{video_id}/tokens', - display_id, note='Downloading HLS token JSON', data=b'') - formats.extend(self._extract_m3u8_formats( - sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native')) - if str_or_none(sources.get('source_dash')): - pass - else: - if stream_meta.get('stream_token_url'): - token_json = self._download_json( - f'https://www.vidio.com/live/{video_id}/tokens', - display_id, note='Downloading HLS token JSON', data=b'') - formats.extend(self._extract_m3u8_formats( - stream_meta['stream_token_url'] + '?' + token_json.get('token', ''), - display_id, 'mp4', 'm3u8_native')) - if stream_meta.get('stream_dash_url'): - pass - if stream_meta.get('stream_url'): - formats.extend(self._extract_m3u8_formats( - stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native')) return { 'id': video_id, @@ -301,9 +310,40 @@ def _real_extract(self, url): 'thumbnail': stream_meta.get('image'), 'like_count': int_or_none(stream_meta.get('like')), 'dislike_count': int_or_none(stream_meta.get('dislike')), - 'formats': formats, + 'formats': [*self._yield_hls_formats(traverse_obj(stream_data, ('data', 'attributes', 'hls', {url_or_none})), video_id), + *self._yield_dash_formats(traverse_obj(stream_data, ('data', 'attributes', 'dash', {url_or_none})), video_id)], 'uploader': user.get('name'), 'timestamp': parse_iso8601(stream_meta.get('start_time')), 'uploader_id': username, 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'), } + + def _get_stream_data(self, video_id): + timestamp = str(time.time()) + + info, urlh = self._download_json_handle( + f'https://api.vidio.com/livestreamings/{video_id}/stream?initialize=true', video_id, + expected_status=401, note='Downloading stream info', headers={ + 'x-api-key': base64.b64encode(bytes(aes_cbc_encrypt( + list(self._api_key.encode()), list(b'dPr0QImQ7bc5o9LMntNba2DOsSbZcjUh'), + list(b'C8RWsrtFsoeyCyPt')))).decode(), + 'x-api-platform': 'web-desktop', + 'x-client': timestamp, + 'x-secure-level': 2, + 'x-signature': hmac.new( + f'V1d10D3v:{timestamp}'.encode(), timestamp.encode(), digestmod=hashlib.sha256).hexdigest(), + 'user-agent': self._ua, + }) + if urlh.status == 401: + self.raise_login_required('This video is only available for registered users with the appropriate subscription') + + return info + + def _yield_hls_formats(self, hls_url, video_id): + fmts = self._extract_m3u8_formats(hls_url, video_id, fatal=False, live=True) + yield from traverse_obj(fmts, (..., {lambda x: {**x, 'format_id': join_nonempty(self._search_regex( + r'/(hls-[^/])/', x['url'], 'hls source', default=None), int_or_none(x['tbr']))}})) + + def _yield_dash_formats(self, dash_url, video_id): + fmts = self._extract_mpd_formats(dash_url, video_id, fatal=False, mpd_id='dash', headers={'User-Agent': self._ua}) + yield from traverse_obj(fmts, (..., {lambda x: {**x, 'http_headers': {'User-Agent': self._ua}}})) From 20c66ec13e3aa4121cc87656834d234fc782d495 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 21 Sep 2024 10:07:44 +0000 Subject: [PATCH 02/12] [ie/vidio] Fix login; use new API; check DRM; extract comments --- yt_dlp/extractor/vidio.py | 372 ++++++++++++++++++++++++++++++-------- 1 file changed, 293 insertions(+), 79 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 6ee5dbe1e..d8958c4e1 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,6 +1,7 @@ import base64 import hashlib import hmac +import json import time from .common import InfoExtractor @@ -8,13 +9,17 @@ from ..utils import ( ExtractorError, clean_html, + extract_attributes, format_field, get_element_by_class, + get_element_html_by_id, int_or_none, join_nonempty, parse_iso8601, + remove_end, smuggle_url, str_or_none, + str_to_int, strip_or_none, try_get, unsmuggle_url, @@ -44,6 +49,7 @@ def is_logged_in(): login_form.update({ 'user[login]': username, 'user[password]': password, + 'authenticity_token': self._html_search_meta('csrf-token', login_page, fatal=True), }) login_post, login_post_urlh = self._download_webpage_handle( self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401]) @@ -77,7 +83,9 @@ def _call_api(self, url, video_id, note=None): class VidioIE(VidioBaseIE): + _GEO_COUNTRIES = ['ID'] _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P\d+)-(?P[^/?#&]+)' + _EMBED_REGEX = [rf'(?x)]+\bsrc=[\'"](?P{_VALID_URL})'] _TESTS = [{ 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', 'md5': 'abac81b1a205a8d94c609a473b5ea62a', @@ -87,113 +95,319 @@ class VidioIE(VidioBaseIE): 'ext': 'mp4', 'title': 'DJ_AMBRED - Booyah (Live 2015)', 'description': 'md5:27dc15f819b6a78a626490881adbadf8', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', 'duration': 149, - 'like_count': int, - 'uploader': 'TWELVE Pic', - 'timestamp': 1444902800, + 'uploader': 'twelvepictures', + 'timestamp': 1444902960, 'upload_date': '20151015', - 'uploader_id': 'twelvepictures', - 'channel': 'Cover Music Video', + 'uploader_id': '270115', + 'channel': 'cover-music-video', 'channel_id': '280236', - 'view_count': int, - 'dislike_count': int, - 'comment_count': int, + 'channel_url': 'https://www.vidio.com/@twelvepictures/channels/280236-cover-music-video', 'tags': 'count:3', 'uploader_url': 'https://www.vidio.com/@twelvepictures', + 'live_status': 'not_live', + 'genres': ['vlog', 'comedy', 'edm'], + 'season_id': '', + 'season_name': '', + 'age_limit': 13, + 'comment_count': int, + }, + 'params': { + 'getcomments': True, }, }, { + # DRM protected + 'url': 'https://www.vidio.com/watch/7095853-ep-04-sketch-book', + 'md5': 'abac81b1a205a8d94c609a473b5ea62a', + 'info_dict': { + 'id': '7095853', + 'display_id': 'ep-04-sketch-book', + 'ext': 'mp4', + 'title': 'Ep 04 - Sketch Book', + 'description': 'md5:9e22b4b1dbd65209c143d7009e899830', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', + 'duration': 2784, + 'uploader': 'vidiooriginal', + 'timestamp': 1658509200, + 'upload_date': '20220722', + 'uploader_id': '31052580', + 'channel': 'cupcake-untuk-rain', + 'channel_id': '52332655', + 'channel_url': 'https://www.vidio.com/@vidiooriginal/channels/52332655-cupcake-untuk-rain', + 'tags': [], + 'uploader_url': 'https://www.vidio.com/@vidiooriginal', + 'live_status': 'not_live', + 'genres': ['romance', 'drama', 'comedy', 'Teen', 'love triangle'], + 'season_id': '8220', + 'season_name': 'Season 1', + 'age_limit': 13, + 'availability': 'premium_only', + 'comment_count': int, + }, + 'expected_warnings': ['This video is DRM protected'], + 'params': { + 'getcomments': True, + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }, { + 'url': 'https://www.vidio.com/watch/7439193-episode-1-magic-5', + 'md5': 'b1644c574aeb20c91503be367ac2d211', + 'info_dict': { + 'id': '7439193', + 'display_id': 'episode-1-magic-5', + 'ext': 'mp4', + 'title': 'Episode 1 - Magic 5', + 'description': 'md5:367255f9e8e7ad7192c26218f01b6260', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', + 'duration': 6126, + 'uploader': 'indosiar', + 'timestamp': 1679315400, + 'upload_date': '20230320', + 'uploader_id': '12', + 'channel': 'magic-5', + 'channel_id': '52350795', + 'channel_url': 'https://www.vidio.com/@indosiar/channels/52350795-magic-5', + 'tags': ['basmalah', 'raden-rakha', 'eby-da-5', 'sinetron', 'afan-da-5', 'sridevi-da5'], + 'uploader_url': 'https://www.vidio.com/@indosiar', + 'live_status': 'not_live', + 'genres': ['drama', 'fantasy', 'friendship'], + 'season_id': '11017', + 'season_name': 'Episode', + 'age_limit': 13, + }, + }, { + 'url': 'https://www.vidio.com/watch/1716926-mas-suka-masukin-aja', + 'md5': 'acc4009eeac0033328419aada7bc6925', + 'info_dict': { + 'id': '1716926', + 'display_id': 'mas-suka-masukin-aja', + 'ext': 'mp4', + 'title': 'Mas Suka, Masukin Aja', + 'description': 'md5:667093b08e07b6fb92f68037f81f2267', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', + 'duration': 5080, + 'uploader': 'vidiopremier', + 'timestamp': 1564735560, + 'upload_date': '20190802', + 'uploader_id': '26094842', + 'channel': 'mas-suka-masukin-aja', + 'channel_id': '34112289', + 'channel_url': 'https://www.vidio.com/@vidiopremier/channels/34112289-mas-suka-masukin-aja', + 'tags': [], + 'uploader_url': 'https://www.vidio.com/@vidiopremier', + 'live_status': 'not_live', + 'genres': ['comedy', 'romance'], + 'season_id': '663', + 'season_name': '', + 'age_limit': 18, + 'availability': 'premium_only', + }, + 'params': { + 'ignore_no_formats_error': True, + }, + 'expected_warnings': ['This show isn\'t available in your country'], + }, { + 'url': 'https://www.vidio.com/watch/2372948-first-day-of-school-kindergarten-life-song-beabeo-nursery-rhymes-kids-songs', + 'md5': 'c6d1bde08eee88bea27cca9dc38bc3df', + 'info_dict': { + 'id': '2372948', + 'display_id': 'first-day-of-school-kindergarten-life-song-beabeo-nursery-rhymes-kids-songs', + 'ext': 'mp4', + 'title': 'First Day of School | Kindergarten Life Song | BeaBeo Nursery Rhymes & Kids Songs', + 'description': 'md5:d505486a67415903f7f3ab61adfd5a91', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', + 'duration': 517, + 'uploader': 'kidsstartv', + 'timestamp': 1638518400, + 'upload_date': '20211203', + 'uploader_id': '38247189', + 'channel': 'beabeo-school-series', + 'channel_id': '52311987', + 'channel_url': 'https://www.vidio.com/@kidsstartv/channels/52311987-beabeo-school-series', + 'tags': [], + 'uploader_url': 'https://www.vidio.com/@kidsstartv', + 'live_status': 'not_live', + 'genres': ['animation', 'Cartoon'], + 'season_id': '6023', + 'season_name': 'school series', + }, + }, { + 'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon', + 'md5': '405b61a2f06c74e052e0bd67cad6b891', + 'info_dict': { + 'id': '1550718', + 'display_id': 'stand-by-me-doraemon', + 'ext': 'mp4', + 'title': 'Stand by Me Doraemon', + 'description': 'md5:673d899f6a58dd4b0d18aebe30545e2a', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', + 'duration': 5429, + 'uploader': 'vidiopremier', + 'timestamp': 1545815634, + 'upload_date': '20181226', + 'uploader_id': '26094842', + 'channel': 'stand-by-me-doraemon', + 'channel_id': '29750953', + 'channel_url': 'https://www.vidio.com/@vidiopremier/channels/29750953-stand-by-me-doraemon', + 'tags': ['anime-lucu', 'top-10-this-week', 'kids', 'stand-by-me-doraemon-2'], + 'uploader_url': 'https://www.vidio.com/@vidiopremier', + 'live_status': 'not_live', + 'genres': ['anime', 'family', 'adventure', 'comedy', 'coming of age'], + 'season_id': '237', + 'season_name': '', + 'age_limit': 7, + 'availability': 'premium_only', + }, + 'params': { + 'ignore_no_formats_error': True, + }, + 'expected_warnings': ['This show isn\'t available in your country'], + }, { + # 404 Not Found 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', 'only_matching': True, - }, { - # Premier-exclusive video - 'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon', - 'only_matching': True, - }, { - # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah - 'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah', + }] + _WEBPAGE_TESTS = [{ + # embed player: https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah + 'url': 'https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah', 'info_dict': { 'id': '7115874', - 'ext': 'mp4', - 'channel_id': '40172876', - 'comment_count': int, - 'uploader_id': 'liputan6', - 'view_count': int, - 'dislike_count': int, - 'upload_date': '20220804', - 'uploader': 'Liputan6.com', 'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah', - 'channel': 'ENAM PLUS 165', - 'timestamp': 1659605520, + 'ext': 'mp4', 'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah', - 'duration': 59, - 'like_count': int, - 'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'], - 'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg', - 'uploader_url': 'https://www.vidio.com/@liputan6', 'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', + 'duration': 59, + 'uploader': 'liputan6', + 'timestamp': 1659605693, + 'upload_date': '20220804', + 'uploader_id': '139', + 'channel': 'enam-plus-165', + 'channel_id': '40172876', + 'channel_url': 'https://www.vidio.com/@liputan6/channels/40172876-enam-plus-165', + 'tags': ['monkeypox-indonesia', 'cacar-monyet-menyebar', 'suspek-cacar-monyet-di-indonesia', 'fakta', 'hoax-atau-bukan', 'jawa-tengah'], + 'uploader_url': 'https://www.vidio.com/@liputan6', + 'live_status': 'not_live', + 'genres': ['health'], + 'season_id': '', + 'season_name': '', + 'age_limit': 13, + 'comment_count': int, + }, + 'params': { + 'getcomments': True, }, }] def _real_extract(self, url): - match = self._match_valid_url(url).groupdict() - video_id, display_id = match.get('id'), match.get('display_id') - data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id) - video = data['videos'][0] - title = video['title'].strip() - is_premium = video.get('is_premium') + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') - if is_premium: - sources = self._download_json( - f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=videos', - display_id, note='Downloading premier API JSON') - if not (sources.get('source') or sources.get('source_dash')): - self.raise_login_required('This video is only available for registered users with the appropriate subscription') + webpage = self._download_webpage(url, video_id) + api_data = self._call_api(f'https://api.vidio.com/videos/{video_id}', display_id, 'Downloading API data') + interactions_stream = self._download_json( + 'https://www.vidio.com/interactions_stream.json', video_id, + query={'video_id': video_id, 'type': 'videos'}, note='Downloading stream info', + errnote='Unable to download stream info') - formats, subs = [], {} - if sources.get('source'): - hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles( - sources['source'], display_id, 'mp4', 'm3u8_native') - formats.extend(hls_formats) - subs.update(hls_subs) - if sources.get('source_dash'): # TODO: Find video example with source_dash - dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles( - sources['source_dash'], display_id, 'dash') - formats.extend(dash_formats) - subs.update(dash_subs) - else: - hls_url = data['clips'][0]['hls_url'] - formats, subs = self._extract_m3u8_formats_and_subtitles( - hls_url, display_id, 'mp4', 'm3u8_native') + attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage)) - get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {} - channel = get_first('channel') - user = get_first('user') - username = user.get('username') - get_count = lambda x: int_or_none(video.get('total_' + x)) + if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})): + self.report_drm(video_id) + if traverse_obj(attrs, ('data-geoblock', {lambda x: x == 'true'})): + self.raise_geo_restricted( + 'This show isn\'t available in your country', countries=['ID'], metadata_available=True) + + subtitles = dict(traverse_obj(attrs, ('data-subtitles', {json.loads}, ..., { + lambda x: (x['language'], [{'url': x['file']['url']}]), + }))) + formats = [] + + # There are time-based strings in the playlist URL, + # so try the other URL iff no formats extracted from the prior one. + + for m3u8_url in traverse_obj([ + interactions_stream.get('source'), + attrs.get('data-vjs-clip-hls-url'), + ], (..., {url_or_none})): + fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4', m3u8_id='hls') + formats.extend(fmt) + self._merge_subtitles(subs, subtitles) + if fmt: + break + + for mpd_url in traverse_obj([ + interactions_stream.get('source_dash'), + attrs.get('data-vjs-clip-dash-url'), + ], (..., {url_or_none})): + fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash') + formats.extend(fmt) + self._merge_subtitles(subs, subtitles) + if fmt: + break + + # TODO: extract also short previews of premier-exclusive videos from "attrs['data-content-preview-url']". + + uploader = attrs.get('data-video-username') + uploader_url = f'https://www.vidio.com/@{uploader}' + channel = attrs.get('data-video-channel') + channel_id = attrs.get('data-video-channel-id') return { 'id': video_id, 'display_id': display_id, - 'title': title, - 'description': strip_or_none(video.get('description')), - 'thumbnail': video.get('image_url_medium'), - 'duration': int_or_none(video.get('duration')), - 'like_count': get_count('likes'), + 'title': (traverse_obj(api_data, ('videos', 0, 'title')) + or attrs.get('data-video-title') + or self._html_extract_title(webpage)), + 'live_status': 'not_live', 'formats': formats, - 'subtitles': subs, - 'uploader': user.get('name'), - 'timestamp': parse_iso8601(video.get('created_at')), - 'uploader_id': username, - 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'), - 'channel': channel.get('name'), - 'channel_id': str_or_none(channel.get('id')), - 'view_count': get_count('view_count'), - 'dislike_count': get_count('dislikes'), - 'comment_count': get_count('comments'), - 'tags': video.get('tag_list'), + 'subtitles': subtitles, + 'channel': channel, + 'channel_id': channel_id, + 'channel_url': f'{uploader_url}/channels/{channel_id}-{channel}', + 'genres': traverse_obj(attrs, ('data-genres', {str}, {lambda x: x.split(',') if x else []}), default=[]), + 'season_id': traverse_obj(attrs, ('data-season-id', {str_or_none})), + 'season_name': traverse_obj(attrs, ('data-season-name', {str})), + 'uploader': uploader, + 'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})), + 'uploader_url': uploader_url, + 'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})), + 'duration': traverse_obj(attrs, ('data-video-duration', {str_to_int})), + 'description': traverse_obj(attrs, ('data-video-description', {str})), + 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), + 'tags': traverse_obj(attrs, ('data-video-tags', {str}, {lambda x: x.split(',') if x else []}), default=[]), + 'timestamp': traverse_obj(attrs, ('data-video-publish-date', {lambda x: parse_iso8601(x, ' ')})), + 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) + or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), + '__post_extractor': self.extract_comments(video_id), } + def _get_comments(self, video_id): + # TODO: extract replies under comments + + def extract_comments(comments_data): + users = dict(traverse_obj(comments_data, ('included', ..., { + lambda x: (x['id'], { + 'author': x['attributes']['username'], + 'author_thumbnail': url_or_none(x['attributes']['avatar_url_big'] or x['attributes']['avatar_url_small']), + 'author_url': url_or_none(x['links']['self']), + }), + }))) + yield from traverse_obj(comments_data, ('data', ..., { + 'id': 'id', + 'text': ('attributes', 'content'), + 'timestamp': ('attributes', 'created_at', {parse_iso8601}), + 'like_count': ('attributes', 'likes'), + 'author_id': ('attributes', 'user_id'), + }, {lambda x: {**x, **users.get(x['author_id'])}})) + + comment_page_url = f'https://api.vidio.com/videos/{video_id}/comments' + while comment_page_url: + comments_data = self._call_api(comment_page_url, video_id, 'Downloading comments') + comment_page_url = traverse_obj(comments_data, ('links', 'next', {url_or_none})) + yield from extract_comments(comments_data) + class VidioPremierIE(VidioBaseIE): _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P\d+)/(?P[^/?#&]+)' From 4b00360b4e589d8ad2b5a8e237b87737b643f16a Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 21 Sep 2024 12:28:25 +0000 Subject: [PATCH 03/12] [ie/vidio:live] the code I wrote does not seem to work. let's rewrite it Those two URLs of Premier-exclusive livestreams are still not working! --- yt_dlp/extractor/vidio.py | 134 ++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 70 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index d8958c4e1..f5b7a0094 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,26 +1,18 @@ -import base64 -import hashlib -import hmac import json -import time from .common import InfoExtractor -from ..aes import aes_cbc_encrypt from ..utils import ( ExtractorError, clean_html, extract_attributes, - format_field, get_element_by_class, get_element_html_by_id, int_or_none, - join_nonempty, parse_iso8601, remove_end, smuggle_url, str_or_none, str_to_int, - strip_or_none, try_get, unsmuggle_url, url_or_none, @@ -302,7 +294,7 @@ class VidioIE(VidioBaseIE): }] def _real_extract(self, url): - video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + video_id, display_id = self._match_valid_url(url).groups() webpage = self._download_webpage(url, video_id) api_data = self._call_api(f'https://api.vidio.com/videos/{video_id}', display_id, 'Downloading API data') @@ -328,9 +320,8 @@ def _real_extract(self, url): # so try the other URL iff no formats extracted from the prior one. for m3u8_url in traverse_obj([ - interactions_stream.get('source'), - attrs.get('data-vjs-clip-hls-url'), - ], (..., {url_or_none})): + interactions_stream.get('source'), + attrs.get('data-vjs-clip-hls-url')], (..., {url_or_none})): fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4', m3u8_id='hls') formats.extend(fmt) self._merge_subtitles(subs, subtitles) @@ -338,9 +329,8 @@ def _real_extract(self, url): break for mpd_url in traverse_obj([ - interactions_stream.get('source_dash'), - attrs.get('data-vjs-clip-dash-url'), - ], (..., {url_or_none})): + interactions_stream.get('source_dash'), + attrs.get('data-vjs-clip-dash-url')], (..., {url_or_none})): fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash') formats.extend(fmt) self._merge_subtitles(subs, subtitles) @@ -461,16 +451,19 @@ class VidioLiveIE(VidioBaseIE): 'ext': 'mp4', 'title': r're:SCTV \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'display_id': 'sctv', - 'uploader': 'SCTV', - 'uploader_id': 'sctv', + 'uploader': 'sctv', + 'uploader_id': '4', 'uploader_url': 'https://www.vidio.com/@sctv', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', 'live_status': 'is_live', 'description': r're:^SCTV merupakan stasiun televisi nasional terkemuka di Indonesia.+', 'like_count': int, 'dislike_count': int, 'timestamp': 1461258000, 'upload_date': '20160421', + 'tags': [], + 'genres': [], + 'age_limit': 13, }, }, { 'url': 'https://vidio.com/live/733-trans-tv', @@ -479,16 +472,19 @@ class VidioLiveIE(VidioBaseIE): 'ext': 'mp4', 'title': r're:TRANS TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'display_id': 'trans-tv', - 'uploader': 'Trans TV', - 'uploader_id': 'transtv', + 'uploader': 'transtv', + 'uploader_id': '551300', 'uploader_url': 'https://www.vidio.com/@transtv', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', 'live_status': 'is_live', 'description': r're:^Trans TV adalah stasiun televisi swasta Indonesia.+', 'like_count': int, 'dislike_count': int, 'timestamp': 1461355080, 'upload_date': '20160422', + 'tags': [], + 'genres': [], + 'age_limit': 13, }, }, { # Premier-exclusive livestream @@ -502,62 +498,60 @@ class VidioLiveIE(VidioBaseIE): def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() - stream_detail = self._call_api( - f'https://www.vidio.com/api/livestreamings/{video_id}/detail', video_id) - stream_meta = traverse_obj(stream_detail, ('livestreamings', 0, {dict}), default={}) - user = traverse_obj(stream_detail, ('users', 0, {dict}), default={}) - title = stream_meta.get('title') - username = user.get('username') + webpage = self._download_webpage(url, video_id) + stream_meta = traverse_obj(self._call_api( + f'https://www.vidio.com/api/livestreamings/{video_id}/detail', video_id), + ('livestreamings', 0, {dict}), default={}) + tokenized_playlist_urls = self._download_json( + f'https://www.vidio.com/live/{video_id}/tokens', video_id, + query={'type': 'dash'}, note='Downloading tokenized playlist', + errnote='Unable to download tokenized playlist', data=b'') + interactions_stream = self._download_json( + 'https://www.vidio.com/interactions_stream.json', video_id, + query={'video_id': video_id, 'type': 'videos'}, note='Downloading stream info', + errnote='Unable to download stream info') - stream_data = self._get_stream_data(video_id) - if traverse_obj(stream_data, ('data', 'attributes', 'is_drm', {bool})): - if not self.get_param('allow_unplayable_formats'): - self.report_drm(video_id) + attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage)) + + if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})): + self.report_drm(video_id) + if traverse_obj(attrs, ('data-geoblock', {lambda x: x == 'true'})): + self.raise_geo_restricted( + 'This show isn\'t available in your country', countries=['ID'], metadata_available=True) + + formats = [] + + for m3u8_url in traverse_obj([ + tokenized_playlist_urls.get('hls_url'), + interactions_stream.get('source')], (..., {url_or_none})): + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='hls')) + + for mpd_url in traverse_obj([ + tokenized_playlist_urls.get('dash_url'), + interactions_stream.get('source_dash')], (..., {url_or_none})): + formats.extend(self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')) + + uploader = attrs.get('data-video-username') + uploader_url = f'https://www.vidio.com/@{uploader}' return { 'id': video_id, 'display_id': display_id, - 'title': title, - 'is_live': True, - 'description': strip_or_none(stream_meta.get('description')), - 'thumbnail': stream_meta.get('image'), + 'title': attrs.get('data-video-title'), + 'live_status': 'is_live', + 'formats': formats, + 'genres': traverse_obj(attrs, ('data-genres', {str}, {lambda x: x.split(',') if x else []}), default=[]), + 'uploader': uploader, + 'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})), + 'uploader_url': uploader_url, + 'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})), + 'description': traverse_obj(attrs, ('data-video-description', {str})), + 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), + 'tags': traverse_obj(attrs, ('data-video-tags', {str}, {lambda x: x.split(',') if x else []}), default=[]), + 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) + or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), 'like_count': int_or_none(stream_meta.get('like')), 'dislike_count': int_or_none(stream_meta.get('dislike')), - 'formats': [*self._yield_hls_formats(traverse_obj(stream_data, ('data', 'attributes', 'hls', {url_or_none})), video_id), - *self._yield_dash_formats(traverse_obj(stream_data, ('data', 'attributes', 'dash', {url_or_none})), video_id)], - 'uploader': user.get('name'), 'timestamp': parse_iso8601(stream_meta.get('start_time')), - 'uploader_id': username, - 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'), } - - def _get_stream_data(self, video_id): - timestamp = str(time.time()) - - info, urlh = self._download_json_handle( - f'https://api.vidio.com/livestreamings/{video_id}/stream?initialize=true', video_id, - expected_status=401, note='Downloading stream info', headers={ - 'x-api-key': base64.b64encode(bytes(aes_cbc_encrypt( - list(self._api_key.encode()), list(b'dPr0QImQ7bc5o9LMntNba2DOsSbZcjUh'), - list(b'C8RWsrtFsoeyCyPt')))).decode(), - 'x-api-platform': 'web-desktop', - 'x-client': timestamp, - 'x-secure-level': 2, - 'x-signature': hmac.new( - f'V1d10D3v:{timestamp}'.encode(), timestamp.encode(), digestmod=hashlib.sha256).hexdigest(), - 'user-agent': self._ua, - }) - if urlh.status == 401: - self.raise_login_required('This video is only available for registered users with the appropriate subscription') - - return info - - def _yield_hls_formats(self, hls_url, video_id): - fmts = self._extract_m3u8_formats(hls_url, video_id, fatal=False, live=True) - yield from traverse_obj(fmts, (..., {lambda x: {**x, 'format_id': join_nonempty(self._search_regex( - r'/(hls-[^/])/', x['url'], 'hls source', default=None), int_or_none(x['tbr']))}})) - - def _yield_dash_formats(self, dash_url, video_id): - fmts = self._extract_mpd_formats(dash_url, video_id, fatal=False, mpd_id='dash', headers={'User-Agent': self._ua}) - yield from traverse_obj(fmts, (..., {lambda x: {**x, 'http_headers': {'User-Agent': self._ua}}})) From c0aa2e81603aa64aff5a6a7418fe7cf980ec5ac8 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Mon, 28 Oct 2024 00:37:41 +0000 Subject: [PATCH 04/12] fix usage of 'self._merge_subtitles' --- yt_dlp/extractor/vidio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index f5b7a0094..993f302d5 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -324,7 +324,7 @@ def _real_extract(self, url): attrs.get('data-vjs-clip-hls-url')], (..., {url_or_none})): fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4', m3u8_id='hls') formats.extend(fmt) - self._merge_subtitles(subs, subtitles) + self._merge_subtitles(subs, target=subtitles) if fmt: break @@ -333,7 +333,7 @@ def _real_extract(self, url): attrs.get('data-vjs-clip-dash-url')], (..., {url_or_none})): fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash') formats.extend(fmt) - self._merge_subtitles(subs, subtitles) + self._merge_subtitles(subs, target=subtitles) if fmt: break From 8779a8897cad104475b795c5019f00d48c9273a0 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sat, 16 Nov 2024 07:39:59 +0000 Subject: [PATCH 05/12] simplify statements in traversal --- yt_dlp/extractor/vidio.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 993f302d5..ed781b930 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -356,7 +356,7 @@ def _real_extract(self, url): 'channel': channel, 'channel_id': channel_id, 'channel_url': f'{uploader_url}/channels/{channel_id}-{channel}', - 'genres': traverse_obj(attrs, ('data-genres', {str}, {lambda x: x.split(',') if x else []}), default=[]), + 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {str.split(sep=',')}), default=[]), 'season_id': traverse_obj(attrs, ('data-season-id', {str_or_none})), 'season_name': traverse_obj(attrs, ('data-season-name', {str})), 'uploader': uploader, @@ -366,10 +366,10 @@ def _real_extract(self, url): 'duration': traverse_obj(attrs, ('data-video-duration', {str_to_int})), 'description': traverse_obj(attrs, ('data-video-description', {str})), 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), - 'tags': traverse_obj(attrs, ('data-video-tags', {str}, {lambda x: x.split(',') if x else []}), default=[]), - 'timestamp': traverse_obj(attrs, ('data-video-publish-date', {lambda x: parse_iso8601(x, ' ')})), + 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {str.split(sep=',')}), default=[]), + 'timestamp': traverse_obj(attrs, ('data-video-publish-date', {parse_iso8601(delimiter=' ')})), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) - or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), + or traverse_obj(attrs, ('data-content-rating-option', {remove_end(end=' or more')}, {str_to_int}))), '__post_extractor': self.extract_comments(video_id), } @@ -541,16 +541,16 @@ def _real_extract(self, url): 'title': attrs.get('data-video-title'), 'live_status': 'is_live', 'formats': formats, - 'genres': traverse_obj(attrs, ('data-genres', {str}, {lambda x: x.split(',') if x else []}), default=[]), + 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {str.split(sep=',')}), default=[]), 'uploader': uploader, 'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})), 'uploader_url': uploader_url, 'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})), 'description': traverse_obj(attrs, ('data-video-description', {str})), 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), - 'tags': traverse_obj(attrs, ('data-video-tags', {str}, {lambda x: x.split(',') if x else []}), default=[]), + 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {str.split(sep=',')}), default=[]), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) - or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), + or traverse_obj(attrs, ('data-content-rating-option', {remove_end(end=' or more')}, {str_to_int}))), 'like_count': int_or_none(stream_meta.get('like')), 'dislike_count': int_or_none(stream_meta.get('dislike')), 'timestamp': parse_iso8601(stream_meta.get('start_time')), From 8c987a05644a45fe3069397bb5ae7eaa090506ff Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 24 Nov 2024 05:05:04 +0000 Subject: [PATCH 06/12] fix wrong usage in traversal; fix regex Partially revert 8779a8897cad104475b795c5019f00d48c9273a0 Set "@partial_application" to `utils::remove_end()`? --- yt_dlp/extractor/vidio.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index ed781b930..0efd6104d 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -76,7 +76,7 @@ def _call_api(self, url, video_id, note=None): class VidioIE(VidioBaseIE): _GEO_COUNTRIES = ['ID'] - _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P\d+)-(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?vidio\.com/(?:watch|embed)/(?P\d+)-(?P[^/?#&]+)' _EMBED_REGEX = [rf'(?x)]+\bsrc=[\'"](?P{_VALID_URL})'] _TESTS = [{ 'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015', @@ -356,7 +356,7 @@ def _real_extract(self, url): 'channel': channel, 'channel_id': channel_id, 'channel_url': f'{uploader_url}/channels/{channel_id}-{channel}', - 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {str.split(sep=',')}), default=[]), + 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {lambda x: x.split(',')}), default=[]), 'season_id': traverse_obj(attrs, ('data-season-id', {str_or_none})), 'season_name': traverse_obj(attrs, ('data-season-name', {str})), 'uploader': uploader, @@ -366,10 +366,10 @@ def _real_extract(self, url): 'duration': traverse_obj(attrs, ('data-video-duration', {str_to_int})), 'description': traverse_obj(attrs, ('data-video-description', {str})), 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), - 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {str.split(sep=',')}), default=[]), + 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {lambda x: x.split(',')}), default=[]), 'timestamp': traverse_obj(attrs, ('data-video-publish-date', {parse_iso8601(delimiter=' ')})), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) - or traverse_obj(attrs, ('data-content-rating-option', {remove_end(end=' or more')}, {str_to_int}))), + or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), '__post_extractor': self.extract_comments(video_id), } @@ -541,16 +541,16 @@ def _real_extract(self, url): 'title': attrs.get('data-video-title'), 'live_status': 'is_live', 'formats': formats, - 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {str.split(sep=',')}), default=[]), + 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {lambda x: x.split(',')}), default=[]), 'uploader': uploader, 'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})), 'uploader_url': uploader_url, 'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})), 'description': traverse_obj(attrs, ('data-video-description', {str})), 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), - 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {str.split(sep=',')}), default=[]), + 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {lambda x: x.split(',')}), default=[]), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) - or traverse_obj(attrs, ('data-content-rating-option', {remove_end(end=' or more')}, {str_to_int}))), + or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), 'like_count': int_or_none(stream_meta.get('like')), 'dislike_count': int_or_none(stream_meta.get('dislike')), 'timestamp': parse_iso8601(stream_meta.get('start_time')), From c3bd33ccb206cee984299395423e5575cf1261b9 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 24 Nov 2024 05:16:04 +0000 Subject: [PATCH 07/12] use 'filter' to make `str.split` works correctly --- yt_dlp/extractor/vidio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 0efd6104d..9b1fd839c 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -356,7 +356,7 @@ def _real_extract(self, url): 'channel': channel, 'channel_id': channel_id, 'channel_url': f'{uploader_url}/channels/{channel_id}-{channel}', - 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {lambda x: x.split(',')}), default=[]), + 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, filter, {lambda x: x.split(',')}), default=[]), 'season_id': traverse_obj(attrs, ('data-season-id', {str_or_none})), 'season_name': traverse_obj(attrs, ('data-season-name', {str})), 'uploader': uploader, @@ -366,7 +366,7 @@ def _real_extract(self, url): 'duration': traverse_obj(attrs, ('data-video-duration', {str_to_int})), 'description': traverse_obj(attrs, ('data-video-description', {str})), 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), - 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {lambda x: x.split(',')}), default=[]), + 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, filter, {lambda x: x.split(',')}), default=[]), 'timestamp': traverse_obj(attrs, ('data-video-publish-date', {parse_iso8601(delimiter=' ')})), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), @@ -541,14 +541,14 @@ def _real_extract(self, url): 'title': attrs.get('data-video-title'), 'live_status': 'is_live', 'formats': formats, - 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, {lambda x: x.split(',')}), default=[]), + 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, filter, {lambda x: x.split(',')}), default=[]), 'uploader': uploader, 'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})), 'uploader_url': uploader_url, 'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})), 'description': traverse_obj(attrs, ('data-video-description', {str})), 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), - 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, {lambda x: x.split(',')}), default=[]), + 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, filter, {lambda x: x.split(',')}), default=[]), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), 'like_count': int_or_none(stream_meta.get('like')), From 97f1f1e4e34e1bbb28ecebe5c1a05fb7bfc5c58b Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Sun, 24 Nov 2024 16:13:40 +0000 Subject: [PATCH 08/12] fix test; code style; extract functions --- yt_dlp/extractor/vidio.py | 88 +++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 9b1fd839c..ba6547941 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -130,11 +130,10 @@ class VidioIE(VidioBaseIE): 'tags': [], 'uploader_url': 'https://www.vidio.com/@vidiooriginal', 'live_status': 'not_live', - 'genres': ['romance', 'drama', 'comedy', 'Teen', 'love triangle'], + 'genres': ['romance', 'drama', 'comedy', 'Teen', 'love triangle', 'Female-Led'], 'season_id': '8220', 'season_name': 'Season 1', 'age_limit': 13, - 'availability': 'premium_only', 'comment_count': int, }, 'expected_warnings': ['This video is DRM protected'], @@ -171,11 +170,9 @@ class VidioIE(VidioBaseIE): }, }, { 'url': 'https://www.vidio.com/watch/1716926-mas-suka-masukin-aja', - 'md5': 'acc4009eeac0033328419aada7bc6925', 'info_dict': { 'id': '1716926', 'display_id': 'mas-suka-masukin-aja', - 'ext': 'mp4', 'title': 'Mas Suka, Masukin Aja', 'description': 'md5:667093b08e07b6fb92f68037f81f2267', 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', @@ -197,9 +194,14 @@ class VidioIE(VidioBaseIE): 'availability': 'premium_only', }, 'params': { + 'skip_download': True, 'ignore_no_formats_error': True, }, - 'expected_warnings': ['This show isn\'t available in your country'], + 'expected_warnings': [ + 'This video requires subscription', + 'No video formats found!', + 'Requested format is not available', + ], }, { 'url': 'https://www.vidio.com/watch/2372948-first-day-of-school-kindergarten-life-song-beabeo-nursery-rhymes-kids-songs', 'md5': 'c6d1bde08eee88bea27cca9dc38bc3df', @@ -221,19 +223,17 @@ class VidioIE(VidioBaseIE): 'tags': [], 'uploader_url': 'https://www.vidio.com/@kidsstartv', 'live_status': 'not_live', - 'genres': ['animation', 'Cartoon'], + 'genres': ['3D cartoon', 'kids music'], 'season_id': '6023', 'season_name': 'school series', }, }, { 'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon', - 'md5': '405b61a2f06c74e052e0bd67cad6b891', 'info_dict': { 'id': '1550718', 'display_id': 'stand-by-me-doraemon', - 'ext': 'mp4', 'title': 'Stand by Me Doraemon', - 'description': 'md5:673d899f6a58dd4b0d18aebe30545e2a', + 'description': 'md5:19b658efb7c609895ea5472daa76b645', 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', 'duration': 5429, 'uploader': 'vidiopremier', @@ -246,16 +246,21 @@ class VidioIE(VidioBaseIE): 'tags': ['anime-lucu', 'top-10-this-week', 'kids', 'stand-by-me-doraemon-2'], 'uploader_url': 'https://www.vidio.com/@vidiopremier', 'live_status': 'not_live', - 'genres': ['anime', 'family', 'adventure', 'comedy', 'coming of age'], + 'genres': 'count:11', 'season_id': '237', 'season_name': '', 'age_limit': 7, 'availability': 'premium_only', }, 'params': { + 'skip_download': True, 'ignore_no_formats_error': True, }, - 'expected_warnings': ['This show isn\'t available in your country'], + 'expected_warnings': [ + 'This video requires subscription', + 'No video formats found!', + 'Requested format is not available', + ], }, { # 404 Not Found 'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north', @@ -293,24 +298,12 @@ class VidioIE(VidioBaseIE): }, }] - def _real_extract(self, url): - video_id, display_id = self._match_valid_url(url).groups() - - webpage = self._download_webpage(url, video_id) - api_data = self._call_api(f'https://api.vidio.com/videos/{video_id}', display_id, 'Downloading API data') + def _get_formats_and_subtitles(self, attrs, video_id): interactions_stream = self._download_json( 'https://www.vidio.com/interactions_stream.json', video_id, query={'video_id': video_id, 'type': 'videos'}, note='Downloading stream info', errnote='Unable to download stream info') - attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage)) - - if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})): - self.report_drm(video_id) - if traverse_obj(attrs, ('data-geoblock', {lambda x: x == 'true'})): - self.raise_geo_restricted( - 'This show isn\'t available in your country', countries=['ID'], metadata_available=True) - subtitles = dict(traverse_obj(attrs, ('data-subtitles', {json.loads}, ..., { lambda x: (x['language'], [{'url': x['file']['url']}]), }))) @@ -319,26 +312,47 @@ def _real_extract(self, url): # There are time-based strings in the playlist URL, # so try the other URL iff no formats extracted from the prior one. - for m3u8_url in traverse_obj([ - interactions_stream.get('source'), - attrs.get('data-vjs-clip-hls-url')], (..., {url_or_none})): - fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4', m3u8_id='hls') - formats.extend(fmt) - self._merge_subtitles(subs, target=subtitles) + for m3u8_url in traverse_obj( + [interactions_stream.get('source'), attrs.get('data-vjs-clip-hls-url')], (..., {url_or_none})): + fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False) if fmt: + formats.extend(fmt) + self._merge_subtitles(subs, target=subtitles) break - for mpd_url in traverse_obj([ - interactions_stream.get('source_dash'), - attrs.get('data-vjs-clip-dash-url')], (..., {url_or_none})): - fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash') - formats.extend(fmt) - self._merge_subtitles(subs, target=subtitles) + for mpd_url in traverse_obj( + [interactions_stream.get('source_dash'), attrs.get('data-vjs-clip-dash-url')], (..., {url_or_none})): + fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash', fatal=False) if fmt: + formats.extend(fmt) + self._merge_subtitles(subs, target=subtitles) break # TODO: extract also short previews of premier-exclusive videos from "attrs['data-content-preview-url']". + return formats, subtitles + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).groups() + + webpage = self._download_webpage(url, video_id) + api_data = self._call_api(f'https://api.vidio.com/videos/{video_id}', display_id, 'Downloading API data') + + attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage)) + + availability = self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')) + + if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})): + self.report_drm(video_id) + + formats, subtitles = self._get_formats_and_subtitles(attrs, video_id) + if not formats: + if availability == 'premium_only': + self.raise_login_required('This video requires subscription', metadata_available=True) + elif traverse_obj(attrs, ('data-geoblock', {lambda x: x == 'true'})): + self.raise_geo_restricted( + 'This show isn\'t available in your country', countries=self._GEO_COUNTRIES, metadata_available=True) + uploader = attrs.get('data-video-username') uploader_url = f'https://www.vidio.com/@{uploader}' channel = attrs.get('data-video-channel') @@ -365,7 +379,7 @@ def _real_extract(self, url): 'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})), 'duration': traverse_obj(attrs, ('data-video-duration', {str_to_int})), 'description': traverse_obj(attrs, ('data-video-description', {str})), - 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), + 'availability': availability, 'tags': traverse_obj(attrs, ('data-video-tags', {str_or_none}, filter, {lambda x: x.split(',')}), default=[]), 'timestamp': traverse_obj(attrs, ('data-video-publish-date', {parse_iso8601(delimiter=' ')})), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) From 89aa985a472bced9c9c994030b2b677e49f74302 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 28 Jan 2025 16:45:09 +0000 Subject: [PATCH 09/12] fix live support --- yt_dlp/extractor/vidio.py | 107 ++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 57 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index ba6547941..9a3fe8e5f 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,6 +1,12 @@ +import base64 +import datetime as dt +import hashlib +import hmac import json from .common import InfoExtractor +from ..aes import aes_cbc_encrypt_bytes +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, clean_html, @@ -22,6 +28,7 @@ class VidioBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['ID'] _LOGIN_URL = 'https://www.vidio.com/users/login' _NETRC_MACHINE = 'vidio' @@ -67,15 +74,15 @@ def _initialize_pre_login(self): 'https://www.vidio.com/auth', None, data=b'')['api_key'] self._ua = self.get_param('http_headers')['User-Agent'] - def _call_api(self, url, video_id, note=None): + def _call_api(self, url, video_id, note=None, headers=None): return self._download_json(url, video_id, note=note, headers={ 'Content-Type': 'application/vnd.api+json', 'X-API-KEY': self._api_key, + **(headers or {}), }) class VidioIE(VidioBaseIE): - _GEO_COUNTRIES = ['ID'] _VALID_URL = r'https?://(?:www\.)?vidio\.com/(?:watch|embed)/(?P\d+)-(?P[^/?#&]+)' _EMBED_REGEX = [rf'(?x)]+\bsrc=[\'"](?P{_VALID_URL})'] _TESTS = [{ @@ -477,29 +484,11 @@ class VidioLiveIE(VidioBaseIE): 'upload_date': '20160421', 'tags': [], 'genres': [], - 'age_limit': 13, }, }, { + # Premier-exclusive livestream 'url': 'https://vidio.com/live/733-trans-tv', - 'info_dict': { - 'id': '733', - 'ext': 'mp4', - 'title': r're:TRANS TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}', - 'display_id': 'trans-tv', - 'uploader': 'transtv', - 'uploader_id': '551300', - 'uploader_url': 'https://www.vidio.com/@transtv', - 'thumbnail': r're:^https?://thumbor\.prod\.vidiocdn\.com/.+\.jpg$', - 'live_status': 'is_live', - 'description': r're:^Trans TV adalah stasiun televisi swasta Indonesia.+', - 'like_count': int, - 'dislike_count': int, - 'timestamp': 1461355080, - 'upload_date': '20160422', - 'tags': [], - 'genres': [], - 'age_limit': 13, - }, + 'only_matching': True, }, { # Premier-exclusive livestream 'url': 'https://www.vidio.com/live/6362-tvn', @@ -510,55 +499,59 @@ class VidioLiveIE(VidioBaseIE): 'only_matching': True, }] + _WEB_CLIENT_SECRTE = b'dPr0QImQ7bc5o9LMntNba2DOsSbZcjUh' + _WEB_CLIENT_IV = b'C8RWsrtFsoeyCyPt' + + def _yield_formats(self, url, video_id): + client_id = str(dt.datetime.now().timestamp())[:-3] + try: + stream_info = self._call_api( + f'https://api.vidio.com/livestreamings/{video_id}/stream?initialize=true', video_id, + headers={ + 'X-API-KEY': base64.b64encode(aes_cbc_encrypt_bytes( + self._api_key.encode(), + self._WEB_CLIENT_SECRTE, + self._WEB_CLIENT_IV, + )), + 'X-API-Platform': 'web-desktop', + 'X-Client': client_id, + 'X-Request-From': url, + 'X-Secure-Level': 2, + 'X-Signature': hmac.new(f'V1d10D3v:{client_id}'.encode(), client_id.encode(), hashlib.sha256).hexdigest(), + }, + ) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + self.raise_login_required('This show requires subscription', metadata_available=True) + return [] + raise + + if m3u8_url := traverse_obj(stream_info, ('data', 'attributes', 'hls', {url_or_none})): + yield from self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) + if mpd_url := traverse_obj(stream_info, ('data', 'attributes', 'dash', {url_or_none})): + yield from self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False) + def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() - webpage = self._download_webpage(url, video_id) + attrs = extract_attributes(get_element_html_by_id( + f'player-data-{video_id}', self._download_webpage(url, video_id, fatal=False) or '')) stream_meta = traverse_obj(self._call_api( f'https://www.vidio.com/api/livestreamings/{video_id}/detail', video_id), ('livestreamings', 0, {dict}), default={}) - tokenized_playlist_urls = self._download_json( - f'https://www.vidio.com/live/{video_id}/tokens', video_id, - query={'type': 'dash'}, note='Downloading tokenized playlist', - errnote='Unable to download tokenized playlist', data=b'') - interactions_stream = self._download_json( - 'https://www.vidio.com/interactions_stream.json', video_id, - query={'video_id': video_id, 'type': 'videos'}, note='Downloading stream info', - errnote='Unable to download stream info') - - attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage)) - - if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})): - self.report_drm(video_id) - if traverse_obj(attrs, ('data-geoblock', {lambda x: x == 'true'})): - self.raise_geo_restricted( - 'This show isn\'t available in your country', countries=['ID'], metadata_available=True) - - formats = [] - - for m3u8_url in traverse_obj([ - tokenized_playlist_urls.get('hls_url'), - interactions_stream.get('source')], (..., {url_or_none})): - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='hls')) - - for mpd_url in traverse_obj([ - tokenized_playlist_urls.get('dash_url'), - interactions_stream.get('source_dash')], (..., {url_or_none})): - formats.extend(self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')) - - uploader = attrs.get('data-video-username') - uploader_url = f'https://www.vidio.com/@{uploader}' return { 'id': video_id, 'display_id': display_id, 'title': attrs.get('data-video-title'), 'live_status': 'is_live', - 'formats': formats, + 'formats': list(self._yield_formats(url, video_id)), 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, filter, {lambda x: x.split(',')}), default=[]), - 'uploader': uploader, 'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})), - 'uploader_url': uploader_url, + **traverse_obj(attrs, ('data-video-username', {lambda x: { + 'uploader': x, + 'uploader_url': f'https://www.vidio.com/@{x}', + }}), default={}), 'thumbnail': traverse_obj(attrs, ('data-video-image-url', {url_or_none})), 'description': traverse_obj(attrs, ('data-video-description', {str})), 'availability': self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')), From 655006bc0a7bf042474547a5fcea9f1c39ec87a4 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:06:46 +0000 Subject: [PATCH 10/12] HTTP 302 without HTTPError can be handled automatically --- yt_dlp/extractor/vidio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 9a3fe8e5f..f84e1bfdb 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -51,7 +51,7 @@ def is_logged_in(): 'authenticity_token': self._html_search_meta('csrf-token', login_page, fatal=True), }) login_post, login_post_urlh = self._download_webpage_handle( - self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401]) + self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=401) if login_post_urlh.status == 401: if get_element_by_class('onboarding-content-register-popup__title', login_post): From bbd3f76825492e6c97ba0b09dfe0a456bee46e6e Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:16:40 +0000 Subject: [PATCH 11/12] use display_id like the existing code. don't know if it's OK --- yt_dlp/extractor/vidio.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index f84e1bfdb..f14a41ac2 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -74,8 +74,8 @@ def _initialize_pre_login(self): 'https://www.vidio.com/auth', None, data=b'')['api_key'] self._ua = self.get_param('http_headers')['User-Agent'] - def _call_api(self, url, video_id, note=None, headers=None): - return self._download_json(url, video_id, note=note, headers={ + def _call_api(self, url, display_id, note=None, headers=None): + return self._download_json(url, display_id, note=note, headers={ 'Content-Type': 'application/vnd.api+json', 'X-API-KEY': self._api_key, **(headers or {}), @@ -305,9 +305,9 @@ class VidioIE(VidioBaseIE): }, }] - def _get_formats_and_subtitles(self, attrs, video_id): + def _get_formats_and_subtitles(self, attrs, video_id, display_id): interactions_stream = self._download_json( - 'https://www.vidio.com/interactions_stream.json', video_id, + 'https://www.vidio.com/interactions_stream.json', display_id, query={'video_id': video_id, 'type': 'videos'}, note='Downloading stream info', errnote='Unable to download stream info') @@ -321,7 +321,7 @@ def _get_formats_and_subtitles(self, attrs, video_id): for m3u8_url in traverse_obj( [interactions_stream.get('source'), attrs.get('data-vjs-clip-hls-url')], (..., {url_or_none})): - fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False) + fmt, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, display_id, ext='mp4', m3u8_id='hls', fatal=False) if fmt: formats.extend(fmt) self._merge_subtitles(subs, target=subtitles) @@ -329,7 +329,7 @@ def _get_formats_and_subtitles(self, attrs, video_id): for mpd_url in traverse_obj( [interactions_stream.get('source_dash'), attrs.get('data-vjs-clip-dash-url')], (..., {url_or_none})): - fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id, mpd_id='dash', fatal=False) + fmt, subs = self._extract_mpd_formats_and_subtitles(mpd_url, display_id, mpd_id='dash', fatal=False) if fmt: formats.extend(fmt) self._merge_subtitles(subs, target=subtitles) @@ -342,7 +342,7 @@ def _get_formats_and_subtitles(self, attrs, video_id): def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) api_data = self._call_api(f'https://api.vidio.com/videos/{video_id}', display_id, 'Downloading API data') attrs = extract_attributes(get_element_html_by_id(f'player-data-{video_id}', webpage)) @@ -350,9 +350,9 @@ def _real_extract(self, url): availability = self._availability(needs_premium=(attrs.get('data-access-type') == 'premium')) if traverse_obj(attrs, ('data-drm-enabled', {lambda x: x == 'true'})): - self.report_drm(video_id) + self.report_drm(display_id) - formats, subtitles = self._get_formats_and_subtitles(attrs, video_id) + formats, subtitles = self._get_formats_and_subtitles(attrs, video_id, display_id) if not formats: if availability == 'premium_only': self.raise_login_required('This video requires subscription', metadata_available=True) @@ -391,10 +391,10 @@ def _real_extract(self, url): 'timestamp': traverse_obj(attrs, ('data-video-publish-date', {parse_iso8601(delimiter=' ')})), 'age_limit': (traverse_obj(attrs, ('data-adult', {lambda x: 18 if x == 'true' else 0})) or traverse_obj(attrs, ('data-content-rating-option', {lambda x: remove_end(x, ' or more')}, {str_to_int}))), - '__post_extractor': self.extract_comments(video_id), + '__post_extractor': self.extract_comments(video_id, display_id), } - def _get_comments(self, video_id): + def _get_comments(self, video_id, display_id): # TODO: extract replies under comments def extract_comments(comments_data): @@ -415,7 +415,7 @@ def extract_comments(comments_data): comment_page_url = f'https://api.vidio.com/videos/{video_id}/comments' while comment_page_url: - comments_data = self._call_api(comment_page_url, video_id, 'Downloading comments') + comments_data = self._call_api(comment_page_url, display_id, 'Downloading comments') comment_page_url = traverse_obj(comments_data, ('links', 'next', {url_or_none})) yield from extract_comments(comments_data) @@ -502,11 +502,11 @@ class VidioLiveIE(VidioBaseIE): _WEB_CLIENT_SECRTE = b'dPr0QImQ7bc5o9LMntNba2DOsSbZcjUh' _WEB_CLIENT_IV = b'C8RWsrtFsoeyCyPt' - def _yield_formats(self, url, video_id): + def _yield_formats(self, url, video_id, display_id): client_id = str(dt.datetime.now().timestamp())[:-3] try: stream_info = self._call_api( - f'https://api.vidio.com/livestreamings/{video_id}/stream?initialize=true', video_id, + f'https://api.vidio.com/livestreamings/{video_id}/stream?initialize=true', display_id, headers={ 'X-API-KEY': base64.b64encode(aes_cbc_encrypt_bytes( self._api_key.encode(), @@ -527,17 +527,17 @@ def _yield_formats(self, url, video_id): raise if m3u8_url := traverse_obj(stream_info, ('data', 'attributes', 'hls', {url_or_none})): - yield from self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) + yield from self._extract_m3u8_formats(m3u8_url, display_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) if mpd_url := traverse_obj(stream_info, ('data', 'attributes', 'dash', {url_or_none})): - yield from self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False) + yield from self._extract_mpd_formats(mpd_url, display_id, mpd_id='dash', fatal=False) def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() attrs = extract_attributes(get_element_html_by_id( - f'player-data-{video_id}', self._download_webpage(url, video_id, fatal=False) or '')) + f'player-data-{video_id}', self._download_webpage(url, display_id, fatal=False) or '')) stream_meta = traverse_obj(self._call_api( - f'https://www.vidio.com/api/livestreamings/{video_id}/detail', video_id), + f'https://www.vidio.com/api/livestreamings/{video_id}/detail', display_id), ('livestreamings', 0, {dict}), default={}) return { @@ -545,7 +545,7 @@ def _real_extract(self, url): 'display_id': display_id, 'title': attrs.get('data-video-title'), 'live_status': 'is_live', - 'formats': list(self._yield_formats(url, video_id)), + 'formats': list(self._yield_formats(url, video_id, display_id)), 'genres': traverse_obj(attrs, ('data-genres', {str_or_none}, filter, {lambda x: x.split(',')}), default=[]), 'uploader_id': traverse_obj(attrs, ('data-video-user-id', {str_or_none})), **traverse_obj(attrs, ('data-video-username', {lambda x: { From 316b24d66cef1b393f9ca9db87d33bf6a807806e Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 28 Jan 2025 18:40:00 +0000 Subject: [PATCH 12/12] fix test --- yt_dlp/extractor/vidio.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index f14a41ac2..79f3f8139 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -484,6 +484,7 @@ class VidioLiveIE(VidioBaseIE): 'upload_date': '20160421', 'tags': [], 'genres': [], + 'age_limit': 13, }, }, { # Premier-exclusive livestream