From e07f796bc4bcaffb7789910e304f679af5ebe617 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Mon, 20 May 2024 15:22:40 +0000 Subject: [PATCH 01/11] [ie/sheeta] Support websites based on sheeta; HTTP 404 ALLOWED --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/generic.py | 2 +- yt_dlp/extractor/niconicochannelplus.py | 334 ++-------- yt_dlp/extractor/sheeta.py | 836 ++++++++++++++++++++++++ 4 files changed, 882 insertions(+), 291 deletions(-) create mode 100644 yt_dlp/extractor/sheeta.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index cf408b682..b2afb7476 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1756,6 +1756,7 @@ ) from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE +from .sheeta import SheetaEmbedIE from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2cfed0fd0..b8e394e33 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2376,7 +2376,7 @@ def _real_extract(self, url): full_response = self._request_webpage(url, video_id, headers=filter_dict({ 'Accept-Encoding': 'identity', 'Referer': smuggled_data.get('referer'), - })) + }), expected_status=404) new_url = full_response.url if new_url != extract_basic_auth(url)[0]: self.report_following_redirect(new_url) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 89af3f7b5..8cfede78b 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -1,97 +1,54 @@ -import functools -import json - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - OnDemandPagedList, - filter_dict, - int_or_none, - parse_qs, - str_or_none, - traverse_obj, - unified_timestamp, - url_or_none, -) +from .sheeta import SheetaEmbedIE -class NiconicoChannelPlusBaseIE(InfoExtractor): - _WEBPAGE_BASE_URL = 'https://nicochannel.jp' - - def _call_api(self, path, item_id, *args, **kwargs): - return self._download_json( - f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, *args, **kwargs) - - def _find_fanclub_site_id(self, channel_name): - fanclub_list_json = self._call_api( - 'content_providers/channels', item_id=f'channels/{channel_name}', - note='Fetching channel list', errnote='Unable to fetch channel list', - )['data']['content_providers'] - fanclub_id = traverse_obj(fanclub_list_json, ( - lambda _, v: v['domain'] == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'), - get_all=False) - if not fanclub_id: - raise ExtractorError(f'Channel {channel_name} does not exist', expected=True) - return fanclub_id - - def _get_channel_base_info(self, fanclub_site_id): - return traverse_obj(self._call_api( - f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}', - note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False, - ), ('data', 'fanclub_site', {dict})) or {} - - def _get_channel_user_info(self, fanclub_site_id): - return traverse_obj(self._call_api( - f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}', - note='Fetching channel user info', errnote='Unable to fetch channel user info', fatal=False, - data=json.dumps('null').encode('ascii'), - ), ('data', 'fanclub_site', {dict})) or {} - - -class 
NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
+class NiconicoChannelPlusIE(SheetaEmbedIE):
     IE_NAME = 'NiconicoChannelPlus'
     IE_DESC = 'ニコニコチャンネルプラス'
     _VALID_URL = r'https?://nicochannel\.jp/(?P<channel>[\w.-]+)/(?:video|live)/(?P<code>sm\w+)'
     _TESTS = [{
-        'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H',
+        'url': 'https://nicochannel.jp/kaorin/video/sm89Hd4SEduy8WTsb4KxAhBL',
         'info_dict': {
-            'id': 'smsDd8EdFLcVZk9yyAhD6H7H',
-            'title': '前田佳織里はニコ生がしたい!',
+            'id': 'sm89Hd4SEduy8WTsb4KxAhBL',
+            'title': '前田佳織里の世界攻略計画 #2',
             'ext': 'mp4',
             'channel': '前田佳織里の世界攻略計画',
-            'channel_id': 'kaorin',
+            'channel_id': 'nicochannel.jp/kaorin',
             'channel_url': 'https://nicochannel.jp/kaorin',
             'live_status': 'not_live',
-            'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path',
-            'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。',
-            'timestamp': 1641360276,
-            'duration': 4097,
+            'thumbnail': str,
+            'description': 'md5:02573495c8be849c0cb88df6f1b85f8b',
+            'timestamp': 1644546015,
+            'duration': 4093,
             'comment_count': int,
             'view_count': int,
-            'tags': [],
-            'upload_date': '20220105',
+            'tags': ['前田攻略', '前田佳織里', '前田佳織里の世界攻略計画'],
+            'upload_date': '20220211',
         },
         'params': {
             'skip_download': True,
         },
     }, {
         # age limited video; test purpose channel.
-        'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve',
+        'url': 'https://nicochannel.jp/testman/video/smJPZg3nwAxP8UECPsHDiCGM',
         'info_dict': {
-            'id': 'smDXbcrtyPNxLx9jc4BW69Ve',
-            'title': 'test oshiro',
+            'id': 'smJPZg3nwAxP8UECPsHDiCGM',
+            'title': 'DW_itaba_LSM検証_1080p60fps_9000Kbpsで打ち上げたときの挙動確認(パススルーあり)',
             'ext': 'mp4',
             'channel': '本番チャンネルプラステストマン',
-            'channel_id': 'testman',
+            'channel_id': 'nicochannel.jp/testman',
             'channel_url': 'https://nicochannel.jp/testman',
             'age_limit': 18,
             'live_status': 'was_live',
-            'timestamp': 1666344616,
-            'duration': 86465,
+            'thumbnail': str,
+            'description': 'TEST',
+            'timestamp': 1701329428,
+            'duration': 229,
             'comment_count': int,
             'view_count': int,
-            'tags': [],
-            'upload_date': '20221021',
+            'tags': ['検証用'],
+            'upload_date': '20231130',
+            'release_timestamp': 1701328800,
+            'release_date': '20231130',
         },
         'params': {
             'skip_download': True,
@@ -99,158 +56,10 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
     }]
 
     def _real_extract(self, url):
-        content_code, channel_id = self._match_valid_url(url).group('code', 'channel')
-        fanclub_site_id = self._find_fanclub_site_id(channel_id)
-
-        data_json = self._call_api(
-            f'video_pages/{content_code}', item_id=content_code, headers={'fc_use_device': 'null'},
-            note='Fetching video page info', errnote='Unable to fetch video page info',
-        )['data']['video_page']
-
-        live_status, session_id = self._get_live_status_and_session_id(content_code, data_json)
-
-        release_timestamp_str = data_json.get('live_scheduled_start_at')
-
-        formats = []
-
-        if live_status == 'is_upcoming':
-            if release_timestamp_str:
-                msg = f'This live event will begin at {release_timestamp_str} UTC'
-            else:
-                msg = 'This event has not started yet'
-            self.raise_no_formats(msg, expected=True, video_id=content_code)
-        else:
-            formats = self._extract_m3u8_formats(
-                # "authenticated_url" is a format string that contains "{session_id}".
- m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), - video_id=content_code) - - return { - 'id': content_code, - 'formats': formats, - '_format_sort_fields': ('tbr', 'vcodec', 'acodec'), - 'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'), - 'channel_id': channel_id, - 'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}', - 'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')), - 'live_status': live_status, - 'release_timestamp': unified_timestamp(release_timestamp_str), - **traverse_obj(data_json, { - 'title': ('title', {str}), - 'thumbnail': ('thumbnail_url', {url_or_none}), - 'description': ('description', {str}), - 'timestamp': ('released_at', {unified_timestamp}), - 'duration': ('active_video_filename', 'length', {int_or_none}), - 'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}), - 'view_count': ('video_aggregate_info', 'total_views', {int_or_none}), - 'tags': ('video_tags', ..., 'tag', {str}), - }), - '__post_extractor': self.extract_comments( - content_code=content_code, - comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))), - } - - def _get_comments(self, content_code, comment_group_id): - item_id = f'{content_code}/comments' - - if not comment_group_id: - return None - - comment_access_token = self._call_api( - f'video_pages/{content_code}/comments_user_token', item_id, - note='Getting comment token', errnote='Unable to get comment token', - )['data']['access_token'] - - comment_list = self._download_json( - 'https://comm-api.sheeta.com/messages.history', video_id=item_id, - note='Fetching comments', errnote='Unable to fetch comments', - headers={'Content-Type': 'application/json'}, - query={ - 'sort_direction': 'asc', - 'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120, - }, - data=json.dumps({ - 'token': comment_access_token, - 'group_id': comment_group_id, - }).encode('ascii')) - - for comment in traverse_obj(comment_list, ...): - yield traverse_obj(comment, { - 'author': ('nickname', {str}), - 'author_id': ('sender_id', {str_or_none}), - 'id': ('id', {str_or_none}), - 'text': ('message', {str}), - 'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}), - 'author_is_uploader': ('sender_id', {lambda x: x == '-1'}), - }, get_all=False) - - def _get_live_status_and_session_id(self, content_code, data_json): - video_type = data_json.get('type') - live_finished_at = data_json.get('live_finished_at') - - payload = {} - if video_type == 'vod': - if live_finished_at: - live_status = 'was_live' - else: - live_status = 'not_live' - elif video_type == 'live': - if not data_json.get('live_started_at'): - return 'is_upcoming', '' - - if not live_finished_at: - live_status = 'is_live' - else: - live_status = 'was_live' - payload = {'broadcast_type': 'dvr'} - - video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg')) - video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg')) - - self.write_debug(f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.') - - if not (video_allow_dvr_flg and video_convert_to_vod_flg): - raise ExtractorError( - 'Live was ended, there is no video for download.', video_id=content_code, expected=True) - else: - raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False) - - self.write_debug(f'{content_code}: 
video_type={video_type}, live_status={live_status}')
-
-        session_id = self._call_api(
-            f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session',
-            data=json.dumps(payload).encode('ascii'), headers={
-                'Content-Type': 'application/json',
-                'fc_use_device': 'null',
-                'origin': 'https://nicochannel.jp',
-            },
-            note='Getting session id', errnote='Unable to get session id',
-        )['data']['session_id']
-
-        return live_status, session_id
+        return super()._extract_from_url(url)
 
 
-class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE):
-    _PAGE_SIZE = 12
-
-    def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page):
-        response = self._call_api(
-            path, item_id, query={
-                **query,
-                'page': (page + 1),
-                'per_page': self._PAGE_SIZE,
-            },
-            headers={'fc_use_device': 'null'},
-            note=f'Getting channel info (page {page + 1})',
-            errnote=f'Unable to get channel info (page {page + 1})')
-
-        for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
-            # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
-            yield self.url_result(
-                f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE)
-
-
-class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
+class NiconicoChannelPlusChannelVideosIE(SheetaEmbedIE):
     IE_NAME = 'NiconicoChannelPlus:channel:videos'
     IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos'
     _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/videos(?:\?.*)?'
     _TESTS = [{
@@ -258,7 +67,7 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
         # query: None
         'url': 'https://nicochannel.jp/testman/videos',
         'info_dict': {
-            'id': 'testman-videos',
+            'id': 'nicochannel.jp/testman/videos',
             'title': '本番チャンネルプラステストマン-videos',
         },
         'playlist_mincount': 18,
@@ -266,7 +75,7 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
     }, {
         # query: None
         'url': 'https://nicochannel.jp/testtarou/videos',
         'info_dict': {
-            'id': 'testtarou-videos',
+            'id': 'nicochannel.jp/testtarou/videos',
             'title': 'チャンネルプラステスト太郎-videos',
         },
         'playlist_mincount': 2,
@@ -274,15 +83,15 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
     }, {
         # query: None
         'url': 'https://nicochannel.jp/testjirou/videos',
         'info_dict': {
-            'id': 'testjirou-videos',
-            'title': 'チャンネルプラステスト二郎-videos',
+            'id': 'nicochannel.jp/testjirou/videos',
+            'title': 'チャンネルプラステスト二郎21-videos',
         },
         'playlist_mincount': 12,
     }, {
         # query: tag
         'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8',
         'info_dict': {
-            'id': 'testman-videos',
+            'id': 'nicochannel.jp/testman/videos',
             'title': '本番チャンネルプラステストマン-videos',
         },
         'playlist_mincount': 6,
@@ -290,7 +99,7 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
     }, {
         # query: vodType
        'url': 'https://nicochannel.jp/testman/videos?vodType=1',
         'info_dict': {
-            'id': 'testman-videos',
+            'id': 'nicochannel.jp/testman/videos',
             'title': '本番チャンネルプラステストマン-videos',
         },
         'playlist_mincount': 18,
@@ -298,7 +107,7 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
     }, {
         # query: sort
         'url': 'https://nicochannel.jp/testman/videos?sort=-released_at',
         'info_dict': {
-            'id': 'testman-videos',
+            'id': 'nicochannel.jp/testman/videos',
             'title': '本番チャンネルプラステストマン-videos',
         },
         'playlist_mincount': 18,
@@ -306,7 +115,7 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
     }, {
         # query: tag, vodType
         'url': 
'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1', 'info_dict': { - 'id': 'testman-videos', + 'id': 'nicochannel.jp/testman/videos', 'title': '本番チャンネルプラステストマン-videos', }, 'playlist_mincount': 6, @@ -314,7 +123,7 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE): # query: tag, sort 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at', 'info_dict': { - 'id': 'testman-videos', + 'id': 'nicochannel.jp/testman/videos', 'title': '本番チャンネルプラステストマン-videos', }, 'playlist_mincount': 6, @@ -322,7 +131,7 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE): # query: vodType, sort 'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at', 'info_dict': { - 'id': 'testman-videos', + 'id': 'nicochannel.jp/testman/videos', 'title': '本番チャンネルプラステストマン-videos', }, 'playlist_mincount': 18, @@ -330,97 +139,42 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE): # query: tag, vodType, sort 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at', 'info_dict': { - 'id': 'testman-videos', + 'id': 'nicochannel.jp/testman/videos', 'title': '本番チャンネルプラステストマン-videos', }, 'playlist_mincount': 6, }] def _real_extract(self, url): - """ - API parameters: - sort: - -released_at 公開日が新しい順 (newest to oldest) - released_at 公開日が古い順 (oldest to newest) - -number_of_vod_views 再生数が多い順 (most play count) - number_of_vod_views コメントが多い順 (most comments) - vod_type (is "vodType" in "url"): - 0 すべて (all) - 1 会員限定 (members only) - 2 一部無料 (partially free) - 3 レンタル (rental) - 4 生放送アーカイブ (live archives) - 5 アップロード動画 (uploaded videos) - """ - - channel_id = self._match_id(url) - fanclub_site_id = self._find_fanclub_site_id(channel_id) - channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name') - qs = parse_qs(url) - - return self.playlist_result( - OnDemandPagedList( - functools.partial( - self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/video_pages', - filter_dict({ - 'tag': traverse_obj(qs, ('tag', 0)), - 'sort': traverse_obj(qs, ('sort', 0), default='-released_at'), - 'vod_type': traverse_obj(qs, ('vodType', 0), default='0'), - }), - channel_id, f'{channel_id}/videos'), - self._PAGE_SIZE), - playlist_id=f'{channel_id}-videos', playlist_title=f'{channel_name}-videos') + return super()._extract_from_url(url) -class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE): +class NiconicoChannelPlusChannelLivesIE(SheetaEmbedIE): IE_NAME = 'NiconicoChannelPlus:channel:lives' IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. 
nicochannel.jp/channel/lives'
     _VALID_URL = r'https?://nicochannel\.jp/(?P<id>[a-z\d\._-]+)/lives'
     _TESTS = [{
         'url': 'https://nicochannel.jp/testman/lives',
         'info_dict': {
-            'id': 'testman-lives',
+            'id': 'nicochannel.jp/testman/lives',
             'title': '本番チャンネルプラステストマン-lives',
         },
         'playlist_mincount': 18,
     }, {
         'url': 'https://nicochannel.jp/testtarou/lives',
         'info_dict': {
-            'id': 'testtarou-lives',
+            'id': 'nicochannel.jp/testtarou/lives',
             'title': 'チャンネルプラステスト太郎-lives',
         },
         'playlist_mincount': 2,
     }, {
         'url': 'https://nicochannel.jp/testjirou/lives',
         'info_dict': {
-            'id': 'testjirou-lives',
-            'title': 'チャンネルプラステスト二郎-lives',
+            'id': 'nicochannel.jp/testjirou/lives',
+            'title': 'チャンネルプラステスト二郎21-lives',
         },
         'playlist_mincount': 6,
     }]
 
     def _real_extract(self, url):
-        """
-        API parameters:
-        live_type:
-            1 放送中 (on air)
-            2 放送予定 (scheduled live streams, oldest to newest)
-            3 過去の放送 - すべて (all ended live streams, newest to oldest)
-            4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest)
-        We use "4" instead of "3" because some recently ended live streams could not be downloaded.
-        """
-
-        channel_id = self._match_id(url)
-        fanclub_site_id = self._find_fanclub_site_id(channel_id)
-        channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name')
-
-        return self.playlist_result(
-            OnDemandPagedList(
-                functools.partial(
-                    self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/live_pages',
-                    {
-                        'live_type': 4,
-                    },
-                    channel_id, f'{channel_id}/lives'),
-                self._PAGE_SIZE),
-            playlist_id=f'{channel_id}-lives', playlist_title=f'{channel_name}-lives')
+        return super()._extract_from_url(url)
diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py
new file mode 100644
index 000000000..3209b1ecd
--- /dev/null
+++ b/yt_dlp/extractor/sheeta.py
@@ -0,0 +1,836 @@
+import base64
+import functools
+import hashlib
+import json
+import random
+import re
+import string
+import urllib.parse
+
+from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
+    filter_dict,
+    get_domain,
+    int_or_none,
+    parse_qs,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+    urlencode_postdata,
+    urljoin,
+    update_url_query,
+)
+
+
+class SheetaEmbedIE(InfoExtractor):
+    _NETRC_MACHINE = 'sheeta'
+    IE_NAME = 'sheeta'
+    IE_DESC = 'fan club system developed by DWANGO (ドワンゴ)'
+    _VALID_URL = False
+    _WEBPAGE_TESTS = [{
+        'url': 'https://qlover.jp/doku/video/smy4caVHR6trSddiG9uCDiy4',
+        'info_dict': {
+            'id': 'smy4caVHR6trSddiG9uCDiy4',
+            'title': '名取さなの毒にも薬にもならないラジオ#39',
+            'ext': 'mp4',
+            'channel': '名取さなの毒にも薬にもならないラジオ',
+            'channel_id': 'qlover.jp/doku',
+            'channel_url': 'https://qlover.jp/doku',
+            'age_limit': 0,
+            'live_status': 'not_live',
+            'thumbnail': str,
+            'description': 'md5:75c2143a59b4b70141b77ddb485991fd',
+            'timestamp': 1711933200,
+            'duration': 1872,
+            'comment_count': int,
+            'view_count': int,
+            'tags': ['名取さな', 'どくラジ', '文化放送', 'ラジオ'],
+            'upload_date': '20240401',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://itomiku-fc.jp/live/sm4P8x6oVPFBx59bNBGSgKoE',
+        'info_dict': {
+            'id': 'sm4P8x6oVPFBx59bNBGSgKoE',
+            'title': '【3/9(土)14:00~】「all yours」美来の日SP♪',
+            'ext': 'mp4',
+            'channel': '伊藤美来 Official Fanclub 「all yours」',
+            'channel_id': 'itomiku-fc.jp',
+            'channel_url': 'https://itomiku-fc.jp',
+            'age_limit': 0,
+            'live_status': 'was_live',
+            'thumbnail': str,
+            'description': 'md5:80a6a14db30d8506f70bec6a28a6c4ad',
+            'timestamp': 1709964399,
+            'duration': 4542,
+            
'comment_count': int, + 'view_count': int, + 'tags': ['生放送', '生放送アーカイブ'], + 'upload_date': '20240309', + 'release_timestamp': 1709959800, + 'release_date': '20240309', + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://canan8181.com/video/smxar9atjfNBn27bHhcTFLyg', + 'info_dict': { + 'id': 'smxar9atjfNBn27bHhcTFLyg', + 'title': '💛【7月】ドネートお礼しながら感想&どきどきトーク【感想会】', + 'ext': 'mp4', + 'channel': 'Canan official fanclub', + 'channel_id': 'canan8181.com', + 'channel_url': 'https://canan8181.com', + 'age_limit': 15, + 'live_status': 'was_live', + 'thumbnail': str, + 'description': 'md5:0cd80e51da82dbb89deae5ea14aad24d', + 'timestamp': 1659182206, + 'duration': 6997, + 'comment_count': int, + 'view_count': int, + 'tags': ['安眠など♡アーカイブ&動画(わらのおうちプラン以上)'], + 'upload_date': '20220730', + 'release_timestamp': 1659175200, + 'release_date': '20220730', + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://11audee.jp/audio/smx3ebEZFRnHeaGzUzgi5A98', + 'info_dict': { + 'id': 'smx3ebEZFRnHeaGzUzgi5A98', + 'title': '#相坂湯 第38回 ロコムジカちゃんの歌唱についてモノ申す!? ある意味レアな?鼻声坂くん!', + 'ext': 'm4a', + 'channel': '相坂優歌 湯上がり何飲む?', + 'channel_id': '11audee.jp', + 'channel_url': 'https://11audee.jp', + 'age_limit': 0, + 'live_status': 'not_live', + 'thumbnail': str, + 'description': 'md5:fdf881191f8057aa6af6042fc17fb94c', + 'timestamp': 1710860400, + 'duration': 631, + 'comment_count': int, + 'view_count': int, + 'tags': ['RADIO'], + 'upload_date': '20240319', + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://hololive-fc.com/videos', + 'info_dict': { + 'id': 'hololive-fc.com/videos', + 'title': '旧ホロライブ公式ファンクラブ-videos', + 'age_limit': 0, + 'timestamp': 1715652389, + 'upload_date': '20240514', + }, + 'playlist_mincount': 12, + }, { + 'url': 'https://tokinosora-fc.com/videos?vodType=1', + 'info_dict': { + 'id': 'tokinosora-fc.com/videos', + 'title': 'ときのそらオフィシャルファンクラブ-videos', + 'age_limit': 0, + 'timestamp': 1715652399, + 'upload_date': '20240514', + }, + 'playlist_mincount': 18, + }, { + 'url': 'https://01audee.jp/videos?tag=RADIO&vodType=1&sort=display_date', + 'info_dict': { + 'id': '01audee.jp/videos', + 'title': '大熊和奏 朝のささやき-videos', + 'age_limit': 0, + 'timestamp': 1715652369, + 'upload_date': '20240514', + }, + 'playlist_mincount': 6, + }, { + 'url': 'https://qlover.jp/bokuao/lives', + 'info_dict': { + 'id': 'qlover.jp/bokuao/lives', + 'title': '僕が見たかった青空の 「青天のヘキレキ!」-lives', + 'age_limit': 0, + 'timestamp': 1715652429, + 'upload_date': '20240514', + }, + 'playlist_mincount': 1, + }, { + 'url': 'https://06audee.jp/lives', + 'info_dict': { + 'id': '06audee.jp/lives', + 'title': '田中ちえ美のたなかのカナタ!-lives', + 'age_limit': 0, + 'timestamp': 1715652369, + 'upload_date': '20240514', + }, + 'playlist_mincount': 5, + }] + + _DOMAIN = None + _API_BASE_URL = None + _FANCLUB_GROUP_ID = None + _FANCLUB_SITE_ID_AUTH = None + _FANCLUB_SITE_ID_INFO = None + _AUTH_INFO = {} + + _AUTH0_BASE64_TRANS = str.maketrans({ + '+': '-', + '/': '_', + '=': None, + }) + _LIST_PAGE_SIZE = 12 + + def _extract_from_url(self, url): + parsed_url = urllib.parse.urlparse(url) + if '/videos' in parsed_url.path: + return self._extract_video_list_page(url) + elif '/lives' in parsed_url.path: + return self._extract_live_list_page(url) + else: + return self._extract_player_page(url) + + def _extract_from_webpage(self, url, webpage): + if 'GTM-KXT7G5G' in webpage or 'NicoGoogleTagManagerDataLayer' in webpage: + yield self._extract_from_url(url) + raise self.StopExtraction() + + def _call_api(self, path, item_id, *args, **kwargs): + 
return self._download_json(f'{self._API_BASE_URL}/{path}', item_id, *args, **kwargs)
+
+    def _find_fanclub_site_id(self, channel_id):
+        fanclub_list_json = self._call_api(
+            'content_providers/channel_domain', f'channels/{channel_id}',
+            query={'current_site_domain': urllib.parse.quote(f'https://{self._DOMAIN}/{channel_id}')},
+            note='Fetching channel list', errnote='Unable to fetch channel list',
+        )
+        if fanclub_id := traverse_obj(
+                fanclub_list_json, ('data', 'content_providers', 'id', {int_or_none}), get_all=False):
+            return fanclub_id
+        raise ExtractorError(f'Channel {channel_id} does not exist', expected=True)
+
+    def _extract_base_info(self, channel_id):
+        site_settings = self._download_json(
+            f'https://{self._DOMAIN}/site/settings.json', None,
+            note='Fetching site settings', errnote='Unable to fetch site settings')
+        self.write_debug(f'site_settings = {site_settings!r}')
+
+        self._API_BASE_URL = site_settings['api_base_url']
+        self._FANCLUB_GROUP_ID = site_settings['fanclub_group_id']
+        self._FANCLUB_SITE_ID_AUTH = site_settings['fanclub_site_id']
+
+        if channel_id:
+            self._FANCLUB_SITE_ID_INFO = self._find_fanclub_site_id(channel_id)
+        else:
+            self._FANCLUB_SITE_ID_INFO = self._FANCLUB_SITE_ID_AUTH
+
+    @property
+    def _auth_info(self):
+        if not self._AUTH_INFO.get(self._DOMAIN):
+            self._AUTH_INFO[self._DOMAIN] = {}
+        return self._AUTH_INFO.get(self._DOMAIN)
+
+    @_auth_info.setter
+    def _auth_info(self, value):
+        if not self._AUTH_INFO.get(self._DOMAIN):
+            self._AUTH_INFO[self._DOMAIN] = {}
+        self._AUTH_INFO[self._DOMAIN].update(value)
+
+    @property
+    def _channel_base_info(self):
+        return traverse_obj(self._call_api(
+            f'fanclub_sites/{self._FANCLUB_SITE_ID_INFO}/page_base_info', f'fanclub_sites/{self._FANCLUB_SITE_ID_INFO}',
+            note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False,
+        ), ('data', 'fanclub_site', {dict})) or {}
+
+    @property
+    def _channel_user_info(self):
+        return traverse_obj(self._call_api(
+            f'fanclub_sites/{self._FANCLUB_SITE_ID_INFO}/user_info', f'fanclub_sites/{self._FANCLUB_SITE_ID_INFO}',
+            note='Fetching channel user info', errnote='Unable to fetch channel user info', fatal=False,
+            data=json.dumps('null').encode(),
+        ), ('data', 'fanclub_site', {dict})) or {}
+
+    def _extract_channel_info(self, channel_id):
+        if channel_id:
+            full_channel_id = f'{self._DOMAIN}/{channel_id}'
+            channel_url = f'https://{self._DOMAIN}/{channel_id}'
+        else:
+            full_channel_id = self._DOMAIN
+            channel_url = f'https://{self._DOMAIN}'
+
+        return {
+            'channel': self._channel_base_info.get('fanclub_site_name'),
+            'channel_id': full_channel_id,
+            'channel_url': channel_url,
+            'age_limit': traverse_obj(self._channel_user_info, (
+                'content_provider', 'age_limit', {int_or_none})),
+        }
+
+    def _extract_player_page(self, url):
+        self._DOMAIN, channel_id, content_code = re.match(
+            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/(?:live|video|audio)/(?P<code>sm\w+)', url
+        ).group('domain', 'channel', 'code')
+        self._extract_base_info(channel_id)
+
+        data_json = self._call_api(
+            f'video_pages/{content_code}', content_code, headers={'fc_use_device': 'null'},
+            note='Fetching video page info', errnote='Unable to fetch video page info',
+        )['data']['video_page']
+
+        live_status = self._get_live_status(data_json, content_code)
+        formats = self._get_formats(data_json, live_status, content_code)
+        release_timestamp_str = data_json.get('live_scheduled_start_at')
+
+        if live_status == 'is_upcoming':
+            if release_timestamp_str:
+                msg = f'This live event will begin at 
{release_timestamp_str} UTC' + else: + msg = 'This event has not started yet' + self.raise_no_formats(msg, expected=True, video_id=content_code) + + return { + 'id': content_code, + 'formats': formats, + 'live_status': live_status, + 'release_timestamp': unified_timestamp(release_timestamp_str), + **self._extract_channel_info(channel_id), + **traverse_obj(data_json, { + 'title': ('title', {str}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + 'description': ('description', {str}), + 'timestamp': ('display_date', {unified_timestamp}), + 'duration': ('active_video_filename', 'length', {int_or_none}), + 'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}), + 'view_count': ('video_aggregate_info', 'total_views', {int_or_none}), + 'tags': ('video_tags', ..., 'tag', {str}), + }), + '__post_extractor': self.extract_comments( + content_code=content_code, + comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))), + } + + def _get_comments(self, content_code, comment_group_id): + item_id = f'{content_code}/comments' + + if not comment_group_id: + return None + + comment_access_token = self._call_api( + f'video_pages/{content_code}/comments_user_token', item_id, + note='Getting comment token', errnote='Unable to get comment token', + )['data']['access_token'] + + comment_list, urlh = self._download_json_handle( + 'https://comm-api.sheeta.com/messages.history', video_id=item_id, + note='Fetching comments', errnote='Unable to fetch comments', + headers={'Content-Type': 'application/json'}, expected_status=404, + query={ + 'sort_direction': 'asc', + 'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120, + }, + data=json.dumps({ + 'token': comment_access_token, + 'group_id': comment_group_id, + }).encode()) + if urlh.status == 404: + self.report_warning('Unable to fetch comments due to rate limit', content_code) + return + + for comment in traverse_obj(comment_list, ...): + yield traverse_obj(comment, { + 'author': ('nickname', {str}), + 'author_id': ('sender_id', {str}), + 'id': ('id', {str}, {lambda x: x or None}), + 'text': ('message', {str}), + 'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}), + 'author_is_uploader': ('sender_id', {lambda x: x == '-1'}), + }, get_all=False) + + def _get_live_status(self, data_json, content_code): + video_type = data_json.get('type') + live_finished_at = data_json.get('live_finished_at') + + if video_type == 'vod': + if live_finished_at: + live_status = 'was_live' + else: + live_status = 'not_live' + elif video_type == 'live': + if not data_json.get('live_started_at'): + return 'is_upcoming' + + if not live_finished_at: + live_status = 'is_live' + else: + live_status = 'was_live' + + video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg')) + video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg')) + + self.write_debug( + f'{content_code}: allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.') + + if not (video_allow_dvr_flg and video_convert_to_vod_flg): + raise ExtractorError( + 'Live was ended, there is no video for download', video_id=content_code, expected=True) + else: + raise ExtractorError(f'Unknown type: {video_type!r}', video_id=content_code) + + self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}') + return live_status + + def _get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, **query_kwargs): + try: + res = 
self._call_api(query_path, item_id, **query_kwargs) + return traverse_obj(res, dict_path) + except ExtractorError as e: + if not isinstance(e.cause, HTTPError) or e.cause.status not in expected_code_msg.keys(): + raise e + self.raise_login_required( + expected_code_msg[e.cause.status], metadata_available=True, + method=self._auth_info.get('login_method')) + return None + + def _get_formats(self, data_json, live_status, content_code): + headers = filter_dict({ + 'Content-Type': 'application/json', + 'fc_use_device': 'null', + 'origin': f'https://{self._DOMAIN}', + 'Authorization': self._get_auth_token(), + }) + + formats = [] + if data_json.get('video'): + payload = {} + if data_json.get('type') == 'live' and live_status == 'was_live': + payload = {'broadcast_type': 'dvr'} + + session_id = self._get_authed_info( + f'video_pages/{content_code}/session_ids', f'{content_code}/session', + ('data', 'session_id', {str}), { + 401: 'Members-only content', + 403: 'Login required', + 408: 'Outdated token', + }, data=json.dumps(payload).encode(), headers=headers, + note='Getting session id', errnote='Unable to get session id') + + if session_id: + m3u8_url = data_json['video_stream']['authenticated_url'].format(session_id=session_id) + formats = self._extract_m3u8_formats(m3u8_url, content_code) + elif data_json.get('audio'): + m3u8_url = self._get_authed_info( + f'video_pages/{content_code}/content_access', f'{content_code}/content_access', + ('data', 'resource', {url_or_none}), { + 403: 'Login required', + 404: 'Members-only content', + 408: 'Outdated token', + }, headers=headers, note='Getting content resource', + errnote='Unable to get content resource') + + if m3u8_url: + audio_type = traverse_obj(data_json, ( + 'audio_filename_transcoded_list', lambda _, v: v['url'] == m3u8_url, + 'video_filename_type', 'value', {str}), get_all=False) + if audio_type == 'audio_free': + # fully free audios are always of "audio_paid" + msg = 'You have no right to access the paid content. 
' + if traverse_obj(data_json, 'video_free_periods'): + msg += 'There may be some silent parts in this audio' + else: + msg += 'This audio may be completely blank' + self.raise_login_required( + msg, metadata_available=True, method=self._auth_info.get('login_method')) + + formats = [{ + 'url': m3u8_url, + 'format_id': 'audio', + 'protocol': 'm3u8_native', + 'ext': 'm4a', + 'vcodec': 'none', + 'acodec': 'aac', + 'format_note': audio_type, + }] + else: + raise ExtractorError('Unknown media type', video_id=content_code) + + return formats + + def _get_auth_token(self): + if not self._auth_info.get('auth_token'): + try: + self._login() + return self._auth_info.get('auth_token') + except Exception as e: + raise ExtractorError('Unable to login due to unknown reasons') from e + + if self._auth_info.get('auth_token'): + try: + self._refresh_token() + return self._auth_info.get('auth_token') + except Exception as e: + raise ExtractorError('Unable to refresh token due to unknown reasons') from e + + return None + + def _refresh_token(self): + if not (refresh_func := self._auth_info.get('refresh_func')): + return False + + res = self._download_json( + **refresh_func(self._auth_info), expected_status=(400, 403, 404), + note='Refreshing token', errnote='Unable to refresh token') + if error := traverse_obj( + res, ('error', 'message', {lambda x: base64.b64decode(x).decode()}), ('error', 'message')): + self.report_warning(f'Unable to refresh token: {error!r}') + elif token := traverse_obj(res, ('data', 'access_token', {str})): + # niconico + self._auth_info = {'auth_token': f'Bearer {token}'} + return True + elif token := traverse_obj(res, ('access_token', {str})): + # auth0 + self._auth_info = {'auth_token': f'Bearer {token}'} + if refresh_token := traverse_obj(res, ('refresh_token', {str})): + self._auth_info = {'refresh_token': refresh_token} + self.cache.store( + self._NETRC_MACHINE, self._auth_info['cache_key'], {self._auth_info['cache_name']: refresh_token}) + return True + self.report_warning('Unable to find new refresh_token') + else: + self.report_warning('Unable to refresh token') + + return False + + def _login(self): + social_login_providers = traverse_obj(self._call_api( + f'fanclub_groups/{self._FANCLUB_GROUP_ID}/login', None), + ('data', 'fanclub_group', 'fanclub_social_login_providers', ..., {dict})) or [] + self.write_debug(f'social_login_providers = {social_login_providers!r}') + + for provider in social_login_providers: + provider_name = traverse_obj(provider, ('social_login_provider', 'provider_name', {str})) + if provider_name == 'ニコニコ': + redirect_url = update_url_query(provider['url'], { + 'client_id': 'FCS{:05d}'.format(provider['id']), + 'redirect_uri': f'https://{self._DOMAIN}/login', + }) + refresh_url = f'{self._API_BASE_URL}/fanclub_groups/{self._FANCLUB_GROUP_ID}/auth/refresh' + return self._niconico_sns_login(redirect_url, refresh_url) + else: + raise ExtractorError(f'Unsupported social login provider: {provider_name}') + + return self._auth0_login() + + def _niconico_sns_login(self, redirect_url, refresh_url): + self._auth_info = {'login_method': 'any'} + mail_tel, password = self._get_login_info() + if not mail_tel: + return + + cache_key = hashlib.md5(f'{self._DOMAIN}:{mail_tel}:{password}'.encode()).hexdigest() + self._auth_info = {'cache_key': cache_key} + cache_name = 'niconico_sns' + + if cached_cookies := traverse_obj(self.cache.load( + self._NETRC_MACHINE, cache_key), (cache_name, {dict})): + for name, value in cached_cookies.items(): + 
self._set_cookie(get_domain(redirect_url), name, value)
+
+        if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)):
+            if cached_cookies:
+                self.cache.store(self._NETRC_MACHINE, cache_key, None)
+
+            self._niconico_login(mail_tel, password)
+
+            if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)):
+                self.report_warning('Unable to get token after login, please check if '
+                                    'niconico channel plus is authorized to use your niconico account')
+                return
+
+        self._auth_info = {
+            'refresh_func': lambda data: {
+                'url_or_request': data['refresh_url'],
+                'video_id': None,
+                'headers': {'Authorization': data['auth_token']},
+                'data': b'',
+            },
+            'refresh_url': refresh_url,
+            'auth_token': auth_token,
+        }
+
+        cookies = dict(traverse_obj(self.cookiejar.get_cookies_for_url(
+            redirect_url), (..., {lambda item: (item.name, item.value)})))
+        self.cache.store(self._NETRC_MACHINE, cache_key, {cache_name: cookies})
+
+    def _niconico_get_token_by_cookies(self, redirect_url):
+        urlh = self._request_webpage(
+            redirect_url, None, note='Getting niconico auth status',
+            expected_status=404, errnote='Unable to get niconico auth status')
+        if not urlh.url.startswith(f'https://{self._DOMAIN}/login'):
+            return None
+
+        if not (sns_login_code := traverse_obj(parse_qs(urlh.url), ('code', 0))):
+            self.report_warning('Unable to get sns login code')
+            return None
+
+        token = traverse_obj(self._call_api(
+            f'fanclub_groups/{self._FANCLUB_GROUP_ID}/sns_login', None, fatal=False,
+            note='Fetching sns login info', errnote='Unable to fetch sns login info',
+            data=json.dumps({
+                'key_cloak_user': {
+                    'code': sns_login_code,
+                    'redirect_uri': f'https://{self._DOMAIN}/login',
+                },
+                'fanclub_site': {'id': int(self._FANCLUB_SITE_ID_AUTH)},
+            }).encode(), headers={
+                'Content-Type': 'application/json',
+                'fc_use_device': 'null',
+                'Referer': f'https://{self._DOMAIN}',
+            }), ('data', 'access_token', {str}))
+        if token:
+            return f'Bearer {token}'
+
+        self.report_warning('Unable to get token from sns login info')
+        return None
+
+    def _niconico_login(self, mail_tel, password):
+        login_form_strs = {
+            'mail_tel': mail_tel,
+            'password': password,
+        }
+        page, urlh = self._download_webpage_handle(
+            'https://account.nicovideo.jp/login/redirector', None,
+            note='Logging into niconico', errnote='Unable to log into niconico',
+            data=urlencode_postdata(login_form_strs),
+            headers={
+                'Referer': 'https://account.nicovideo.jp/login',
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+        if urlh.url.startswith('https://account.nicovideo.jp/login'):
+            self.report_warning('Unable to log in: bad username or password')
+            return False
+        elif urlh.url.startswith('https://account.nicovideo.jp/mfa'):
+            post_url = self._search_regex(
+                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'mfa post url', group='url')
+            page, urlh = self._download_webpage_handle(
+                urljoin('https://account.nicovideo.jp/', post_url), None,
+                note='Performing MFA', errnote='Unable to complete MFA',
+                data=urlencode_postdata({
+                    'otp': self._get_tfa_info('6 digits code')
+                }), headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                })
+            if urlh.url.startswith('https://account.nicovideo.jp/mfa') or 'formError' in page:
+                err_msg = self._html_search_regex(
+                    r'formError\b[^>]*>(.*?)</div>', page, 'form_error',
+                    default='There\'s an error but the message can\'t be parsed.',
+                    flags=re.DOTALL)
+                self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
+                return False
+        return True
+
+    def _auth0_login(self):
+        self._auth_info = 
{'login_method': 'password'} + username, password = self._get_login_info() + if not username: + return + + cache_key = hashlib.md5(f'{self._DOMAIN}:{username}:{password}'.encode()).hexdigest() + cache_name = 'refresh' + self._auth_info = { + 'cache_key': cache_key, + 'cache_name': cache_name, + } + + login_info = self._call_api(f'fanclub_sites/{self._FANCLUB_SITE_ID_AUTH}/login', None)['data']['fanclub_site'] + self.write_debug(f'login_info = {login_info}') + auth0_web_client_id = login_info['auth0_web_client_id'] + auth0_domain = login_info['fanclub_group']['auth0_domain'] + + token_url = f'https://{auth0_domain}/oauth/token' + redirect_url = f'https://{self._DOMAIN}/login/login-redirect' + + auth0_client = base64.b64encode(json.dumps({ + 'name': 'auth0-spa-js', + 'version': '2.0.6', + }).encode()).decode() + + self._auth_info = {'refresh_func': lambda data: { + 'url_or_request': token_url, + 'video_id': None, + 'headers': {'Auth0-Client': auth0_client}, + 'data': urlencode_postdata({ + 'client_id': auth0_web_client_id, + 'grant_type': 'refresh_token', + 'refresh_token': data['refresh_token'], + 'redirect_uri': redirect_url, + }), + }} + + def random_str(): + return ''.join(random.choices(string.digits + string.ascii_letters, k=43)) + + state = base64.b64encode(random_str().encode()) + nonce = base64.b64encode(random_str().encode()) + code_verifier = random_str().encode() + code_challenge = base64.b64encode( + hashlib.sha256(code_verifier).digest()).decode().translate(self._AUTH0_BASE64_TRANS) + + authorize_url = update_url_query(f'https://{auth0_domain}/authorize', { + 'client_id': auth0_web_client_id, + 'scope': 'openid profile email offline_access', + 'redirect_uri': redirect_url, + 'audience': f'api.{self._DOMAIN}', + 'prompt': 'login', + 'response_type': 'code', + 'response_mode': 'query', + 'state': state, + 'nonce': nonce, + 'code_challenge': code_challenge, + 'code_challenge_method': 'S256', + 'auth0Client': auth0_client, + }) + + if cached_refresh_token := traverse_obj(self.cache.load( + self._NETRC_MACHINE, cache_key), (cache_name, {str})): + self._auth_info = {'refresh_token': cached_refresh_token} + if self._refresh_token(): + self.write_debug('cached tokens updated') + return + self.cache.store(self._NETRC_MACHINE, cache_key, None) + + login_form = self._hidden_inputs(self._download_webpage( + authorize_url, None, note='Getting login form', errnote='Unable to get login form')) + state_obtained = login_form['state'] + login_url = f'https://{auth0_domain}/u/login?state={state_obtained}' + + login_form.update({ + 'username': username, + 'password': password, + 'action': 'default', + }) + + urlh = self._request_webpage( + login_url, None, note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(login_form), expected_status=(400, 404)) + if urlh.status == 400: + self.report_warning('Unable to log in: bad username or password') + return + if not (urlh.status == 404 and urlh.url.startswith(redirect_url)): + self.report_warning('Unable to log in: Unknown login status') + return + + code = parse_qs(urlh.url)['code'][0] + + token_json = self._download_json( + token_url, None, headers={'Auth0-Client': auth0_client}, + note='Getting auth0 tokens', errnote='Unable to get auth0 tokens', + data=urlencode_postdata({ + 'client_id': auth0_web_client_id, + 'code_verifier': code_verifier, + 'grant_type': 'authorization_code', + 'code': code, + 'redirect_uri': redirect_url, + })) + + access_token = token_json['access_token'] + refresh_token = token_json['refresh_token'] + + 
auth_token = f'Bearer {access_token}'
+
+        self._auth_info = {
+            'auth_token': auth_token,
+            'refresh_token': refresh_token,
+        }
+
+        self.cache.store(self._NETRC_MACHINE, cache_key, {cache_name: refresh_token})
+
+    def _fetch_paged_channel_video_list(self, path, query, channel, item_id, page):
+        response = self._call_api(
+            path, item_id, query={
+                **query,
+                'page': (page + 1),
+                'per_page': self._LIST_PAGE_SIZE,
+            },
+            headers={'fc_use_device': 'null'},
+            note=f'Fetching channel info (page {page + 1})',
+            errnote=f'Unable to fetch channel info (page {page + 1})')
+
+        for content_code in traverse_obj(
+                response, ('data', 'video_pages', 'list', ..., 'content_code', {str})):
+            yield self.url_result('/'.join(filter(
+                None, [f'https://{self._DOMAIN}', channel, 'video', content_code])), SheetaEmbedIE)
+
+    def _extract_video_list_page(self, url):
+        """
+        API parameters:
+        sort:
+            -display_date         公開日が新しい順 (newest to oldest)
+            display_date          公開日が古い順 (oldest to newest)
+            -number_of_vod_views  再生数が多い順 (most play count)
+            number_of_vod_views   コメントが多い順 (most comments)
+        vod_type (is "vodType" in "url"):
+            0 すべて (all)
+            1 会員限定 (members only)
+            2 一部無料 (partially free)
+            3 レンタル (rental)
+            4 生放送アーカイブ (live archives)
+            5 アップロード動画 (uploaded videos)
+            7 無料 (free)
+        """
+
+        self._DOMAIN, channel_id = re.match(
+            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/videos', url
+        ).group('domain', 'channel')
+        self._extract_base_info(channel_id)
+
+        channel_info = self._extract_channel_info(channel_id)
+        full_channel_id = channel_info['channel_id']
+        channel_name = channel_info['channel']
+        qs = parse_qs(url)
+
+        return self.playlist_result(
+            OnDemandPagedList(
+                functools.partial(
+                    self._fetch_paged_channel_video_list, f'fanclub_sites/{self._FANCLUB_SITE_ID_INFO}/video_pages',
+                    filter_dict({
+                        'tag': traverse_obj(qs, ('tag', 0)),
+                        'sort': traverse_obj(qs, ('sort', 0), default='-display_date'),
+                        'vod_type': traverse_obj(qs, ('vodType', 0), default='0'),
+                    }),
+                    channel_id, f'{full_channel_id}/videos'),
+                self._LIST_PAGE_SIZE),
+            playlist_id=f'{full_channel_id}/videos', playlist_title=f'{channel_name}-videos')
+
+    def _extract_live_list_page(self, url):
+        """
+        API parameters:
+        live_type:
+            1 放送中 (on air)
+            2 放送予定 (scheduled live streams, oldest to newest)
+            3 過去の放送 - すべて (all ended live streams, newest to oldest)
+            4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest)
+        We use "4" instead of "3" because some recently ended live streams could not be downloaded.
+        """
+
+        self._DOMAIN, channel_id = re.match(
+            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/lives', url
+        ).group('domain', 'channel')
+        self._extract_base_info(channel_id)
+
+        channel_info = self._extract_channel_info(channel_id)
+        full_channel_id = channel_info['channel_id']
+        channel_name = channel_info['channel']
+
+        return self.playlist_result(
+            OnDemandPagedList(
+                functools.partial(
+                    self._fetch_paged_channel_video_list, f'fanclub_sites/{self._FANCLUB_SITE_ID_INFO}/live_pages',
+                    {'live_type': 4}, channel_id, f'{full_channel_id}/lives'),
+                self._LIST_PAGE_SIZE),
+            playlist_id=f'{full_channel_id}/lives', playlist_title=f'{channel_name}-lives')

From 4632d395bd2411260c6da506178b007ce224b7a9 Mon Sep 17 00:00:00 2001
From: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
Date: Thu, 23 May 2024 13:52:35 +0000
Subject: [PATCH 02/11] for github security check: hash credentials with sha-1
 instead of md5

---
 yt_dlp/extractor/sheeta.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py
index 3209b1ecd..b24f25b74 100644
--- a/yt_dlp/extractor/sheeta.py
+++ b/yt_dlp/extractor/sheeta.py
@@ -532,7 +532,7 @@ def _niconico_sns_login(self, redirect_url, refresh_url):
         if not mail_tel:
             return
 
-        cache_key = hashlib.md5(f'{self._DOMAIN}:{mail_tel}:{password}'.encode()).hexdigest()
+        cache_key = hashlib.sha1(f'{self._DOMAIN}:{mail_tel}:{password}'.encode()).hexdigest()
         self._auth_info = {'cache_key': cache_key}
         cache_name = 'niconico_sns'
 
@@ -640,7 +640,7 @@ def _auth0_login(self):
         if not username:
             return
 
-        cache_key = hashlib.md5(f'{self._DOMAIN}:{username}:{password}'.encode()).hexdigest()
+        cache_key = hashlib.sha1(f'{self._DOMAIN}:{username}:{password}'.encode()).hexdigest()
         cache_name = 'refresh'
         self._auth_info = {
             'cache_key': cache_key,

From 34e76c4a9e29f6a89a543171678e02f45822d0a4 Mon Sep 17 00:00:00 2001
From: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
Date: Tue, 15 Oct 2024 16:06:22 +0000
Subject: [PATCH 03/11] new code style

---
 yt_dlp/extractor/sheeta.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py
index b24f25b74..77fa940a3 100644
--- a/yt_dlp/extractor/sheeta.py
+++ b/yt_dlp/extractor/sheeta.py
@@ -16,13 +16,13 @@
     get_domain,
     int_or_none,
     parse_qs,
-    traverse_obj,
     unified_timestamp,
+    update_url_query,
     url_or_none,
     urlencode_postdata,
     urljoin,
-    update_url_query,
 )
+from ..utils.traversal import traverse_obj
 
 
 class SheetaEmbedIE(InfoExtractor):
@@ -75,7 +75,7 @@ class SheetaEmbedIE(InfoExtractor):
             'release_timestamp': 1709959800,
             'release_date': '20240309',
         },
-        'params': {'skip_download': True}
+        'params': {'skip_download': True},
     }, {
         'url': 'https://canan8181.com/video/smxar9atjfNBn27bHhcTFLyg',
         'info_dict': {
@@ -98,7 +98,7 @@ class SheetaEmbedIE(InfoExtractor):
             'release_timestamp': 1659175200,
             'release_date': '20220730',
         },
-        'params': {'skip_download': True}
+        'params': {'skip_download': True},
    }, {
         'url': 'https://11audee.jp/audio/smx3ebEZFRnHeaGzUzgi5A98',
         'info_dict': {
@@ -119,7 +119,7 @@ class SheetaEmbedIE(InfoExtractor):
             'tags': ['RADIO'],
             'upload_date': '20240319',
         },
-        'params': {'skip_download': True}
+        'params': {'skip_download': True},
     }, {
         'url': 'https://hololive-fc.com/videos',
         'info_dict': {
@@ -198,7 +198,7 @@ def _extract_from_url(self, url):
 
     def _extract_from_webpage(self, url, webpage):
         if 'GTM-KXT7G5G' in webpage or 'NicoGoogleTagManagerDataLayer' in webpage:
            yield self._extract_from_url(url)
-            raise self.StopExtraction()
+            raise self.StopExtraction
 
     def _call_api(self, path, item_id, *args, **kwargs):
         return self._download_json(f'{self._API_BASE_URL}/{path}', item_id, *args, **kwargs)
@@ -274,7 +274,7 @@ def _extract_channel_info(self, channel_id):
 
     def _extract_player_page(self, url):
         self._DOMAIN, channel_id, content_code = re.match(
-            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/(?:live|video|audio)/(?P<code>sm\w+)', url
+            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/(?:live|video|audio)/(?P<code>sm\w+)', url,
         ).group('domain', 'channel', 'code')
         self._extract_base_info(channel_id)
@@ -390,7 +390,7 @@ def _get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, **
             res = self._call_api(query_path, item_id, **query_kwargs)
             return traverse_obj(res, dict_path)
         except ExtractorError as e:
-            if not isinstance(e.cause, HTTPError) or e.cause.status not in expected_code_msg.keys():
+            if not isinstance(e.cause, HTTPError) or e.cause.status not in expected_code_msg:
                 raise e
             self.raise_login_required(
                 expected_code_msg[e.cause.status], metadata_available=True,
@@ -621,7 +621,7 @@ def _niconico_login(self, mail_tel, password):
                 urljoin('https://account.nicovideo.jp/', post_url), None,
                 note='Performing MFA', errnote='Unable to complete MFA',
                 data=urlencode_postdata({
-                    'otp': self._get_tfa_info('6 digits code')
+                    'otp': self._get_tfa_info('6 digits code'),
                 }), headers={
                     'Content-Type': 'application/x-www-form-urlencoded',
                 })
@@ -785,7 +785,7 @@ def _extract_video_list_page(self, url):
         """
 
         self._DOMAIN, channel_id = re.match(
-            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/videos', url
+            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/videos', url,
         ).group('domain', 'channel')
         self._extract_base_info(channel_id)
@@ -819,7 +819,7 @@ def _extract_live_list_page(self, url):
         """
 
         self._DOMAIN, channel_id = re.match(
-            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/lives', url
+            r'https?://(?P<domain>[\w.-]+)(/(?P<channel>[\w.-]+))?/lives', url,
         ).group('domain', 'channel')
         self._extract_base_info(channel_id)

From a9ba89fd03213b370843af327649cf9a7cb0e52b Mon Sep 17 00:00:00 2001
From: chocoie
Date: Wed, 29 Jan 2025 20:18:44 +0800
Subject: [PATCH 04/11] refactor: extract auth logic

---
 yt_dlp/extractor/sheeta.py | 655 +++++++++++++++++++------------------
 1 file changed, 336 insertions(+), 319 deletions(-)

diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py
index 77fa940a3..1221d50f9 100644
--- a/yt_dlp/extractor/sheeta.py
+++ b/yt_dlp/extractor/sheeta.py
@@ -25,6 +25,327 @@
 from ..utils.traversal import traverse_obj
 
 
+class AuthManager:
+    _AUTH_INFO = {}
+
+    def __init__(self, ie: 'SheetaEmbedIE'):
+        self._ie = ie
+        self._auth_info = {}
+
+    @property
+    def _auth_info(self):
+        if not self._AUTH_INFO.get(self._ie._DOMAIN):
+            self._AUTH_INFO[self._ie._DOMAIN] = {}
+        return self._AUTH_INFO.get(self._ie._DOMAIN)
+
+    @_auth_info.setter
+    def _auth_info(self, value):
+        if not self._AUTH_INFO.get(self._ie._DOMAIN):
+            self._AUTH_INFO[self._ie._DOMAIN] = {}
+        self._AUTH_INFO[self._ie._DOMAIN].update(value)
+
+    def _get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, **query_kwargs):
+        try:
+            res = self._ie._call_api(query_path, item_id, **query_kwargs)
+            return traverse_obj(res, dict_path)
+        except ExtractorError as e:
+            if not isinstance(e.cause, HTTPError) or e.cause.status not in expected_code_msg:
+                raise e
+            self._ie.raise_login_required(
+                expected_code_msg[e.cause.status], metadata_available=True,
+                method=self._auth_info.get('login_method'))
+            return None
+
+    def _get_auth_token(self):
+        
if not self._auth_info.get('auth_token'): + try: + self._login() + return self._auth_info.get('auth_token') + except Exception as e: + raise ExtractorError('Unable to login due to unknown reasons') from e + + if self._auth_info.get('auth_token'): + try: + self._refresh_token() + return self._auth_info.get('auth_token') + except Exception as e: + raise ExtractorError('Unable to refresh token due to unknown reasons') from e + + return None + + def _refresh_token(self): + if not (refresh_func := self._auth_info.get('refresh_func')): + return False + + res = self._ie._download_json( + **refresh_func(self._auth_info), expected_status=(400, 403, 404), + note='Refreshing token', errnote='Unable to refresh token') + if error := traverse_obj( + res, ('error', 'message', {lambda x: base64.b64decode(x).decode()}), ('error', 'message')): + self._ie.report_warning(f'Unable to refresh token: {error!r}') + elif token := traverse_obj(res, ('data', 'access_token', {str})): + # niconico + self._auth_info = {'auth_token': f'Bearer {token}'} + return True + elif token := traverse_obj(res, ('access_token', {str})): + # auth0 + self._auth_info = {'auth_token': f'Bearer {token}'} + if refresh_token := traverse_obj(res, ('refresh_token', {str})): + self._auth_info = {'refresh_token': refresh_token} + self._ie.cache.store( + self._ie._NETRC_MACHINE, self._auth_info['cache_key'], {self._auth_info['cache_name']: refresh_token}) + return True + self._ie.report_warning('Unable to find new refresh_token') + else: + self._ie.report_warning('Unable to refresh token') + + return False + + def _login(self): + social_login_providers = traverse_obj(self._ie._call_api( + f'fanclub_groups/{self._ie._FANCLUB_GROUP_ID}/login', None), + ('data', 'fanclub_group', 'fanclub_social_login_providers', ..., {dict})) or [] + self._ie.write_debug(f'social_login_providers = {social_login_providers!r}') + + for provider in social_login_providers: + provider_name = traverse_obj(provider, ('social_login_provider', 'provider_name', {str})) + if provider_name == 'ニコニコ': + redirect_url = update_url_query(provider['url'], { + 'client_id': 'FCS{:05d}'.format(provider['id']), + 'redirect_uri': f'https://{self._ie._DOMAIN}/login', + }) + refresh_url = f'{self._ie._API_BASE_URL}/fanclub_groups/{self._ie._FANCLUB_GROUP_ID}/auth/refresh' + return self._niconico_sns_login(redirect_url, refresh_url) + else: + raise ExtractorError(f'Unsupported social login provider: {provider_name}') + + return self._auth0_login() + + def _niconico_sns_login(self, redirect_url, refresh_url): + self._auth_info = {'login_method': 'any'} + mail_tel, password = self._ie._get_login_info() + if not mail_tel: + return + + cache_key = hashlib.sha1(f'{self._ie._DOMAIN}:{mail_tel}:{password}'.encode()).hexdigest() + self._auth_info = {'cache_key': cache_key} + cache_name = 'niconico_sns' + + if cached_cookies := traverse_obj(self._ie.cache.load( + self._ie._NETRC_MACHINE, cache_key), (cache_name, {dict})): + for name, value in cached_cookies.items(): + self._ie._set_cookie(get_domain(redirect_url), name, value) + + if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)): + if cached_cookies: + self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, None) + + self._niconico_login(mail_tel, password) + + if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)): + self._ie.report_warning('Unable to get token after login, please check if ' + 'niconico channel plus is authorized to use your niconico account') + return + + self._auth_info = { + 
'refresh_func': lambda data: {
+                'url_or_request': data['refresh_url'],
+                'video_id': None,
+                'headers': {'Authorization': data['auth_token']},
+                'data': b'',
+            },
+            'refresh_url': refresh_url,
+            'auth_token': auth_token,
+        }
+
+        cookies = dict(traverse_obj(self._ie.cookiejar.get_cookies_for_url(
+            redirect_url), (..., {lambda item: (item.name, item.value)})))
+        self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, {cache_name: cookies})
+
+    def _niconico_get_token_by_cookies(self, redirect_url):
+        urlh = self._ie._request_webpage(
+            redirect_url, None, note='Getting niconico auth status',
+            expected_status=404, errnote='Unable to get niconico auth status')
+        if not urlh.url.startswith(f'https://{self._ie._DOMAIN}/login'):
+            return None
+
+        if not (sns_login_code := traverse_obj(parse_qs(urlh.url), ('code', 0))):
+            self._ie.report_warning('Unable to get sns login code')
+            return None
+
+        token = traverse_obj(self._ie._call_api(
+            f'fanclub_groups/{self._ie._FANCLUB_GROUP_ID}/sns_login', None, fatal=False,
+            note='Fetching sns login info', errnote='Unable to fetch sns login info',
+            data=json.dumps({
+                'key_cloak_user': {
+                    'code': sns_login_code,
+                    'redirect_uri': f'https://{self._ie._DOMAIN}/login',
+                },
+                'fanclub_site': {'id': int(self._ie._FANCLUB_SITE_ID_AUTH)},
+            }).encode(), headers={
+                'Content-Type': 'application/json',
+                'fc_use_device': 'null',
+                'Referer': f'https://{self._ie._DOMAIN}',
+            }), ('data', 'access_token', {str}))
+        if token:
+            return f'Bearer {token}'
+
+        self._ie.report_warning('Unable to get token from sns login info')
+        return None
+
+    def _niconico_login(self, mail_tel, password):
+        login_form_strs = {
+            'mail_tel': mail_tel,
+            'password': password,
+        }
+        page, urlh = self._ie._download_webpage_handle(
+            'https://account.nicovideo.jp/login/redirector', None,
+            note='Logging into niconico', errnote='Unable to log into niconico',
+            data=urlencode_postdata(login_form_strs),
+            headers={
+                'Referer': 'https://account.nicovideo.jp/login',
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+        if urlh.url.startswith('https://account.nicovideo.jp/login'):
+            self._ie.report_warning('Unable to log in: bad username or password')
+            return False
+        elif urlh.url.startswith('https://account.nicovideo.jp/mfa'):
+            post_url = self._ie._search_regex(
+                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'mfa post url', group='url')
+            page, urlh = self._ie._download_webpage_handle(
+                urljoin('https://account.nicovideo.jp/', post_url), None,
+                note='Performing MFA', errnote='Unable to complete MFA',
+                data=urlencode_postdata({
+                    'otp': self._ie._get_tfa_info('6 digits code'),
+                }), headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                })
+            if urlh.url.startswith('https://account.nicovideo.jp/mfa') or 'formError' in page:
+                err_msg = self._ie._html_search_regex(
+                    r'formError\b[^>]*>(.*?)</div>', page, 'form_error',
+                    default='There\'s an error but the message can\'t be parsed.',
+                    flags=re.DOTALL)
+                self._ie.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
+                return False
+        return True
+
+    def _auth0_login(self):
+        self._auth_info = {'login_method': 'password'}
+        username, password = self._ie._get_login_info()
+        if not username:
+            return
+
+        cache_key = hashlib.sha1(f'{self._ie._DOMAIN}:{username}:{password}'.encode()).hexdigest()
+        cache_name = 'refresh'
+        self._auth_info = {
+            'cache_key': cache_key,
+            'cache_name': cache_name,
+        }
+
+        login_info = self._ie._call_api(f'fanclub_sites/{self._ie._FANCLUB_SITE_ID_AUTH}/login', None)['data']['fanclub_site']
self._ie.write_debug(f'login_info = {login_info}') + auth0_web_client_id = login_info['auth0_web_client_id'] + auth0_domain = login_info['fanclub_group']['auth0_domain'] + + token_url = f'https://{auth0_domain}/oauth/token' + redirect_url = f'https://{self._ie._DOMAIN}/login/login-redirect' + + auth0_client = base64.b64encode(json.dumps({ + 'name': 'auth0-spa-js', + 'version': '2.0.6', + }).encode()).decode() + + self._auth_info = {'refresh_func': lambda data: { + 'url_or_request': token_url, + 'video_id': None, + 'headers': {'Auth0-Client': auth0_client}, + 'data': urlencode_postdata({ + 'client_id': auth0_web_client_id, + 'grant_type': 'refresh_token', + 'refresh_token': data['refresh_token'], + 'redirect_uri': redirect_url, + }), + }} + + def random_str(): + return ''.join(random.choices(string.digits + string.ascii_letters, k=43)) + + state = base64.b64encode(random_str().encode()) + nonce = base64.b64encode(random_str().encode()) + code_verifier = random_str().encode() + code_challenge = base64.b64encode( + hashlib.sha256(code_verifier).digest()).decode().translate(self._ie._AUTH0_BASE64_TRANS) + + authorize_url = update_url_query(f'https://{auth0_domain}/authorize', { + 'client_id': auth0_web_client_id, + 'scope': 'openid profile email offline_access', + 'redirect_uri': redirect_url, + 'audience': f'api.{self._ie._DOMAIN}', + 'prompt': 'login', + 'response_type': 'code', + 'response_mode': 'query', + 'state': state, + 'nonce': nonce, + 'code_challenge': code_challenge, + 'code_challenge_method': 'S256', + 'auth0Client': auth0_client, + }) + + if cached_refresh_token := traverse_obj(self._ie.cache.load( + self._ie._NETRC_MACHINE, cache_key), (cache_name, {str})): + self._auth_info = {'refresh_token': cached_refresh_token} + if self._refresh_token(): + self._ie.write_debug('cached tokens updated') + return + self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, None) + + login_form = self._ie._hidden_inputs(self._ie._download_webpage( + authorize_url, None, note='Getting login form', errnote='Unable to get login form')) + state_obtained = login_form['state'] + login_url = f'https://{auth0_domain}/u/login?state={state_obtained}' + + login_form.update({ + 'username': username, + 'password': password, + 'action': 'default', + }) + + urlh = self._ie._request_webpage( + login_url, None, note='Logging in', errnote='Unable to log in', + data=urlencode_postdata(login_form), expected_status=(400, 404)) + if urlh.status == 400: + self._ie.report_warning('Unable to log in: bad username or password') + return + if not (urlh.status == 404 and urlh.url.startswith(redirect_url)): + self._ie.report_warning('Unable to log in: Unknown login status') + return + + code = parse_qs(urlh.url)['code'][0] + + token_json = self._ie._download_json( + token_url, None, headers={'Auth0-Client': auth0_client}, + note='Getting auth0 tokens', errnote='Unable to get auth0 tokens', + data=urlencode_postdata({ + 'client_id': auth0_web_client_id, + 'code_verifier': code_verifier, + 'grant_type': 'authorization_code', + 'code': code, + 'redirect_uri': redirect_url, + })) + + access_token = token_json['access_token'] + refresh_token = token_json['refresh_token'] + + auth_token = f'Bearer {access_token}' + + self._auth_info = { + 'auth_token': auth_token, + 'refresh_token': refresh_token, + } + + self._ie.cache.store(self._NETRC_MACHINE, cache_key, {cache_name: refresh_token}) + + class SheetaEmbedIE(InfoExtractor): _NETRC_MACHINE = 'sheeta' IE_NAME = 'sheeta' @@ -177,7 +498,6 @@ class SheetaEmbedIE(InfoExtractor): 
_FANCLUB_GROUP_ID = None _FANCLUB_SITE_ID_AUTH = None _FANCLUB_SITE_ID_INFO = None - _AUTH_INFO = {} _AUTH0_BASE64_TRANS = str.maketrans({ '+': '-', @@ -186,8 +506,18 @@ class SheetaEmbedIE(InfoExtractor): }) _LIST_PAGE_SIZE = 12 + auth_manager: AuthManager = None + + # @classmethod + # def suitable(cls, url): + # return ( + # not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)) + # and super().suitable(url)) + def _extract_from_url(self, url): parsed_url = urllib.parse.urlparse(url) + if not self.auth_manager: + self.auth_manager = AuthManager(self) if '/videos' in parsed_url.path: return self._extract_video_list_page(url) elif '/lives' in parsed_url.path: @@ -229,18 +559,6 @@ def _extract_base_info(self, channel_id): else: self._FANCLUB_SITE_ID_INFO = self._FANCLUB_SITE_ID_AUTH - @property - def _auth_info(self): - if not self._AUTH_INFO.get(self._DOMAIN): - self._AUTH_INFO[self._DOMAIN] = {} - return self._AUTH_INFO.get(self._DOMAIN) - - @_auth_info.setter - def _auth_info(self, value): - if not self._AUTH_INFO.get(self._DOMAIN): - self._AUTH_INFO[self._DOMAIN] = {} - self._AUTH_INFO[self._DOMAIN].update(value) - @property def _channel_base_info(self): return traverse_obj(self._call_api( @@ -385,24 +703,12 @@ def _get_live_status(self, data_json, content_code): self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}') return live_status - def _get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, **query_kwargs): - try: - res = self._call_api(query_path, item_id, **query_kwargs) - return traverse_obj(res, dict_path) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status not in expected_code_msg: - raise e - self.raise_login_required( - expected_code_msg[e.cause.status], metadata_available=True, - method=self._auth_info.get('login_method')) - return None - def _get_formats(self, data_json, live_status, content_code): headers = filter_dict({ 'Content-Type': 'application/json', 'fc_use_device': 'null', 'origin': f'https://{self._DOMAIN}', - 'Authorization': self._get_auth_token(), + 'Authorization': self.auth_manager._get_auth_token(), }) formats = [] @@ -411,7 +717,7 @@ def _get_formats(self, data_json, live_status, content_code): if data_json.get('type') == 'live' and live_status == 'was_live': payload = {'broadcast_type': 'dvr'} - session_id = self._get_authed_info( + session_id = self.auth_manager._get_authed_info( f'video_pages/{content_code}/session_ids', f'{content_code}/session', ('data', 'session_id', {str}), { 401: 'Members-only content', @@ -424,7 +730,7 @@ def _get_formats(self, data_json, live_status, content_code): m3u8_url = data_json['video_stream']['authenticated_url'].format(session_id=session_id) formats = self._extract_m3u8_formats(m3u8_url, content_code) elif data_json.get('audio'): - m3u8_url = self._get_authed_info( + m3u8_url = self.auth_manager._get_authed_info( f'video_pages/{content_code}/content_access', f'{content_code}/content_access', ('data', 'resource', {url_or_none}), { 403: 'Login required', @@ -445,7 +751,7 @@ def _get_formats(self, data_json, live_status, content_code): else: msg += 'This audio may be completely blank' self.raise_login_required( - msg, metadata_available=True, method=self._auth_info.get('login_method')) + msg, metadata_available=True, method=self.auth_manager._auth_info.get('login_method')) formats = [{ 'url': m3u8_url, @@ -461,295 +767,6 @@ def _get_formats(self, data_json, live_status, content_code): return formats - def 
_get_auth_token(self): - if not self._auth_info.get('auth_token'): - try: - self._login() - return self._auth_info.get('auth_token') - except Exception as e: - raise ExtractorError('Unable to login due to unknown reasons') from e - - if self._auth_info.get('auth_token'): - try: - self._refresh_token() - return self._auth_info.get('auth_token') - except Exception as e: - raise ExtractorError('Unable to refresh token due to unknown reasons') from e - - return None - - def _refresh_token(self): - if not (refresh_func := self._auth_info.get('refresh_func')): - return False - - res = self._download_json( - **refresh_func(self._auth_info), expected_status=(400, 403, 404), - note='Refreshing token', errnote='Unable to refresh token') - if error := traverse_obj( - res, ('error', 'message', {lambda x: base64.b64decode(x).decode()}), ('error', 'message')): - self.report_warning(f'Unable to refresh token: {error!r}') - elif token := traverse_obj(res, ('data', 'access_token', {str})): - # niconico - self._auth_info = {'auth_token': f'Bearer {token}'} - return True - elif token := traverse_obj(res, ('access_token', {str})): - # auth0 - self._auth_info = {'auth_token': f'Bearer {token}'} - if refresh_token := traverse_obj(res, ('refresh_token', {str})): - self._auth_info = {'refresh_token': refresh_token} - self.cache.store( - self._NETRC_MACHINE, self._auth_info['cache_key'], {self._auth_info['cache_name']: refresh_token}) - return True - self.report_warning('Unable to find new refresh_token') - else: - self.report_warning('Unable to refresh token') - - return False - - def _login(self): - social_login_providers = traverse_obj(self._call_api( - f'fanclub_groups/{self._FANCLUB_GROUP_ID}/login', None), - ('data', 'fanclub_group', 'fanclub_social_login_providers', ..., {dict})) or [] - self.write_debug(f'social_login_providers = {social_login_providers!r}') - - for provider in social_login_providers: - provider_name = traverse_obj(provider, ('social_login_provider', 'provider_name', {str})) - if provider_name == 'ニコニコ': - redirect_url = update_url_query(provider['url'], { - 'client_id': 'FCS{:05d}'.format(provider['id']), - 'redirect_uri': f'https://{self._DOMAIN}/login', - }) - refresh_url = f'{self._API_BASE_URL}/fanclub_groups/{self._FANCLUB_GROUP_ID}/auth/refresh' - return self._niconico_sns_login(redirect_url, refresh_url) - else: - raise ExtractorError(f'Unsupported social login provider: {provider_name}') - - return self._auth0_login() - - def _niconico_sns_login(self, redirect_url, refresh_url): - self._auth_info = {'login_method': 'any'} - mail_tel, password = self._get_login_info() - if not mail_tel: - return - - cache_key = hashlib.sha1(f'{self._DOMAIN}:{mail_tel}:{password}'.encode()).hexdigest() - self._auth_info = {'cache_key': cache_key} - cache_name = 'niconico_sns' - - if cached_cookies := traverse_obj(self.cache.load( - self._NETRC_MACHINE, cache_key), (cache_name, {dict})): - for name, value in cached_cookies.items(): - self._set_cookie(get_domain(redirect_url), name, value) - - if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)): - if cached_cookies: - self.cache.store(self._NETRC_MACHINE, cache_key, None) - - self._niconico_login(mail_tel, password) - - if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)): - self.report_warning('Unable to get token after login, please check if ' - 'niconico channel plus is authorized to use your niconico account') - return - - self._auth_info = { - 'refresh_func': lambda data: { - 'url_or_request': 
data['refresh_url'], - 'video_id': None, - 'headers': {'Authorization': data['auth_token']}, - 'data': b'', - }, - 'refresh_url': refresh_url, - 'auth_token': auth_token, - } - - cookies = dict(traverse_obj(self.cookiejar.get_cookies_for_url( - redirect_url), (..., {lambda item: (item.name, item.value)}))) - self.cache.store(self._NETRC_MACHINE, cache_key, {cache_name: cookies}) - - def _niconico_get_token_by_cookies(self, redirect_url): - urlh = self._request_webpage( - redirect_url, None, note='Getting niconico auth status', - expected_status=404, errnote='Unable to get niconico auth status') - if not urlh.url.startswith(f'https://{self._DOMAIN}/login'): - return None - - if not (sns_login_code := traverse_obj(parse_qs(urlh.url), ('code', 0))): - self.report_warning('Unable to get sns login code') - return None - - token = traverse_obj(self._call_api( - f'fanclub_groups/{self._FANCLUB_GROUP_ID}/sns_login', None, fatal=False, - note='Fetching sns login info', errnote='Unable to fetch sns login info', - data=json.dumps({ - 'key_cloak_user': { - 'code': sns_login_code, - 'redirect_uri': f'https://{self._DOMAIN}/login', - }, - 'fanclub_site': {'id': int(self._FANCLUB_SITE_ID_AUTH)}, - }).encode(), headers={ - 'Content-Type': 'application/json', - 'fc_use_device': 'null', - 'Referer': f'https://{self._DOMAIN}', - }), ('data', 'access_token', {str})) - if token: - return f'Bearer {token}' - - self.report_warning('Unable to get token from sns login info') - return None - - def _niconico_login(self, mail_tel, password): - login_form_strs = { - 'mail_tel': mail_tel, - 'password': password, - } - page, urlh = self._download_webpage_handle( - 'https://account.nicovideo.jp/login/redirector', None, - note='Logging into niconico', errnote='Unable to log into niconico', - data=urlencode_postdata(login_form_strs), - headers={ - 'Referer': 'https://account.nicovideo.jp/login', - 'Content-Type': 'application/x-www-form-urlencoded', - }) - if urlh.url.startswith('https://account.nicovideo.jp/login'): - self.report_warning('Unable to log in: bad username or password') - return False - elif urlh.url.startswith('https://account.nicovideo.jp/mfa'): - post_url = self._search_regex( - r']+action=(["\'])(?P.+?)\1', page, 'mfa post url', group='url') - page, urlh = self._download_webpage_handle( - urljoin('https://account.nicovideo.jp/', post_url), None, - note='Performing MFA', errnote='Unable to complete MFA', - data=urlencode_postdata({ - 'otp': self._get_tfa_info('6 digits code'), - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - if urlh.url.startswith('https://account.nicovideo.jp/mfa') or 'formError' in page: - err_msg = self._html_search_regex( - r'formError\b[^>]*>(.*?)', page, 'form_error', - default='There\'s an error but the message can\'t be parsed.', - flags=re.DOTALL) - self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"') - return False - return True - - def _auth0_login(self): - self._auth_info = {'login_method': 'password'} - username, password = self._get_login_info() - if not username: - return - - cache_key = hashlib.sha1(f'{self._DOMAIN}:{username}:{password}'.encode()).hexdigest() - cache_name = 'refresh' - self._auth_info = { - 'cache_key': cache_key, - 'cache_name': cache_name, - } - - login_info = self._call_api(f'fanclub_sites/{self._FANCLUB_SITE_ID_AUTH}/login', None)['data']['fanclub_site'] - self.write_debug(f'login_info = {login_info}') - auth0_web_client_id = login_info['auth0_web_client_id'] - auth0_domain = 
login_info['fanclub_group']['auth0_domain'] - - token_url = f'https://{auth0_domain}/oauth/token' - redirect_url = f'https://{self._DOMAIN}/login/login-redirect' - - auth0_client = base64.b64encode(json.dumps({ - 'name': 'auth0-spa-js', - 'version': '2.0.6', - }).encode()).decode() - - self._auth_info = {'refresh_func': lambda data: { - 'url_or_request': token_url, - 'video_id': None, - 'headers': {'Auth0-Client': auth0_client}, - 'data': urlencode_postdata({ - 'client_id': auth0_web_client_id, - 'grant_type': 'refresh_token', - 'refresh_token': data['refresh_token'], - 'redirect_uri': redirect_url, - }), - }} - - def random_str(): - return ''.join(random.choices(string.digits + string.ascii_letters, k=43)) - - state = base64.b64encode(random_str().encode()) - nonce = base64.b64encode(random_str().encode()) - code_verifier = random_str().encode() - code_challenge = base64.b64encode( - hashlib.sha256(code_verifier).digest()).decode().translate(self._AUTH0_BASE64_TRANS) - - authorize_url = update_url_query(f'https://{auth0_domain}/authorize', { - 'client_id': auth0_web_client_id, - 'scope': 'openid profile email offline_access', - 'redirect_uri': redirect_url, - 'audience': f'api.{self._DOMAIN}', - 'prompt': 'login', - 'response_type': 'code', - 'response_mode': 'query', - 'state': state, - 'nonce': nonce, - 'code_challenge': code_challenge, - 'code_challenge_method': 'S256', - 'auth0Client': auth0_client, - }) - - if cached_refresh_token := traverse_obj(self.cache.load( - self._NETRC_MACHINE, cache_key), (cache_name, {str})): - self._auth_info = {'refresh_token': cached_refresh_token} - if self._refresh_token(): - self.write_debug('cached tokens updated') - return - self.cache.store(self._NETRC_MACHINE, cache_key, None) - - login_form = self._hidden_inputs(self._download_webpage( - authorize_url, None, note='Getting login form', errnote='Unable to get login form')) - state_obtained = login_form['state'] - login_url = f'https://{auth0_domain}/u/login?state={state_obtained}' - - login_form.update({ - 'username': username, - 'password': password, - 'action': 'default', - }) - - urlh = self._request_webpage( - login_url, None, note='Logging in', errnote='Unable to log in', - data=urlencode_postdata(login_form), expected_status=(400, 404)) - if urlh.status == 400: - self.report_warning('Unable to log in: bad username or password') - return - if not (urlh.status == 404 and urlh.url.startswith(redirect_url)): - self.report_warning('Unable to log in: Unknown login status') - return - - code = parse_qs(urlh.url)['code'][0] - - token_json = self._download_json( - token_url, None, headers={'Auth0-Client': auth0_client}, - note='Getting auth0 tokens', errnote='Unable to get auth0 tokens', - data=urlencode_postdata({ - 'client_id': auth0_web_client_id, - 'code_verifier': code_verifier, - 'grant_type': 'authorization_code', - 'code': code, - 'redirect_uri': redirect_url, - })) - - access_token = token_json['access_token'] - refresh_token = token_json['refresh_token'] - - auth_token = f'Bearer {access_token}' - - self._auth_info = { - 'auth_token': auth_token, - 'refresh_token': refresh_token, - } - - self.cache.store(self._NETRC_MACHINE, cache_key, {cache_name: refresh_token}) - def _fetch_paged_channel_video_list(self, path, query, channel, item_id, page): response = self._call_api( path, item_id, query={ @@ -764,7 +781,7 @@ def _fetch_paged_channel_video_list(self, path, query, channel, item_id, page): for content_code in traverse_obj( response, ('data', 'video_pages', 'list', ..., 'content_code', 
{str})): yield self.url_result('/'.join(filter( - None, [f'https://{self._DOMAIN}', channel, 'video', content_code])), SheetaEmbedIE) + None, [f'https://{self._DOMAIN}', channel, 'video', content_code]))) def _extract_video_list_page(self, url): """ From 68232ed11c31c87c440e81c121426e88eb49dba6 Mon Sep 17 00:00:00 2001 From: ChocoLZS Date: Thu, 30 Jan 2025 13:53:40 +0800 Subject: [PATCH 05/11] wip: use traverse_obj instead --- yt_dlp/extractor/sheeta.py | 46 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py index 1221d50f9..1c9998d73 100644 --- a/yt_dlp/extractor/sheeta.py +++ b/yt_dlp/extractor/sheeta.py @@ -26,7 +26,12 @@ class AuthManager: - _AUTH_INFO = {} + _AUTH_INFO_CACHE = {} + _AUTH0_BASE64_TRANS = str.maketrans({ + '+': '-', + '/': '_', + '=': None, + }) def __init__(self, ie: 'SheetaEmbedIE'): self._ie = ie @@ -34,15 +39,15 @@ def __init__(self, ie: 'SheetaEmbedIE'): @property def _auth_info(self): - if not self._AUTH_INFO.get(self._ie._DOMAIN): - self._AUTH_INFO[self._ie._DOMAIN] = {} - return self._AUTH_INFO.get(self._ie._DOMAIN) + if not self._AUTH_INFO_CACHE.get(self._ie._DOMAIN): + self._AUTH_INFO_CACHE[self._ie._DOMAIN] = {} + return self._AUTH_INFO_CACHE.get(self._ie._DOMAIN) @_auth_info.setter def _auth_info(self, value): - if not self._AUTH_INFO.get(self._ie._DOMAIN): - self._AUTH_INFO[self._ie._DOMAIN] = {} - self._AUTH_INFO[self._ie._DOMAIN].update(value) + if not self._AUTH_INFO_CACHE.get(self._ie._DOMAIN): + self._AUTH_INFO_CACHE[self._ie._DOMAIN] = {} + self._AUTH_INFO_CACHE[self._ie._DOMAIN].update(value) def _get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, **query_kwargs): try: @@ -242,10 +247,12 @@ def _auth0_login(self): 'cache_name': cache_name, } - login_info = self._ie._call_api(f'fanclub_sites/{self._ie._FANCLUB_SITE_ID_AUTH}/login', None)['data']['fanclub_site'] + login_info = traverse_obj( + self._ie._call_api(f'fanclub_sites/{self._ie._FANCLUB_SITE_ID_AUTH}/login', None), + ('data', 'fanclub_site')) self._ie.write_debug(f'login_info = {login_info}') - auth0_web_client_id = login_info['auth0_web_client_id'] - auth0_domain = login_info['fanclub_group']['auth0_domain'] + auth0_web_client_id = login_info.get('auth0_web_client_id') + auth0_domain = traverse_obj(login_info, ('fanclub_group', 'auth0_domain')) token_url = f'https://{auth0_domain}/oauth/token' redirect_url = f'https://{self._ie._DOMAIN}/login/login-redirect' @@ -274,7 +281,7 @@ def random_str(): nonce = base64.b64encode(random_str().encode()) code_verifier = random_str().encode() code_challenge = base64.b64encode( - hashlib.sha256(code_verifier).digest()).decode().translate(self._ie._AUTH0_BASE64_TRANS) + hashlib.sha256(code_verifier).digest()).decode().translate(self._AUTH0_BASE64_TRANS) authorize_url = update_url_query(f'https://{auth0_domain}/authorize', { 'client_id': auth0_web_client_id, @@ -320,7 +327,7 @@ def random_str(): self._ie.report_warning('Unable to log in: Unknown login status') return - code = parse_qs(urlh.url)['code'][0] + code = traverse_obj(parse_qs(urlh.url), ('code', 0)) token_json = self._ie._download_json( token_url, None, headers={'Auth0-Client': auth0_client}, @@ -333,8 +340,8 @@ def random_str(): 'redirect_uri': redirect_url, })) - access_token = token_json['access_token'] - refresh_token = token_json['refresh_token'] + access_token = token_json.get('access_token') + refresh_token = token_json.get('refresh_token') auth_token = f'Bearer 
{access_token}' @@ -499,21 +506,10 @@ class SheetaEmbedIE(InfoExtractor): _FANCLUB_SITE_ID_AUTH = None _FANCLUB_SITE_ID_INFO = None - _AUTH0_BASE64_TRANS = str.maketrans({ - '+': '-', - '/': '_', - '=': None, - }) _LIST_PAGE_SIZE = 12 auth_manager: AuthManager = None - # @classmethod - # def suitable(cls, url): - # return ( - # not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)) - # and super().suitable(url)) - def _extract_from_url(self, url): parsed_url = urllib.parse.urlparse(url) if not self.auth_manager: From a9fe89e291541eb1d5de43f8d01abac0a8bb4f8e Mon Sep 17 00:00:00 2001 From: ChocoLZS Date: Thu, 30 Jan 2025 15:59:56 +0800 Subject: [PATCH 06/11] revert: traverse_obj & use direct cache key --- yt_dlp/extractor/sheeta.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py index 1c9998d73..0cac2f544 100644 --- a/yt_dlp/extractor/sheeta.py +++ b/yt_dlp/extractor/sheeta.py @@ -132,7 +132,7 @@ def _niconico_sns_login(self, redirect_url, refresh_url): if not mail_tel: return - cache_key = hashlib.sha1(f'{self._ie._DOMAIN}:{mail_tel}:{password}'.encode()).hexdigest() + cache_key = f'{self._ie._DOMAIN}:{mail_tel}' self._auth_info = {'cache_key': cache_key} cache_name = 'niconico_sns' @@ -240,19 +240,17 @@ def _auth0_login(self): if not username: return - cache_key = hashlib.sha1(f'{self._ie._DOMAIN}:{username}:{password}'.encode()).hexdigest() + cache_key = f'{self._ie._DOMAIN}:{username}' cache_name = 'refresh' self._auth_info = { 'cache_key': cache_key, 'cache_name': cache_name, } - login_info = traverse_obj( - self._ie._call_api(f'fanclub_sites/{self._ie._FANCLUB_SITE_ID_AUTH}/login', None), - ('data', 'fanclub_site')) + login_info = self._ie._call_api(f'fanclub_sites/{self._ie._FANCLUB_SITE_ID_AUTH}/login', None)['data']['fanclub_site'] self._ie.write_debug(f'login_info = {login_info}') - auth0_web_client_id = login_info.get('auth0_web_client_id') - auth0_domain = traverse_obj(login_info, ('fanclub_group', 'auth0_domain')) + auth0_web_client_id = login_info['auth0_web_client_id'] + auth0_domain = login_info['fanclub_group']['auth0_domain'] token_url = f'https://{auth0_domain}/oauth/token' redirect_url = f'https://{self._ie._DOMAIN}/login/login-redirect' @@ -327,7 +325,7 @@ def random_str(): self._ie.report_warning('Unable to log in: Unknown login status') return - code = traverse_obj(parse_qs(urlh.url), ('code', 0)) + code = parse_qs(urlh.url)['code'][0] token_json = self._ie._download_json( token_url, None, headers={'Auth0-Client': auth0_client}, @@ -340,8 +338,8 @@ def random_str(): 'redirect_uri': redirect_url, })) - access_token = token_json.get('access_token') - refresh_token = token_json.get('refresh_token') + access_token = token_json['access_token'] + refresh_token = token_json['refresh_token'] auth_token = f'Bearer {access_token}' From 346653488d80a828aed35211088972cbdd173db3 Mon Sep 17 00:00:00 2001 From: ChocoLZS Date: Thu, 30 Jan 2025 16:30:35 +0800 Subject: [PATCH 07/11] refactor: refresh token logic --- yt_dlp/extractor/sheeta.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py index 0cac2f544..ce0e08c4a 100644 --- a/yt_dlp/extractor/sheeta.py +++ b/yt_dlp/extractor/sheeta.py @@ -1,4 +1,5 @@ import base64 +import enum import functools import hashlib import json @@ -25,6 +26,11 @@ from ..utils.traversal import traverse_obj +class AuthType(enum.Enum): + AUTH0 = 
'auth0' + NICONICO = 'niconico' + + class AuthManager: _AUTH_INFO_CACHE = {} _AUTH0_BASE64_TRANS = str.maketrans({ @@ -79,11 +85,17 @@ def _get_auth_token(self): return None def _refresh_token(self): - if not (refresh_func := self._auth_info.get('refresh_func')): + if not (refresh_func_params := self._auth_info.get('refresh_func_params')): return False + if self._auth_info.get('auth_type') == AuthType.AUTH0: + refresh_func_params['data'] = urlencode_postdata({ + **refresh_func_params['data'], + 'refresh_token': self._auth_info.get('refresh_token'), + }) + res = self._ie._download_json( - **refresh_func(self._auth_info), expected_status=(400, 403, 404), + **refresh_func_params, expected_status=(400, 403, 404), note='Refreshing token', errnote='Unable to refresh token') if error := traverse_obj( res, ('error', 'message', {lambda x: base64.b64decode(x).decode()}), ('error', 'message')): @@ -127,7 +139,7 @@ def _login(self): return self._auth0_login() def _niconico_sns_login(self, redirect_url, refresh_url): - self._auth_info = {'login_method': 'any'} + self._auth_info = {'login_method': 'any', 'auth_type': AuthType.NICONICO} mail_tel, password = self._ie._get_login_info() if not mail_tel: return @@ -153,13 +165,12 @@ def _niconico_sns_login(self, redirect_url, refresh_url): return self._auth_info = { - 'refresh_func': lambda data: { - 'url_or_request': data['refresh_url'], + 'refresh_func_params': { + 'url_or_request': refresh_url, 'video_id': None, - 'headers': {'Authorization': data['auth_token']}, + 'headers': {'Authorization': auth_token}, 'data': b'', }, - 'refresh_url': refresh_url, 'auth_token': auth_token, } @@ -235,7 +246,7 @@ def _niconico_login(self, mail_tel, password): return True def _auth0_login(self): - self._auth_info = {'login_method': 'password'} + self._auth_info = {'login_method': 'password', 'auth_type': AuthType.AUTH0} username, password = self._ie._get_login_info() if not username: return @@ -260,17 +271,15 @@ def _auth0_login(self): 'version': '2.0.6', }).encode()).decode() - self._auth_info = {'refresh_func': lambda data: { + self._auth_info = {'refresh_func_params': { 'url_or_request': token_url, 'video_id': None, 'headers': {'Auth0-Client': auth0_client}, - 'data': urlencode_postdata({ + 'data': { 'client_id': auth0_web_client_id, 'grant_type': 'refresh_token', - 'refresh_token': data['refresh_token'], 'redirect_uri': redirect_url, - }), - }} + }}} def random_str(): return ''.join(random.choices(string.digits + string.ascii_letters, k=43)) From d567311b9c427df224ff909eb6f216405342f992 Mon Sep 17 00:00:00 2001 From: ChocoLZS Date: Thu, 30 Jan 2025 20:02:19 +0800 Subject: [PATCH 08/11] chore: rm type annotation --- yt_dlp/extractor/sheeta.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py index ce0e08c4a..868623e50 100644 --- a/yt_dlp/extractor/sheeta.py +++ b/yt_dlp/extractor/sheeta.py @@ -27,8 +27,8 @@ class AuthType(enum.Enum): - AUTH0 = 'auth0' - NICONICO = 'niconico' + OAUTH_AUTH0 = 'auth0' + SOCIAL_NICONICO = 'niconico' class AuthManager: @@ -39,7 +39,7 @@ class AuthManager: '=': None, }) - def __init__(self, ie: 'SheetaEmbedIE'): + def __init__(self, ie): self._ie = ie self._auth_info = {} @@ -55,7 +55,7 @@ def _auth_info(self, value): self._AUTH_INFO_CACHE[self._ie._DOMAIN] = {} self._AUTH_INFO_CACHE[self._ie._DOMAIN].update(value) - def _get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, **query_kwargs): + def get_authed_info(self, 
query_path, item_id, dict_path, expected_code_msg, **query_kwargs): try: res = self._ie._call_api(query_path, item_id, **query_kwargs) return traverse_obj(res, dict_path) @@ -67,7 +67,7 @@ def _get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, ** method=self._auth_info.get('login_method')) return None - def _get_auth_token(self): + def get_auth_token(self): if not self._auth_info.get('auth_token'): try: self._login() @@ -88,7 +88,7 @@ def _refresh_token(self): if not (refresh_func_params := self._auth_info.get('refresh_func_params')): return False - if self._auth_info.get('auth_type') == AuthType.AUTH0: + if self._auth_info.get('auth_type') == AuthType.OAUTH_AUTH0: refresh_func_params['data'] = urlencode_postdata({ **refresh_func_params['data'], 'refresh_token': self._auth_info.get('refresh_token'), @@ -139,7 +139,7 @@ def _login(self): return self._auth0_login() def _niconico_sns_login(self, redirect_url, refresh_url): - self._auth_info = {'login_method': 'any', 'auth_type': AuthType.NICONICO} + self._auth_info = {'login_method': 'any', 'auth_type': AuthType.SOCIAL_NICONICO} mail_tel, password = self._ie._get_login_info() if not mail_tel: return @@ -515,7 +515,7 @@ class SheetaEmbedIE(InfoExtractor): _LIST_PAGE_SIZE = 12 - auth_manager: AuthManager = None + auth_manager = None def _extract_from_url(self, url): parsed_url = urllib.parse.urlparse(url) @@ -711,7 +711,7 @@ def _get_formats(self, data_json, live_status, content_code): 'Content-Type': 'application/json', 'fc_use_device': 'null', 'origin': f'https://{self._DOMAIN}', - 'Authorization': self.auth_manager._get_auth_token(), + 'Authorization': self.auth_manager.get_auth_token(), }) formats = [] @@ -720,7 +720,7 @@ def _get_formats(self, data_json, live_status, content_code): if data_json.get('type') == 'live' and live_status == 'was_live': payload = {'broadcast_type': 'dvr'} - session_id = self.auth_manager._get_authed_info( + session_id = self.auth_manager.get_authed_info( f'video_pages/{content_code}/session_ids', f'{content_code}/session', ('data', 'session_id', {str}), { 401: 'Members-only content', @@ -733,7 +733,7 @@ def _get_formats(self, data_json, live_status, content_code): m3u8_url = data_json['video_stream']['authenticated_url'].format(session_id=session_id) formats = self._extract_m3u8_formats(m3u8_url, content_code) elif data_json.get('audio'): - m3u8_url = self.auth_manager._get_authed_info( + m3u8_url = self.auth_manager.get_authed_info( f'video_pages/{content_code}/content_access', f'{content_code}/content_access', ('data', 'resource', {url_or_none}), { 403: 'Login required', From f902e219b474b08d6852c805d7612db83bde36b6 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 31 Jan 2025 11:35:05 +0000 Subject: [PATCH 09/11] wip --- yt_dlp/extractor/sheeta.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py index 868623e50..8221d5e1b 100644 --- a/yt_dlp/extractor/sheeta.py +++ b/yt_dlp/extractor/sheeta.py @@ -95,7 +95,7 @@ def _refresh_token(self): }) res = self._ie._download_json( - **refresh_func_params, expected_status=(400, 403, 404), + **refresh_func_params, video_id=None, expected_status=(400, 403, 404), note='Refreshing token', errnote='Unable to refresh token') if error := traverse_obj( res, ('error', 'message', {lambda x: base64.b64decode(x).decode()}), ('error', 'message')): @@ -167,7 +167,6 @@ def _niconico_sns_login(self, redirect_url, 
refresh_url): self._auth_info = { 'refresh_func_params': { 'url_or_request': refresh_url, - 'video_id': None, 'headers': {'Authorization': auth_token}, 'data': b'', }, @@ -271,15 +270,17 @@ def _auth0_login(self): 'version': '2.0.6', }).encode()).decode() - self._auth_info = {'refresh_func_params': { - 'url_or_request': token_url, - 'video_id': None, - 'headers': {'Auth0-Client': auth0_client}, - 'data': { - 'client_id': auth0_web_client_id, - 'grant_type': 'refresh_token', - 'redirect_uri': redirect_url, - }}} + self._auth_info = { + 'refresh_func_params': { + 'url_or_request': token_url, + 'headers': {'Auth0-Client': auth0_client}, + 'data': { + 'client_id': auth0_web_client_id, + 'grant_type': 'refresh_token', + 'redirect_uri': redirect_url, + }, + }, + } def random_str(): return ''.join(random.choices(string.digits + string.ascii_letters, k=43)) From f413b6a487fdb5d09f0628a36fcf44963ebf0a57 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 4 Feb 2025 17:15:09 +0000 Subject: [PATCH 10/11] minor fixes --- yt_dlp/extractor/sheeta.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py index 8221d5e1b..1f0ae4851 100644 --- a/yt_dlp/extractor/sheeta.py +++ b/yt_dlp/extractor/sheeta.py @@ -181,7 +181,7 @@ def _niconico_get_token_by_cookies(self, redirect_url): urlh = self._ie._request_webpage( redirect_url, None, note='Getting niconico auth status', expected_status=404, errnote='Unable to get niconico auth status') - if not urlh.url.startswith(f'https://{self._DOMAIN}/login'): + if not urlh.url.startswith(f'https://{self._ie._DOMAIN}/login'): return None if not (sns_login_code := traverse_obj(parse_qs(urlh.url), ('code', 0))): @@ -245,7 +245,7 @@ def _niconico_login(self, mail_tel, password): return True def _auth0_login(self): - self._auth_info = {'login_method': 'password', 'auth_type': AuthType.AUTH0} + self._auth_info = {'login_method': 'password', 'auth_type': AuthType.OAUTH_AUTH0} username, password = self._ie._get_login_info() if not username: return @@ -358,7 +358,7 @@ def random_str(): 'refresh_token': refresh_token, } - self._ie.cache.store(self._NETRC_MACHINE, cache_key, {cache_name: refresh_token}) + self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, {cache_name: refresh_token}) class SheetaEmbedIE(InfoExtractor): From 90cba0ac2a8d50690e54508bd68946926d4d0ead Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 7 Feb 2025 03:36:56 +0000 Subject: [PATCH 11/11] Introduce "SheetaAuth0Client"; drop old API calls; drop old IEs Time stands with us. 
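A note for reviewers on the client's lifecycle: get_token() always tries the
refresh_token grant first with whatever is cached for the current
domain/username pair, then falls back to a still-cached auth_token, and only
performs a full PKCE login when both fail; everything it learns is persisted
under the 'sheeta' cache section so later runs can skip the login round trip.
Niconico accounts go through the same auth0 flow by prefixing the username,
e.g. --username "niconico:<mail-or-tel>". Roughly the shape of one cached
per-domain record (the keys mirror what the _auth_info setter writes; every
value below is a placeholder, not a real token):

    cached_record = {
        'user@example.com': {
            'auth_token': 'Bearer <access_token>',
            'refresh_token': '<refresh_token>',
            'refresh_params': {
                'url_or_request': 'https://<auth0_domain>/oauth/token',
                'headers': {'Auth0-Client': '<base64-encoded client info>'},
                'data': {
                    'client_id': '<auth0_web_client_id>',
                    'redirect_uri': 'https://<site domain>/login/login-redirect',
                    'grant_type': 'refresh_token',
                },
            },
        },
    }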
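The PKCE pieces are the part easiest to get wrong, so for the record: the
code_challenge sent to /authorize must be the URL-safe base64 of the SHA-256
digest of the code_verifier with the '=' padding stripped, which is what
random_str() plus the _AUTH0_BASE64_TRANS translate table produce. A
self-contained sketch of the same computation (stdlib only, illustrative
names, not part of the extractor API):

    import base64
    import hashlib
    import random
    import string


    def make_pkce_pair():
        # RFC 7636: the verifier is 43-128 unreserved characters; 43 is the
        # minimum length and the length random_str() generates
        verifier = ''.join(random.choices(string.digits + string.ascii_letters, k=43))
        # S256 method: URL-safe base64 of the SHA-256 digest, '=' padding dropped
        challenge = base64.urlsafe_b64encode(
            hashlib.sha256(verifier.encode()).digest()).decode().rstrip('=')
        return verifier, challenge

urlsafe_b64encode() already maps '+' -> '-' and '/' -> '_', so this and the
translate-table version in the diff produce identical strings; the table form
is kept only to avoid a second base64 helper import in the extractor.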
--- README.md | 2 +- yt_dlp/extractor/_extractors.py | 5 - yt_dlp/extractor/niconicochannelplus.py | 180 ------ yt_dlp/extractor/sheeta.py | 692 ++++++++++++++---------- 4 files changed, 393 insertions(+), 486 deletions(-) delete mode 100644 yt_dlp/extractor/niconicochannelplus.py diff --git a/README.md b/README.md index 45c56434a..a32b8d29a 100644 --- a/README.md +++ b/README.md @@ -1812,7 +1812,7 @@ #### hotstar * `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265` * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` -#### niconicochannelplus +#### sheeta * `max_comments`: Maximum number of comments to extract - default is `120` #### tiktok diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index efac0e3b1..a53daaaf9 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1352,11 +1352,6 @@ NicovideoSearchURLIE, NicovideoTagURLIE, ) -from .niconicochannelplus import ( - NiconicoChannelPlusChannelLivesIE, - NiconicoChannelPlusChannelVideosIE, - NiconicoChannelPlusIE, -) from .ninaprotocol import NinaProtocolIE from .ninecninemedia import ( CPTwentyFourIE, diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py deleted file mode 100644 index 8cfede78b..000000000 --- a/yt_dlp/extractor/niconicochannelplus.py +++ /dev/null @@ -1,180 +0,0 @@ -from .sheeta import SheetaEmbedIE - - -class NiconicoChannelPlusIE(SheetaEmbedIE): - IE_NAME = 'NiconicoChannelPlus' - IE_DESC = 'ニコニコチャンネルプラス' - _VALID_URL = r'https?://nicochannel\.jp/(?P[\w.-]+)/(?:video|live)/(?Psm\w+)' - _TESTS = [{ - 'url': 'https://nicochannel.jp/kaorin/video/sm89Hd4SEduy8WTsb4KxAhBL', - 'info_dict': { - 'id': 'sm89Hd4SEduy8WTsb4KxAhBL', - 'title': '前田佳織里の世界攻略計画 #2', - 'ext': 'mp4', - 'channel': '前田佳織里の世界攻略計画', - 'channel_id': 'nicochannel.jp/kaorin', - 'channel_url': 'https://nicochannel.jp/kaorin', - 'live_status': 'not_live', - 'thumbnail': str, - 'description': 'md5:02573495c8be849c0cb88df6f1b85f8b', - 'timestamp': 1644546015, - 'duration': 4093, - 'comment_count': int, - 'view_count': int, - 'tags': ['前田攻略', '前田佳織里', '前田佳織里の世界攻略計画'], - 'upload_date': '20220211', - }, - 'params': { - 'skip_download': True, - }, - }, { - # age limited video; test purpose channel. - 'url': 'https://nicochannel.jp/testman/video/smJPZg3nwAxP8UECPsHDiCGM', - 'info_dict': { - 'id': 'smJPZg3nwAxP8UECPsHDiCGM', - 'title': 'DW_itaba_LSM検証_1080p60fps_9000Kbpsで打ち上げたときの挙動確認(パススルーあり)', - 'ext': 'mp4', - 'channel': '本番チャンネルプラステストマン', - 'channel_id': 'nicochannel.jp/testman', - 'channel_url': 'https://nicochannel.jp/testman', - 'age_limit': 18, - 'live_status': 'was_live', - 'thumbnail': str, - 'description': 'TEST', - 'timestamp': 1701329428, - 'duration': 229, - 'comment_count': int, - 'view_count': int, - 'tags': ['検証用'], - 'upload_date': '20231130', - 'release_timestamp': 1701328800, - 'release_date': '20231130', - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - return super()._extract_from_url(url) - - -class NiconicoChannelPlusChannelVideosIE(SheetaEmbedIE): - IE_NAME = 'NiconicoChannelPlus:channel:videos' - IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos' - _VALID_URL = r'https?://nicochannel\.jp/(?P[a-z\d\._-]+)/videos(?:\?.*)?' 
- _TESTS = [{ - # query: None - 'url': 'https://nicochannel.jp/testman/videos', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 18, - }, { - # query: None - 'url': 'https://nicochannel.jp/testtarou/videos', - 'info_dict': { - 'id': 'nicochannel.jp/testtarou/videos', - 'title': 'チャンネルプラステスト太郎-videos', - }, - 'playlist_mincount': 2, - }, { - # query: None - 'url': 'https://nicochannel.jp/testjirou/videos', - 'info_dict': { - 'id': 'nicochannel.jp/testjirou/videos', - 'title': 'チャンネルプラステスト二郎21-videos', - }, - 'playlist_mincount': 12, - }, { - # query: tag - 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 6, - }, { - # query: vodType - 'url': 'https://nicochannel.jp/testman/videos?vodType=1', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 18, - }, { - # query: sort - 'url': 'https://nicochannel.jp/testman/videos?sort=-released_at', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 18, - }, { - # query: tag, vodType - 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 6, - }, { - # query: tag, sort - 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 6, - }, { - # query: vodType, sort - 'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 18, - }, { - # query: tag, vodType, sort - 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at', - 'info_dict': { - 'id': 'nicochannel.jp/testman/videos', - 'title': '本番チャンネルプラステストマン-videos', - }, - 'playlist_mincount': 6, - }] - - def _real_extract(self, url): - return super()._extract_from_url(url) - - -class NiconicoChannelPlusChannelLivesIE(SheetaEmbedIE): - IE_NAME = 'NiconicoChannelPlus:channel:lives' - IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. 
nicochannel.jp/channel/lives' - _VALID_URL = r'https?://nicochannel\.jp/(?P[a-z\d\._-]+)/lives' - _TESTS = [{ - 'url': 'https://nicochannel.jp/testman/lives', - 'info_dict': { - 'id': 'nicochannel.jp/testman/lives', - 'title': '本番チャンネルプラステストマン-lives', - }, - 'playlist_mincount': 18, - }, { - 'url': 'https://nicochannel.jp/testtarou/lives', - 'info_dict': { - 'id': 'nicochannel.jp/testtarou/lives', - 'title': 'チャンネルプラステスト太郎-lives', - }, - 'playlist_mincount': 2, - }, { - 'url': 'https://nicochannel.jp/testjirou/lives', - 'info_dict': { - 'id': 'nicochannel.jp/testjirou/lives', - 'title': 'チャンネルプラステスト二郎21-lives', - }, - 'playlist_mincount': 6, - }] - - def _real_extract(self, url): - return super()._extract_from_url(url) diff --git a/yt_dlp/extractor/sheeta.py b/yt_dlp/extractor/sheeta.py index 1f0ae4851..6150eb466 100644 --- a/yt_dlp/extractor/sheeta.py +++ b/yt_dlp/extractor/sheeta.py @@ -1,5 +1,6 @@ import base64 -import enum +import copy +import datetime as dt import functools import hashlib import json @@ -13,8 +14,10 @@ from ..utils import ( ExtractorError, OnDemandPagedList, + extract_attributes, filter_dict, - get_domain, + get_element_html_by_attribute, + get_element_html_by_id, int_or_none, parse_qs, unified_timestamp, @@ -26,13 +29,7 @@ from ..utils.traversal import traverse_obj -class AuthType(enum.Enum): - OAUTH_AUTH0 = 'auth0' - SOCIAL_NICONICO = 'niconico' - - -class AuthManager: - _AUTH_INFO_CACHE = {} +class SheetaAuth0Client: _AUTH0_BASE64_TRANS = str.maketrans({ '+': '-', '/': '_', @@ -40,196 +37,124 @@ class AuthManager: }) def __init__(self, ie): + self._section = 'sheeta' + self._mem_cache = {} + self._context = {} self._ie = ie - self._auth_info = {} + + def _load(self, domain, username): + if data := traverse_obj(self._mem_cache, (domain, username), default={}): + return data + + if data := traverse_obj(self._ie.cache.load(self._section, domain), username, default={}): + if domain not in self._mem_cache: + self._mem_cache[domain] = {} + self._mem_cache[domain][username] = data + + return data + + def _store(self, domain, username, data): + if not self._mem_cache.get(domain, {}): + self._mem_cache[domain] = {} + self._mem_cache[domain][username] = data + + self._ie.cache.store(self._section, domain, self._mem_cache[domain]) @property def _auth_info(self): - if not self._AUTH_INFO_CACHE.get(self._ie._DOMAIN): - self._AUTH_INFO_CACHE[self._ie._DOMAIN] = {} - return self._AUTH_INFO_CACHE.get(self._ie._DOMAIN) + if not self._context or not self._context['username']: + return {} + return self._load(self._context['domain'], self._context['username']) @_auth_info.setter def _auth_info(self, value): - if not self._AUTH_INFO_CACHE.get(self._ie._DOMAIN): - self._AUTH_INFO_CACHE[self._ie._DOMAIN] = {} - self._AUTH_INFO_CACHE[self._ie._DOMAIN].update(value) - - def get_authed_info(self, query_path, item_id, dict_path, expected_code_msg, **query_kwargs): - try: - res = self._ie._call_api(query_path, item_id, **query_kwargs) - return traverse_obj(res, dict_path) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status not in expected_code_msg: - raise e - self._ie.raise_login_required( - expected_code_msg[e.cause.status], metadata_available=True, - method=self._auth_info.get('login_method')) - return None - - def get_auth_token(self): - if not self._auth_info.get('auth_token'): - try: - self._login() - return self._auth_info.get('auth_token') - except Exception as e: - raise ExtractorError('Unable to login due to unknown reasons') from e - - if 
self._auth_info.get('auth_token'): - try: - self._refresh_token() - return self._auth_info.get('auth_token') - except Exception as e: - raise ExtractorError('Unable to refresh token due to unknown reasons') from e - - return None - - def _refresh_token(self): - if not (refresh_func_params := self._auth_info.get('refresh_func_params')): - return False - - if self._auth_info.get('auth_type') == AuthType.OAUTH_AUTH0: - refresh_func_params['data'] = urlencode_postdata({ - **refresh_func_params['data'], - 'refresh_token': self._auth_info.get('refresh_token'), - }) - - res = self._ie._download_json( - **refresh_func_params, video_id=None, expected_status=(400, 403, 404), - note='Refreshing token', errnote='Unable to refresh token') - if error := traverse_obj( - res, ('error', 'message', {lambda x: base64.b64decode(x).decode()}), ('error', 'message')): - self._ie.report_warning(f'Unable to refresh token: {error!r}') - elif token := traverse_obj(res, ('data', 'access_token', {str})): - # niconico - self._auth_info = {'auth_token': f'Bearer {token}'} - return True - elif token := traverse_obj(res, ('access_token', {str})): - # auth0 - self._auth_info = {'auth_token': f'Bearer {token}'} - if refresh_token := traverse_obj(res, ('refresh_token', {str})): - self._auth_info = {'refresh_token': refresh_token} - self._ie.cache.store( - self._ie._NETRC_MACHINE, self._auth_info['cache_key'], {self._auth_info['cache_name']: refresh_token}) - return True - self._ie.report_warning('Unable to find new refresh_token') - else: - self._ie.report_warning('Unable to refresh token') - - return False - - def _login(self): - social_login_providers = traverse_obj(self._ie._call_api( - f'fanclub_groups/{self._ie._FANCLUB_GROUP_ID}/login', None), - ('data', 'fanclub_group', 'fanclub_social_login_providers', ..., {dict})) or [] - self._ie.write_debug(f'social_login_providers = {social_login_providers!r}') - - for provider in social_login_providers: - provider_name = traverse_obj(provider, ('social_login_provider', 'provider_name', {str})) - if provider_name == 'ニコニコ': - redirect_url = update_url_query(provider['url'], { - 'client_id': 'FCS{:05d}'.format(provider['id']), - 'redirect_uri': f'https://{self._ie._DOMAIN}/login', - }) - refresh_url = f'{self._ie._API_BASE_URL}/fanclub_groups/{self._ie._FANCLUB_GROUP_ID}/auth/refresh' - return self._niconico_sns_login(redirect_url, refresh_url) - else: - raise ExtractorError(f'Unsupported social login provider: {provider_name}') - - return self._auth0_login() - - def _niconico_sns_login(self, redirect_url, refresh_url): - self._auth_info = {'login_method': 'any', 'auth_type': AuthType.SOCIAL_NICONICO} - mail_tel, password = self._ie._get_login_info() - if not mail_tel: + if not self._context or not self._context['username']: return - cache_key = f'{self._ie._DOMAIN}:{mail_tel}' - self._auth_info = {'cache_key': cache_key} - cache_name = 'niconico_sns' + domain, username = self._context['domain'], self._context['username'] - if cached_cookies := traverse_obj(self._ie.cache.load( - self._ie._NETRC_MACHINE, cache_key), (cache_name, {dict})): - for name, value in cached_cookies.items(): - self._ie._set_cookie(get_domain(redirect_url), name, value) + data = self._load(domain, username) + self._store(domain, username, {**data, **value}) - if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)): - if cached_cookies: - self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, None) + def get_token(self): + if not (username := self._login_info[0]): + return + self._context 
= {'username': username, 'domain': self._ie._DOMAIN} - self._niconico_login(mail_tel, password) + try: + if self._refresh_token(): + # always refresh if possible + return self._auth_info['auth_token'] + if auth_token := self._auth_info.get('auth_token'): + # unable to refresh, check the cache + return auth_token + except Exception as e: + self._ie.report_warning(f'Unable to refresh token: {e}') - if not (auth_token := self._niconico_get_token_by_cookies(redirect_url)): - self._ie.report_warning('Unable to get token after login, please check if ' - 'niconico channel plus is authorized to use your niconico account') - return + try: + self._login() + return self._auth_info['auth_token'] + except Exception as e: + self._ie.report_warning(f'Unable to get token: {e}') - self._auth_info = { - 'refresh_func_params': { - 'url_or_request': refresh_url, - 'headers': {'Authorization': auth_token}, - 'data': b'', - }, - 'auth_token': auth_token, - } - - cookies = dict(traverse_obj(self._ie.cookiejar.get_cookies_for_url( - redirect_url), (..., {lambda item: (item.name, item.value)}))) - self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, {cache_name: cookies}) - - def _niconico_get_token_by_cookies(self, redirect_url): - urlh = self._ie._request_webpage( - redirect_url, None, note='Getting niconico auth status', - expected_status=404, errnote='Unable to get niconico auth status') - if not urlh.url.startswith(f'https://{self._ie._DOMAIN}/login'): - return None - - if not (sns_login_code := traverse_obj(parse_qs(urlh.url), ('code', 0))): - self._ie.report_warning('Unable to get sns login code') - return None - - token = traverse_obj(self._ie._call_api( - f'fanclub_groups/{self._ie._FANCLUB_GROUP_ID}/sns_login', None, fatal=False, - note='Fetching sns login info', errnote='Unable to fetch sns login info', - data=json.dumps({ - 'key_cloak_user': { - 'code': sns_login_code, - 'redirect_uri': f'https://{self._ie._DOMAIN}/login', - }, - 'fanclub_site': {'id': int(self._ie._FANCLUB_SITE_ID_AUTH)}, - }).encode(), headers={ - 'Content-Type': 'application/json', - 'fc_use_device': 'null', - 'Referer': f'https://{self._ie._DOMAIN}', - }), ('data', 'access_token', {str})) - if token: - return f'Bearer {token}' - - self._ie.report_warning('Unable to get token from sns login info') return None - def _niconico_login(self, mail_tel, password): - login_form_strs = { - 'mail_tel': mail_tel, + def clear_token(self): + self._auth_info = {'auth_token': ''} + + def _refresh_token(self): + if not (refresh_params := copy.deepcopy(self._auth_info.get('refresh_params'))): + return False + + refresh_params['data'] = urlencode_postdata(filter_dict({ + **refresh_params['data'], + 'refresh_token': self._auth_info.get('refresh_token'), + })) + + res = self._ie._download_json( + **refresh_params, video_id=None, expected_status=(400, 403, 404), + note='Refreshing token', errnote='Unable to refresh token') + if token := res.get('access_token'): + self._auth_info = {'auth_token': f'Bearer {token}'} + if refresh_token := res.get('refresh_token'): + self._auth_info = {'refresh_token': refresh_token} + return True + self._ie.report_warning('Unable to find new refresh_token') + return False + + raise ExtractorError(f'Unable to refresh token: {res!r}') + + @property + def _login_info(self): + return self._ie._get_login_info(netrc_machine=self._ie._DOMAIN) + + def _auth0_niconico_login(self, username, password, login_url): + page = self._ie._download_webpage( + login_url, None, data=urlencode_postdata({'connection': 'niconico'}), + 
note='Fetching niconico login page', errnote='Unable to fetch niconico login page') + niconico_login_url = urljoin( + 'https://account.nicovideo.jp/', extract_attributes(get_element_html_by_id('login_form', page, tag='form'))['action']) + + login_form = { + 'auth_id': dt.datetime.now(), + 'mail_tel': username, 'password': password, } page, urlh = self._ie._download_webpage_handle( - 'https://account.nicovideo.jp/login/redirector', None, - note='Logging into niconico', errnote='Unable to log into niconico', - data=urlencode_postdata(login_form_strs), - headers={ + niconico_login_url, None, note='Logging into niconico', errnote='Unable to log into niconico', + data=urlencode_postdata(login_form), expected_status=404, headers={ 'Referer': 'https://account.nicovideo.jp/login', 'Content-Type': 'application/x-www-form-urlencoded', }) if urlh.url.startswith('https://account.nicovideo.jp/login'): - self._ie.report_warning('Unable to log in: bad username or password') - return False - elif urlh.url.startswith('https://account.nicovideo.jp/mfa'): - post_url = self._ie._search_regex( - r']+action=(["\'])(?P.+?)\1', page, 'mfa post url', group='url') + raise ExtractorError('Unable to log in: bad username or password', expected=True) + + if urlh.url.startswith('https://account.nicovideo.jp/mfa'): + post_url = extract_attributes( + get_element_html_by_attribute('method', 'POST', page, tag='form'))['action'] page, urlh = self._ie._download_webpage_handle( urljoin('https://account.nicovideo.jp/', post_url), None, - note='Performing MFA', errnote='Unable to complete MFA', + note='Performing MFA', errnote='Unable to complete MFA', expected_status=404, data=urlencode_postdata({ 'otp': self._ie._get_tfa_info('6 digits code'), }), headers={ @@ -240,48 +165,41 @@ def _niconico_login(self, mail_tel, password): r'formError\b[^>]*>(.*?)', page, 'form_error', default='There\'s an error but the message can\'t be parsed.', flags=re.DOTALL) - self._ie.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"') - return False - return True + raise ExtractorError(f'Unable to log in: MFA challenge failed, "{err_msg}"', expected=True) - def _auth0_login(self): - self._auth_info = {'login_method': 'password', 'auth_type': AuthType.OAUTH_AUTH0} - username, password = self._ie._get_login_info() - if not username: - return + return parse_qs(urlh.url)['code'][0] - cache_key = f'{self._ie._DOMAIN}:{username}' - cache_name = 'refresh' - self._auth_info = { - 'cache_key': cache_key, - 'cache_name': cache_name, + def _auth0_normal_login(self, username, password, login_url, redirect_url): + login_form = { + 'username': username, + 'password': password, + 'action': 'default', } + urlh = self._ie._request_webpage( + login_url, None, note='Logging into auth0', errnote='Unable to log into auth0', + data=urlencode_postdata(login_form), expected_status=(400, 404)) + if urlh.status == 400: + raise ExtractorError('Unable to log in: bad username or password', expected=True) + if not (urlh.status == 404 and urlh.url.startswith(redirect_url)): + raise ExtractorError('Unable to log in: unknown login status') + + return parse_qs(urlh.url)['code'][0] + + def _login(self): login_info = self._ie._call_api(f'fanclub_sites/{self._ie._FANCLUB_SITE_ID_AUTH}/login', None)['data']['fanclub_site'] self._ie.write_debug(f'login_info = {login_info}') auth0_web_client_id = login_info['auth0_web_client_id'] auth0_domain = login_info['fanclub_group']['auth0_domain'] token_url = f'https://{auth0_domain}/oauth/token' - redirect_url = 
f'https://{self._ie._DOMAIN}/login/login-redirect' + redirect_uri = f'https://{self._ie._DOMAIN}/login/login-redirect' auth0_client = base64.b64encode(json.dumps({ 'name': 'auth0-spa-js', 'version': '2.0.6', }).encode()).decode() - self._auth_info = { - 'refresh_func_params': { - 'url_or_request': token_url, - 'headers': {'Auth0-Client': auth0_client}, - 'data': { - 'client_id': auth0_web_client_id, - 'grant_type': 'refresh_token', - 'redirect_uri': redirect_url, - }, - }, - } - def random_str(): return ''.join(random.choices(string.digits + string.ascii_letters, k=43)) @@ -294,7 +212,7 @@ def random_str(): authorize_url = update_url_query(f'https://{auth0_domain}/authorize', { 'client_id': auth0_web_client_id, 'scope': 'openid profile email offline_access', - 'redirect_uri': redirect_url, + 'redirect_uri': redirect_uri, 'audience': f'api.{self._ie._DOMAIN}', 'prompt': 'login', 'response_type': 'code', @@ -305,37 +223,14 @@ def random_str(): 'code_challenge_method': 'S256', 'auth0Client': auth0_client, }) + login_url = f'https://{auth0_domain}/u/login?state=%s' % parse_qs(self._ie._request_webpage( + authorize_url, None, note='Getting state value', errnote='Unable to get state value').url)['state'][0] - if cached_refresh_token := traverse_obj(self._ie.cache.load( - self._ie._NETRC_MACHINE, cache_key), (cache_name, {str})): - self._auth_info = {'refresh_token': cached_refresh_token} - if self._refresh_token(): - self._ie.write_debug('cached tokens updated') - return - self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, None) - - login_form = self._ie._hidden_inputs(self._ie._download_webpage( - authorize_url, None, note='Getting login form', errnote='Unable to get login form')) - state_obtained = login_form['state'] - login_url = f'https://{auth0_domain}/u/login?state={state_obtained}' - - login_form.update({ - 'username': username, - 'password': password, - 'action': 'default', - }) - - urlh = self._ie._request_webpage( - login_url, None, note='Logging in', errnote='Unable to log in', - data=urlencode_postdata(login_form), expected_status=(400, 404)) - if urlh.status == 400: - self._ie.report_warning('Unable to log in: bad username or password') - return - if not (urlh.status == 404 and urlh.url.startswith(redirect_url)): - self._ie.report_warning('Unable to log in: Unknown login status') - return - - code = parse_qs(urlh.url)['code'][0] + username, password = self._login_info + if username.startswith('niconico:'): + code = self._auth0_niconico_login(username.removeprefix('niconico:'), password, login_url) + else: + code = self._auth0_normal_login(username, password, login_url, redirect_uri) token_json = self._ie._download_json( token_url, None, headers={'Auth0-Client': auth0_client}, @@ -345,28 +240,219 @@ def random_str(): 'code_verifier': code_verifier, 'grant_type': 'authorization_code', 'code': code, - 'redirect_uri': redirect_url, + 'redirect_uri': redirect_uri, })) - access_token = token_json['access_token'] - refresh_token = token_json['refresh_token'] - - auth_token = f'Bearer {access_token}' - - self._auth_info = { - 'auth_token': auth_token, - 'refresh_token': refresh_token, - } - - self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, {cache_name: refresh_token}) + self._auth_info = {'auth_token': f'Bearer {token_json["access_token"]}'} + if refresh_token := token_json.get('refresh_token'): + self._auth_info = { + 'refresh_token': refresh_token, + 'refresh_params': { + 'url_or_request': token_url, + 'headers': {'Auth0-Client': auth0_client}, + 'data': { + 'client_id': 
@@ -345,28 +240,219 @@ def random_str():
                 'code_verifier': code_verifier,
                 'grant_type': 'authorization_code',
                 'code': code,
-                'redirect_uri': redirect_url,
+                'redirect_uri': redirect_uri,
             }))

-        access_token = token_json['access_token']
-        refresh_token = token_json['refresh_token']
-
-        auth_token = f'Bearer {access_token}'
-
-        self._auth_info = {
-            'auth_token': auth_token,
-            'refresh_token': refresh_token,
-        }
-
-        self._ie.cache.store(self._ie._NETRC_MACHINE, cache_key, {cache_name: refresh_token})
+        self._auth_info = {'auth_token': f'Bearer {token_json["access_token"]}'}
+        if refresh_token := token_json.get('refresh_token'):
+            self._auth_info = {
+                'refresh_token': refresh_token,
+                'refresh_params': {
+                    'url_or_request': token_url,
+                    'headers': {'Auth0-Client': auth0_client},
+                    'data': {
+                        'client_id': auth0_web_client_id,
+                        'redirect_uri': redirect_uri,
+                        'grant_type': 'refresh_token',
+                    },
+                },
+            }


 class SheetaEmbedIE(InfoExtractor):
-    _NETRC_MACHINE = 'sheeta'
     IE_NAME = 'sheeta'
     IE_DESC = 'fan club system developed by DWANGO (ドワンゴ)'
     _VALID_URL = False
     _WEBPAGE_TESTS = [{
+        'url': 'https://nicochannel.jp/kaorin/video/sm89Hd4SEduy8WTsb4KxAhBL',
+        'info_dict': {
+            'id': 'sm89Hd4SEduy8WTsb4KxAhBL',
+            'title': '前田佳織里の世界攻略計画 #2',
+            'ext': 'mp4',
+            'channel': '前田佳織里の世界攻略計画',
+            'channel_id': 'nicochannel.jp/kaorin',
+            'channel_url': 'https://nicochannel.jp/kaorin',
+            'age_limit': 0,
+            'live_status': 'not_live',
+            'thumbnail': str,
+            'description': 'md5:02573495c8be849c0cb88df6f1b85f8b',
+            'timestamp': 1644546015,
+            'duration': 4093,
+            'comment_count': int,
+            'view_count': int,
+            'tags': ['前田攻略', '前田佳織里', '前田佳織里の世界攻略計画'],
+            'upload_date': '20220211',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # age limited video; test purpose channel.
+        'url': 'https://nicochannel.jp/testman/video/smJPZg3nwAxP8UECPsHDiCGM',
+        'info_dict': {
+            'id': 'smJPZg3nwAxP8UECPsHDiCGM',
+            'title': 'DW_itaba_LSM検証_1080p60fps_9000Kbpsで打ち上げたときの挙動確認(パススルーあり)',
+            'ext': 'mp4',
+            'channel': '本番チャンネルプラステストマン',
+            'channel_id': 'nicochannel.jp/testman',
+            'channel_url': 'https://nicochannel.jp/testman',
+            'age_limit': 18,
+            'live_status': 'was_live',
+            'thumbnail': str,
+            'description': 'TEST',
+            'timestamp': 1701329428,
+            'duration': 229,
+            'comment_count': int,
+            'view_count': int,
+            'tags': ['検証用'],
+            'upload_date': '20231130',
+            'release_timestamp': 1701328800,
+            'release_date': '20231130',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # query: None
+        'url': 'https://nicochannel.jp/testman/videos',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: None
+        'url': 'https://nicochannel.jp/testtarou/videos',
+        'info_dict': {
+            'id': 'nicochannel.jp/testtarou/videos',
+            'title': 'チャンネルプラステスト太郎-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 2,
+    }, {
+        # query: None
+        'url': 'https://nicochannel.jp/testjirou/videos',
+        'info_dict': {
+            'id': 'nicochannel.jp/testjirou/videos',
+            'title': 'チャンネルプラステスト"二郎21-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 12,
+    }, {
+        # query: tag
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 6,
+    }, {
+        # query: vodType
+        'url': 'https://nicochannel.jp/testman/videos?vodType=1',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: sort
+        'url': 'https://nicochannel.jp/testman/videos?sort=-released_at',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: tag, vodType
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 6,
+    }, {
+        # query: tag, sort
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 6,
+    }, {
+        # query: vodType, sort
+        'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 18,
+    }, {
+        # query: tag, vodType, sort
+        'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/videos',
+            'title': '本番チャンネルプラステストマン-videos',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 6,
+    }, {
+        'url': 'https://nicochannel.jp/testman/lives',
+        'info_dict': {
+            'id': 'nicochannel.jp/testman/lives',
+            'title': '本番チャンネルプラステストマン-lives',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 18,
+    }, {
+        'url': 'https://nicochannel.jp/testtarou/lives',
+        'info_dict': {
+            'id': 'nicochannel.jp/testtarou/lives',
+            'title': 'チャンネルプラステスト太郎-lives',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 2,
+    }, {
+        'url': 'https://nicochannel.jp/testjirou/lives',
+        'info_dict': {
+            'id': 'nicochannel.jp/testjirou/lives',
+            'title': 'チャンネルプラステスト"二郎21-lives',
+            'age_limit': 0,
+            'timestamp': 1737957232,
+            'upload_date': '20250127',
+        },
+        'playlist_mincount': 6,
+    }, {
         'url': 'https://qlover.jp/doku/video/smy4caVHR6trSddiG9uCDiy4',
         'info_dict': {
             'id': 'smy4caVHR6trSddiG9uCDiy4',
@@ -436,14 +522,14 @@ class SheetaEmbedIE(InfoExtractor):
         },
         'params': {'skip_download': True},
     }, {
-        'url': 'https://11audee.jp/audio/smx3ebEZFRnHeaGzUzgi5A98',
+        'url': 'https://audee-membership.jp/aisaka-yuuka/audio/smx3ebEZFRnHeaGzUzgi5A98',
         'info_dict': {
             'id': 'smx3ebEZFRnHeaGzUzgi5A98',
             'title': '#相坂湯 第38回 ロコムジカちゃんの歌唱についてモノ申す!? ある意味レアな?鼻声坂くん!',
             'ext': 'm4a',
             'channel': '相坂優歌 湯上がり何飲む?',
-            'channel_id': '11audee.jp',
-            'channel_url': 'https://11audee.jp',
+            'channel_id': 'audee-membership.jp/aisaka-yuuka',
+            'channel_url': 'https://audee-membership.jp/aisaka-yuuka',
             'age_limit': 0,
             'live_status': 'not_live',
             'thumbnail': str,
@@ -462,8 +548,8 @@ class SheetaEmbedIE(InfoExtractor):
             'id': 'hololive-fc.com/videos',
             'title': '旧ホロライブ公式ファンクラブ-videos',
             'age_limit': 0,
-            'timestamp': 1715652389,
-            'upload_date': '20240514',
+            'timestamp': 1737957238,
+            'upload_date': '20250127',
         },
         'playlist_mincount': 12,
     }, {
@@ -472,18 +558,18 @@ class SheetaEmbedIE(InfoExtractor):
             'id': 'tokinosora-fc.com/videos',
             'title': 'ときのそらオフィシャルファンクラブ-videos',
             'age_limit': 0,
-            'timestamp': 1715652399,
-            'upload_date': '20240514',
+            'timestamp': 1737957234,
+            'upload_date': '20250127',
         },
         'playlist_mincount': 18,
     }, {
-        'url': 'https://01audee.jp/videos?tag=RADIO&vodType=1&sort=display_date',
+        'url': 'https://audee-membership.jp/okuma-wakana/videos?tag=RADIO&vodType=1&sort=display_date',
        'info_dict': {
-            'id': '01audee.jp/videos',
+            'id': 'audee-membership.jp/okuma-wakana/videos',
             'title': '大熊和奏 朝のささやき-videos',
             'age_limit': 0,
-            'timestamp': 1715652369,
-            'upload_date': '20240514',
+            'timestamp': 1737957233,
+            'upload_date': '20250127',
         },
         'playlist_mincount': 6,
     }, {
@@ -492,18 +578,18 @@ class SheetaEmbedIE(InfoExtractor):
             'id': 'qlover.jp/bokuao/lives',
             'title': '僕が見たかった青空の 「青天のヘキレキ!」-lives',
             'age_limit': 0,
-            'timestamp': 1715652429,
-            'upload_date': '20240514',
+            'timestamp': 1737957231,
+            'upload_date': '20250127',
         },
         'playlist_mincount': 1,
     }, {
-        'url': 'https://06audee.jp/lives',
+        'url': 'https://audee-membership.jp/tanaka-chiemi/lives',
         'info_dict': {
-            'id': '06audee.jp/lives',
+            'id': 'audee-membership.jp/tanaka-chiemi/lives',
             'title': '田中ちえ美のたなかのカナタ!-lives',
             'age_limit': 0,
-            'timestamp': 1715652369,
-            'upload_date': '20240514',
+            'timestamp': 1737957233,
+            'upload_date': '20250127',
         },
         'playlist_mincount': 5,
     }]
@@ -515,13 +601,15 @@ class SheetaEmbedIE(InfoExtractor):
     _FANCLUB_SITE_ID_INFO = None

     _LIST_PAGE_SIZE = 12
+    _LOGIN_METHOD = 'password'

-    auth_manager = None
+    _auth0_client = None

     def _extract_from_url(self, url):
+        if not self._auth0_client:
+            self._auth0_client = SheetaAuth0Client(self)
+
         parsed_url = urllib.parse.urlparse(url)
-        if not self.auth_manager:
-            self.auth_manager = AuthManager(self)

         if '/videos' in parsed_url.path:
             return self._extract_video_list_page(url)
         elif '/lives' in parsed_url.path:
             return self._extract_live_list_page(url)
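
_extract_from_url() above dispatches purely on the URL path, so one embed extractor can serve every sheeta-backed domain; note the check is a substring match, not a path-segment match. A quick illustration using one of the test URLs above:

    import urllib.parse

    parsed = urllib.parse.urlparse('https://nicochannel.jp/testman/videos?vodType=1')
    print('/videos' in parsed.path)  # True -> handled as a video list page
    print('/lives' in parsed.path)   # False
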
@@ -537,6 +625,31 @@ def _extract_from_webpage(self, url, webpage):
     def _call_api(self, path, item_id, *args, **kwargs):
         return self._download_json(f'{self._API_BASE_URL}/{path}', item_id, *args, **kwargs)

+    def _call_api_authed(self, path, item_id, **kwargs):
+        expected_code_msg = {
+            401: 'Invalid token',
+            403: 'Login required',
+            404: 'Members-only content',
+            408: 'Outdated token',
+        }
+        headers = filter_dict({
+            'Content-Type': 'application/json',
+            'fc_use_device': 'null',
+            'origin': f'https://{self._DOMAIN}',
+            'Authorization': self._auth0_client.get_token(),
+        })
+
+        try:
+            return self._call_api(path, item_id, headers=headers, **kwargs)
+        except ExtractorError as e:
+            if not isinstance(e.cause, HTTPError) or e.cause.status not in expected_code_msg:
+                raise
+            self.raise_login_required(
+                f'{expected_code_msg[e.cause.status]} ({e.cause.status})',
+                metadata_available=True, method=self._LOGIN_METHOD)
+            if e.cause.status == 401:
+                self._auth0_client.clear_token()
+            return None
+
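
In _call_api_authed() above, the header dict is passed through filter_dict, so when get_token() yields no token (presumably None for an anonymous session, as the filter_dict wrapping suggests), the Authorization entry is dropped entirely rather than sent as a literal 'None'. A standalone sketch of that behaviour:

    from yt_dlp.utils import filter_dict

    # filter_dict() keeps only entries whose value is not None (its default condition)
    headers = filter_dict({
        'Content-Type': 'application/json',
        'Authorization': None,  # e.g. no cached or obtainable token
    })
    print(headers)  # {'Content-Type': 'application/json'}
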
     def _find_fanclub_site_id(self, channel_id):
         fanclub_list_json = self._call_api(
             'content_providers/channel_domain', f'channels/{channel_id}',
@@ -606,7 +719,6 @@ def _extract_player_page(self, url):
         )['data']['video_page']

         live_status = self._get_live_status(data_json, content_code)
-        formats = self._get_formats(data_json, live_status, content_code)

         release_timestamp_str = data_json.get('live_scheduled_start_at')

         if live_status == 'is_upcoming':
@@ -618,7 +730,7 @@ def _extract_player_page(self, url):

         return {
             'id': content_code,
-            'formats': formats,
+            'formats': list(self._yield_formats(data_json, live_status, content_code)),
             'live_status': live_status,
             'release_timestamp': unified_timestamp(release_timestamp_str),
             **self._extract_channel_info(channel_id),
@@ -707,41 +819,26 @@ def _get_live_status(self, data_json, content_code):
         self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}')
         return live_status

-    def _get_formats(self, data_json, live_status, content_code):
-        headers = filter_dict({
-            'Content-Type': 'application/json',
-            'fc_use_device': 'null',
-            'origin': f'https://{self._DOMAIN}',
-            'Authorization': self.auth_manager.get_auth_token(),
-        })
-
-        formats = []
+    def _yield_formats(self, data_json, live_status, content_code):
         if data_json.get('video'):
             payload = {}
             if data_json.get('type') == 'live' and live_status == 'was_live':
                 payload = {'broadcast_type': 'dvr'}

-            session_id = self.auth_manager.get_authed_info(
+            session_id = traverse_obj(self._call_api_authed(
                 f'video_pages/{content_code}/session_ids', f'{content_code}/session',
-                ('data', 'session_id', {str}), {
-                    401: 'Members-only content',
-                    403: 'Login required',
-                    408: 'Outdated token',
-                }, data=json.dumps(payload).encode(), headers=headers,
-                note='Getting session id', errnote='Unable to get session id')
+                data=json.dumps(payload).encode(), note='Getting session id', errnote='Unable to get session id'),
+                ('data', 'session_id', {str}))

             if session_id:
                 m3u8_url = data_json['video_stream']['authenticated_url'].format(session_id=session_id)
-                formats = self._extract_m3u8_formats(m3u8_url, content_code)
-        elif data_json.get('audio'):
-            m3u8_url = self.auth_manager.get_authed_info(
+                yield from self._extract_m3u8_formats(m3u8_url, content_code)
+
+        if data_json.get('audio'):
+            m3u8_url = traverse_obj(self._call_api_authed(
                 f'video_pages/{content_code}/content_access', f'{content_code}/content_access',
-                ('data', 'resource', {url_or_none}), {
-                    403: 'Login required',
-                    404: 'Members-only content',
-                    408: 'Outdated token',
-                }, headers=headers, note='Getting content resource',
-                errnote='Unable to get content resource')
+                note='Getting content resource', errnote='Unable to get content resource'),
+                ('data', 'resource', {url_or_none}))

             if m3u8_url:
                 audio_type = traverse_obj(data_json, (
@@ -755,21 +852,16 @@ def _get_formats(self, data_json, live_status, content_code):
                 else:
                     msg += 'This audio may be completely blank'
                 self.raise_login_required(
-                    msg, metadata_available=True, method=self.auth_manager._auth_info.get('login_method'))
+                    msg, metadata_available=True, method=self._LOGIN_METHOD)

-            formats = [{
+            yield {
                 'url': m3u8_url,
-                'format_id': 'audio',
+                'format_id': audio_type,
                 'protocol': 'm3u8_native',
                 'ext': 'm4a',
                 'vcodec': 'none',
                 'acodec': 'aac',
-                'format_note': audio_type,
-            }]
-        else:
-            raise ExtractorError('Unknown media type', video_id=content_code)
-
-        return formats
+            }

     def _fetch_paged_channel_video_list(self, path, query, channel, item_id, page):
         response = self._call_api(