import calendar
import itertools
import time
import urllib.parse

from .wrestleuniverse import WrestleUniverseBaseIE
from ..utils import (
    ExtractorError,
    float_or_none,
    merge_dicts,
    parse_iso8601,
    parse_qs,
    str_or_none,
    update_url,
    url_or_none,
)
from ..utils.traversal import traverse_obj


class VoicyBaseIE(WrestleUniverseBaseIE):
    """Shared login/token handling and API access for the Voicy extractors."""

    _LOGIN_HEADERS = {
        'Content-Type': 'application/json',
        'X-Client-Version': 'Chrome/JsCore/10.13.2/FirebaseCore-web',
        'X-Firebase-Gmpid': '1:212371279501:web:318567ddcbb953adcc5cc4',
    }
    _LOGIN_HINT = (
        'Use --username refresh --password <refreshToken>, --username and --password, '
        '--netrc-cmd, or --netrc (voicy) to provide account credentials')
    _LOGIN_QUERY = {'key': 'AIzaSyC5Rg-sxiYu6ySD8V-f6Eljwll8gHvgUK4'}
    _NETRC_MACHINE = 'voicy'

    @WrestleUniverseBaseIE._TOKEN.getter
    def _TOKEN(self):
        """Return the bearer token, refreshing it first when missing or expired.

        Raises a login-required error when a refresh is needed but no
        refresh token is available.
        """
        # NOTE(review): _REAL_TOKEN, _TOKEN_EXPIRY, _REFRESH_TOKEN and
        # _refresh_token() are presumably provided by WrestleUniverseBaseIE
        if not self._REAL_TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
            if not self._REFRESH_TOKEN:
                self.raise_login_required(
                    f'No refreshToken provided. {self._LOGIN_HINT}', method=None)
            self._refresh_token()

        return self._REAL_TOKEN

    def _perform_login(self, username, password):
        """Log in, treating the username ``refresh`` as "password is a refresh token"."""
        if username.lower() == 'refresh':
            self._REFRESH_TOKEN = password
            return self._refresh_token()
        return super()._perform_login(username, password)

    def _call_api(self, path, some_id, note='Downloading JSON metadata', headers=None, query=None, fatal=True):
        """Download JSON from the Voicy player API with the bearer token attached.

        Caller-supplied ``headers`` are merged over the Authorization header.
        """
        return self._download_json(
            f'https://vmedia-player-api.voicy.jp/v1/{path}', some_id,
            note=note, headers={
                'Authorization': f'Bearer {self._TOKEN}',
            } | (headers or {}), query=query, fatal=fatal,
        )


class VoicyIE(VoicyBaseIE):
    """Extractor for a single Voicy story (one or more audio chapters)."""

    IE_NAME = 'voicy'
    IE_DESC = 'Voicy'

    # The capture groups are consumed positionally via .groups() below
    _VALID_URL = [
        r'https?://(?:www\.)?voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)',
        r'http://r\.voicy\.jp/\w+',
    ]
    _TESTS = [{
        'url': 'https://voicy.jp/channel/3402/6361249',
        'info_dict': {
            'id': '8576738',
            'ext': 'm4a',
            'title': '2025.1.19「ブラジル沖の白石康次郎さん&息子の成人式を『1年』間違えたお母さん」',
            'categories': ['トーク', '声優・アナウンサー'],
            'channel': '安住紳一郎の日曜天国',
            'channel_id': '3402',
            'comment_count': int,
            'description': 'md5:f39bb238ff7661c3b7e8934f8578cf33',
            'display_id': '6361249',
            'duration': 1588.741,
            'like_count': int,
            'release_date': '20250119',
            'release_timestamp': 1737272164,
            'series': '2025.1.19「ブラジル沖の白石康次郎さん&息子の成人式を『1年』間違えたお母さん」',
            'series_id': '6361249',
            'thumbnail': r're:https://files\.voicy\.jp/img/speaker/.+$',
            'uploader': 'TBS RADIO',
            'uploader_id': '17328',
            'view_count': int,
        },
    }, {
        'url': 'https://voicy.jp/channel/3272/1141448',
        'info_dict': {
            'id': '2757390',
            'ext': 'mp3',
            'title': '5/3 お久しぶり雑談回',
            'categories': ['トーク', '声優・アナウンサー'],
            'channel': '松嵜麗のボイログ!',
            'channel_id': '3272',
            'comment_count': int,
            'description': 'md5:4dee911d23cf1eedeb49687881878119',
            'display_id': '1141448',
            'duration': 433.24,
            'like_count': int,
            'release_date': '20240502',
            'release_timestamp': 1714662728,
            'series': '最近のわたし',
            'series_id': '1141448',
            'tags': ['最近のマイブーム'],
            'thumbnail': r're:https://files\.voicy\.jp/img/speaker/.+$',
            'uploader': '声優・松嵜麗',
            'uploader_id': '16462',
            'view_count': int,
        },
    }, {
        'url': 'https://voicy.jp/channel/1417/6436213',
        'info_dict': {
            'id': '6436213',
            'title': '第100回 グリム兄弟「麦のほ」',
        },
        'playlist_count': 5,
        'skip': 'Only available for premium supporters',
    }, {
        'url': 'http://r.voicy.jp/7Qm2JbexmY6',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract one story; returns a single entry or a playlist of chapters."""
        # Plain-http URLs (including the r.voicy.jp form) are re-dispatched
        # with an https scheme rather than handled here
        if not url.startswith('https'):
            return self.url_result(update_url(url, scheme='https'))

        channel_id, audio_id = self._match_valid_url(url).groups()
        channel_info = {
            'channel_id': channel_id,
            **traverse_obj(self._call_api(f'channel/{channel_id}', channel_id), {
                # Branch over category name and subcategory name, then
                # type-check each branch result
                'categories': ('category', ('name', ('subcategory', 'name')), {str}),
                'channel': ('name', {str}),
                'thumbnail': ('image', {url_or_none}),
                'uploader': ('personality', 'name', {str}),
                'uploader_id': ('personality', 'id', {str_or_none}),
            }),
        }

        audio_info = self._call_api(f'channels/{channel_id}/stories/{audio_id}', audio_id)
        # Non-fatal request: the result may not be a dict on failure, so it
        # is read through traverse_obj instead of .get()
        description_info = self._call_api(
            f'channels/{channel_id}/stories/{audio_id}/description', audio_id, fatal=False)
        common_info = {
            'description': traverse_obj(description_info, ('description', {str})),
            'display_id': audio_id,
            'series': audio_info['name'],
            'series_id': audio_id,
            **traverse_obj(audio_info, {
                'comment_count': ('comment_count', {int}),
                'like_count': ('like_count', {int}),
                'release_timestamp': ('published', {parse_iso8601}),
                'tags': ('hashtags', ..., 'name', {str}),
            }),
        }

        all_entries = traverse_obj(audio_info, ('chapters', ..., {
            'id': ('id', {str_or_none}),
            'title': ('name', {str}),
            'duration': ('voice', 'duration', {float_or_none(scale=1000)}),
            'manifest': ('voice', 'file', {url_or_none}),
            'view_count': ('play_count', {int}),
        }))

        entries = []
        for entry in all_entries:
            # Chapters without a manifest URL are skipped
            manifest = entry.pop('manifest', None)
            if not manifest:
                continue
            ext = 'm4a' if 'audio_hls_aac' in manifest else 'mp3'
            merged = merge_dicts(entry, channel_info, common_info, {
                'formats': self._extract_m3u8_formats(manifest, audio_id, ext),
            })
            # A single-chapter story is returned as a plain entry, not a playlist
            if len(all_entries) == 1:
                return merged
            entries.append(merged)

        if not entries:
            self.raise_login_required(
                f'Premium(VIP) authentication required. {self._LOGIN_HINT}', method=None)

        return self.playlist_result(entries, audio_id, audio_info['name'])


class VoicyLiveIE(VoicyBaseIE):
    """Extractor for Voicy live pages; only ended streams with an archive resolve."""

    IE_NAME = 'voicy:live'

    _VALID_URL = r'https?://(?:www\.)?voicy\.jp/channel/\d+/live/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1417/live/4858078',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect an ended live stream to its archived story, if one exists."""
        audio_id = self._match_id(url)
        live_info = self._call_api(
            f'live/{audio_id}', audio_id, headers={'X-Platform': '3'})

        if live_info['status'] != 'ended':
            raise ExtractorError('WebRTC is not currently supported', expected=True)

        share_url = traverse_obj(live_info, ('archive', 'share_url', {url_or_none}))
        if share_url:
            return self.url_result(share_url, VoicyIE)

        self.raise_no_formats(
            'This livestream has ended and no archive is available', expected=True)


class VoicyPlaylistBaseIE(VoicyBaseIE):
    """Base class providing paginated listing for the playlist-style extractors."""

    def _entries(self, path, some_id, query, keys, ie=VoicyIE):
        """Yield url_results for every ``share_url`` found across all pages.

        ``keys`` is the traversal path to the objects that hold ``share_url``.
        Paging continues until the response has no ``next_page_token``.
        """
        pagination = ''
        for page in itertools.count(1):
            info = self._call_api(
                path, some_id, f'Downloading page {page}', query={
                    'page_size': '100',
                    'page_token': pagination,
                } | (query or {}),
            )
            for share_url in traverse_obj(info, (*keys, 'share_url', {url_or_none})):
                yield self.url_result(share_url, ie)

            pagination = traverse_obj(info, ('pagination', 'next_page_token', {str}))
            if not pagination:
                break
            # Pause between successive page requests
            self._sleep(1, some_id)


class VoicyChannelIE(VoicyPlaylistBaseIE):
    """Extractor for channel listings (all / premium / one month of back numbers)."""

    IE_NAME = 'voicy:channel'

    # The capture groups are consumed positionally via .groups() below
    _VALID_URL = r'https?://(?:www\.)?voicy\.jp/channel/(?P<id>\d+)(?:/(?P<type>all|backnumber/\d+|premium))?(?:\?|$)'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/3402',
        'info_dict': {
            'id': '3402',
            'title': '安住紳一郎の日曜天国',
        },
        'playlist_mincount': 107,
    }, {
        'url': 'https://voicy.jp/channel/1/premium',
        'info_dict': {
            'id': '1',
            'title': 'Voicy社長の頭の中',
        },
        'playlist_mincount': 145,
    }, {
        'url': 'https://voicy.jp/channel/2856/all',
        'info_dict': {
            'id': '2856',
            'title': 'そんなこんなで、茅原実里です',
        },
        'playlist_mincount': 62,
    }, {
        'url': 'https://voicy.jp/channel/3321/all?type=all&month=202412',
        'info_dict': {
            'id': '3321',
            'title': '海外安全チャンネル・りょーあん',
        },
        'playlist_count': 4,
    }, {
        'url': 'https://voicy.jp/channel/1417/backnumber/202501',
        'info_dict': {
            'id': '1417',
            'title': '繪ほんの中には 公式チャンネル',
        },
        'playlist_count': 5,
    }]

    def _real_extract(self, url):
        """Build the story-list query from the URL and return the channel playlist."""
        channel_id, _type = self._match_valid_url(url).groups()
        channel_info = self._call_api(f'channel/{channel_id}', channel_id)

        # URL query parameters are forwarded to the API; `type` is renamed
        # to `filter_type`, and the fixed keys below take precedence
        query = {
            'filter_type' if k == 'type' else k: v[0]
            for k, v in parse_qs(url).items() if v
        } | {'channel_view_id': channel_id, 'order': 'new'}

        if _type == 'premium':
            query['filter_type'] = 'premium'
        elif (ym := query.pop('month', None) or (_type not in ('all', None) and _type.split('/')[-1])):
            # `ym` is YYYYMM, taken from ?month= or from the backnumber/<ym> path
            y, m = map(int, (ym[:4], ym[4:]))
            d = calendar.monthrange(y, m)[1]
            query.update({
                'from': f'{y}-{m:02d}-01T00:00:00+09:00',
                'to': f'{y}-{m:02d}-{d}T23:59:59+09:00',
            })

        return self.playlist_result(self._entries(
            'stories', channel_id, query, ('stories', ...),
        ), channel_id, channel_info['name'])


class VoicyTopicIE(VoicyPlaylistBaseIE):
    """Extractor for topic-style listings: categories, hashtags, search, etc."""

    IE_NAME = 'voicy:topic'

    _VALID_URL = r'https?://(?:www\.)?voicy\.jp/(?P<id>{})(?:/(?P<ctg_or_kwd>[\w%]+))?(?:/(?P<sub_ctg>[\w-]+))?'.format(
        '|'.join(('audiobook', 'category', 'follow(?:ing-paystory)?', 'hashtag', 'paystory', 'pickup', 'search', 'voicedrama')))
    _TESTS = [{
        'url': 'https://voicy.jp/audiobook',
        'info_dict': {
            'id': '111',
            'title': 'audiobook',
        },
        'playlist_mincount': 96,
    }, {
        'url': 'https://voicy.jp/category/talk/voiceactor-announcer',
        'info_dict': {
            'id': 'voiceactor-announcer',
            'title': '声優・アナウンサー',
        },
        'playlist_mincount': 70,
    }, {
        'url': 'https://voicy.jp/category/sports/all',
        'info_dict': {
            'id': 'sports',
            'title': 'スポーツ',
        },
        'playlist_mincount': 174,
    }, {
        'url': 'https://voicy.jp/hashtag/%E3%82%B9%E3%83%9E%E3%83%BC%E3%83%88%E5%AE%B6%E9%9B%BB',
        'info_dict': {
            'id': 'hashtag',
            'title': 'スマート家電',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://voicy.jp/search/%E6%81%B5%E6%96%B9%E5%B7%BB%E3%81%8D',
        'info_dict': {
            'id': 'search',
            'title': '恵方巻き',
        },
        'playlist_mincount': 102,
    }, {
        'url': 'https://voicy.jp/follow',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the topic kind to an API path/query and return its playlist.

        Raises ExtractorError (expected) for a category, hashtag or search URL
        that lacks its keyword, or for a category the API does not list.
        """
        topic = self._match_id(url)
        # topic -> (topic/playlist id, entries are stories?, extractor for entries)
        topic_id, is_story, ie = {
            'audiobook': ('111', False, VoicyChannelIE),
            'category': ('category', False, VoicyChannelIE),
            'follow': ('1', True, VoicyIE),
            'following-paystory': ('94', True, VoicyIE),
            'hashtag': ('hashtag', True, VoicyIE),
            'paystory': ('70', True, VoicyIE),
            'pickup': ('21', False, VoicyChannelIE),
            'search': ('search', True, VoicyIE),
            'voicedrama': ('93', False, VoicyChannelIE),
        }[topic]
        keys = ('channels', ..., *(('story',) if is_story else ()))

        if topic == 'category':
            ctg, sub_ctg = self._match_valid_url(url).group('ctg_or_kwd', 'sub_ctg')
            if not ctg:
                raise ExtractorError('Invalid URL', expected=True)
            # A missing sub-category is treated like `all`: use the top-level category
            has_sub = sub_ctg not in (None, 'all')
            category = topic_id = sub_ctg if has_sub else ctg
            matches = traverse_obj(self._call_api('channel/categories', None), (
                *((..., 'subcategories') if has_sub else ()),
                lambda _, v: v['view_id'] == category, ('id', 'name'), {str_or_none},
            )) or []
            if len(matches) != 2:
                raise ExtractorError(f'Unable to find category {category!r}', expected=True)
            category_id, topic = matches
            path = f'channel/categories/{category_id}'
            query = {'exclude_story': 'true'}
        elif topic in ('hashtag', 'search'):
            # Match against the percent-decoded URL so the keyword is readable text
            keyword = self._match_valid_url(urllib.parse.unquote(url)).group('ctg_or_kwd')
            if not keyword:
                raise ExtractorError('Invalid URL', expected=True)
            path = 'search/channels/story'
            query = {
                'search_type': {
                    'hashtag': 'hashtag',
                    'search': 'words',
                }[topic],
                'words': keyword,
            }
            topic = keyword
        elif topic_id == '1':
            path = 'user/me/channels/story/following'
            query = {'series_filter': '2'}
        else:
            path = 'topics/channels' + ('/story' if is_story else '')
            query = {'topic_id': topic_id}
            # NOTE(review): presumably this endpoint wraps its results in one
            # more collection level than the others - confirm against the API
            keys = (..., *keys)

        return self.playlist_result(self._entries(path, topic_id, query, keys, ie), topic_id, topic)