1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00

[ie/gem.cbc.ca:playlist] Fix extractor

Authored by: bashonly
This commit is contained in:
bashonly 2025-02-18 20:15:32 -06:00
parent 8e9563651b
commit b0b31facd8
No known key found for this signature in database
GPG key ID: 783F096F253D15B0

View file

@ -518,7 +518,41 @@ def entries():
return self.playlist_result(entries(), playlist_id) return self.playlist_result(entries(), playlist_id)
class CBCGemIE(InfoExtractor): class CBCGemBaseIE(InfoExtractor):
_NETRC_MACHINE = 'cbcgem'
_GEO_COUNTRIES = ['CA']
def _call_show_api(self, item_id, display_id=None):
return self._download_json(
f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}',
display_id or item_id, query={'device': 'web'})
def _extract_item_info(self, item_info):
episode_number = None
title = traverse_obj(item_info, ('title', {str}))
if title and (mobj := re.match(r'(?P<episode>\d+)\. (?P<title>.+)', title)):
episode_number = int_or_none(mobj.group('episode'))
title = mobj.group('title')
return {
'episode_number': episode_number,
**traverse_obj(item_info, {
'id': ('url', {str}),
'episode_id': ('url', {str}),
'description': ('description', {str}),
'thumbnail': ('images', 'card', 'url', {url_or_none}, {update_url(query=None)}),
'episode_number': ('episodeNumber', {int_or_none}),
'duration': ('metadata', 'duration', {int_or_none}),
'release_timestamp': ('metadata', 'airDate', {unified_timestamp}),
'timestamp': ('metadata', 'availabilityDate', {unified_timestamp}),
'age_limit': ('metadata', 'rating', {trim_str(start='C')}, {parse_age_limit}),
}),
'episode': title,
'title': title,
}
class CBCGemIE(CBCGemBaseIE):
IE_NAME = 'gem.cbc.ca' IE_NAME = 'gem.cbc.ca'
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)' _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)'
_TESTS = [{ _TESTS = [{
@ -574,9 +608,7 @@ class CBCGemIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_GEO_COUNTRIES = ['CA']
_TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37' _TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
_NETRC_MACHINE = 'cbcgem'
_claims_token = None _claims_token = None
def _new_claims_token(self, email, password): def _new_claims_token(self, email, password):
@ -639,9 +671,7 @@ def _real_initialize(self):
def _real_extract(self, url): def _real_extract(self, url):
video_id, season_number = self._match_valid_url(url).group('id', 'season') video_id, season_number = self._match_valid_url(url).group('id', 'season')
video_info = self._download_json( video_info = self._call_show_api(video_id)
f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{video_id}',
video_id, query={'device': 'web'})
item_info = traverse_obj(video_info, ( item_info = traverse_obj(video_info, (
'content', ..., 'lineups', ..., 'items', 'content', ..., 'lineups', ..., 'items',
lambda _, v: v['url'] == video_id, any, {require('item info')})) lambda _, v: v['url'] == video_id, any, {require('item info')}))
@ -691,38 +721,21 @@ def _real_extract(self, url):
if 'descriptive' in fmt['format_id'].lower(): if 'descriptive' in fmt['format_id'].lower():
fmt['preference'] = -2 fmt['preference'] = -2
episode_number = None
title = traverse_obj(item_info, ('title', {str}))
if title and (mobj := re.match(r'(?P<episode>\d+)\. (?P<title>.+)', title)):
episode_number = int_or_none(mobj.group('episode'))
title = mobj.group('title')
return { return {
'season_number': int_or_none(season_number), 'season_number': int_or_none(season_number),
'episode_number': episode_number,
**traverse_obj(video_info, { **traverse_obj(video_info, {
'series': ('title', {str}), 'series': ('title', {str}),
'season_number': ('structuredMetadata', 'partofSeason', 'seasonNumber', {int_or_none}), 'season_number': ('structuredMetadata', 'partofSeason', 'seasonNumber', {int_or_none}),
'genres': ('structuredMetadata', 'genre', ..., {str}), 'genres': ('structuredMetadata', 'genre', ..., {str}),
}), }),
**traverse_obj(item_info, { **self._extract_item_info(item_info),
'description': ('description', {str}),
'thumbnail': ('images', 'card', 'url', {url_or_none}, {update_url(query=None)}),
'episode_number': ('episodeNumber', {int_or_none}),
'duration': ('metadata', 'duration', {int_or_none}),
'release_timestamp': ('metadata', 'airDate', {unified_timestamp}),
'timestamp': ('metadata', 'availabilityDate', {unified_timestamp}),
'age_limit': ('metadata', 'rating', {trim_str(start='C')}, {parse_age_limit}),
}),
'id': video_id, 'id': video_id,
'episode_id': video_id, 'episode_id': video_id,
'episode': title,
'title': title,
'formats': formats, 'formats': formats,
} }
class CBCGemPlaylistIE(InfoExtractor): class CBCGemPlaylistIE(CBCGemBaseIE):
IE_NAME = 'gem.cbc.ca:playlist' IE_NAME = 'gem.cbc.ca:playlist'
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)' _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
@ -732,70 +745,35 @@ class CBCGemPlaylistIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'schitts-creek/s06', 'id': 'schitts-creek/s06',
'title': 'Season 6', 'title': 'Season 6',
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
'series': 'Schitt\'s Creek', 'series': 'Schitt\'s Creek',
'season_number': 6, 'season_number': 6,
'season': 'Season 6', 'season': 'Season 6',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
}, },
}, { }, {
'url': 'https://gem.cbc.ca/schitts-creek/s06', 'url': 'https://gem.cbc.ca/schitts-creek/s06',
'only_matching': True, 'only_matching': True,
}] }]
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
def _entries(self, season_info):
for episode in traverse_obj(season_info, ('items', lambda _, v: v['url'])):
yield self.url_result(
f'https://gem.cbc.ca/media/{episode["url"]}', CBCGemIE,
**self._extract_item_info(episode))
def _real_extract(self, url): def _real_extract(self, url):
match = self._match_valid_url(url) season_id, show, season = self._match_valid_url(url).group('id', 'show', 'season')
season_id = match.group('id') show_info = self._call_show_api(show, display_id=season_id)
show = match.group('show') season_info = traverse_obj(show_info, (
show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426) 'content', ..., 'lineups',
season = int(match.group('season')) lambda _, v: v['seasonNumber'] == int(season), any, {require('season info')}))
season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None) return self.playlist_result(
self._entries(season_info), season_id,
if season_info is None: **traverse_obj(season_info, {
raise ExtractorError(f'Couldn\'t find season {season} of {show}') 'title': ('title', {str}),
'season': ('title', {str}),
episodes = [] 'season_number': ('seasonNumber', {int_or_none}),
for episode in season_info['assets']: }), series=traverse_obj(show_info, ('title', {str})))
episodes.append({
'_type': 'url_transparent',
'ie_key': 'CBCGem',
'url': 'https://gem.cbc.ca/media/' + episode['id'],
'id': episode['id'],
'title': episode.get('title'),
'description': episode.get('description'),
'thumbnail': episode.get('image'),
'series': episode.get('series'),
'season_number': episode.get('season'),
'season': season_info['title'],
'season_id': season_info.get('id'),
'episode_number': episode.get('episode'),
'episode': episode.get('title'),
'episode_id': episode['id'],
'duration': episode.get('duration'),
'categories': [episode.get('category')],
})
thumbnail = None
tn_uri = season_info.get('image')
# the-national was observed to use a "data:image/png;base64"
# URI for their 'image' value. The image was 1x1, and is
# probably just a placeholder, so it is ignored.
if tn_uri is not None and not tn_uri.startswith('data:'):
thumbnail = tn_uri
return {
'_type': 'playlist',
'entries': episodes,
'id': season_id,
'title': season_info['title'],
'description': season_info.get('description'),
'thumbnail': thumbnail,
'series': show_info.get('title'),
'season_number': season_info.get('season'),
'season': season_info['title'],
}
class CBCGemLiveIE(InfoExtractor): class CBCGemLiveIE(InfoExtractor):