From 156c80bf0a22ba1241c8d46ae9c0e98654004439 Mon Sep 17 00:00:00 2001 From: flanter21 Date: Thu, 13 Feb 2025 18:43:21 +0000 Subject: [PATCH 1/4] [ie/BlackboardCollaborate] Add support for subtitles, live chat, filesize and videos behind a login wall --- yt_dlp/extractor/blackboardcollaborate.py | 75 +++++++++++++++++++---- 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 535890979..a8832b288 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,16 +1,27 @@ +import base64 +import json + from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + mimetype2ext, + parse_iso8601, +) +from ..utils.traversal import traverse_obj + +'''APIs references - Blackboard Learn: https://developer.blackboard.com/portal/displayApi + - Blackboard Collaborate: https://github.com/blackboard/BBDN-Collab-Postman-REST''' class BlackboardCollaborateIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?P[a-z-]+)\.bbcollab\.com/ + (?P[a-z]+)(?:-lti)?\.bbcollab\.com/ (?: collab/ui/session/playback/load| recording )/ - (?P[^/]+)''' + (?P[^/\?]+) + \??(authToken=(?P[\w\.\-]+))?''' _TESTS = [ { 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256', @@ -43,21 +54,63 @@ class BlackboardCollaborateIE(InfoExtractor): ] def _real_extract(self, url): + # Prepare for requests mobj = self._match_valid_url(url) region = mobj.group('region') video_id = mobj.group('id') - info = self._download_json( - f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) - duration = info.get('duration') - title = info['name'] - upload_date = info.get('created') - streams = info['streams'] - formats = [{'format_id': k, 'url': url} for k, url in streams.items()] + token = mobj.group('token') + + headers = {'Authorization': f'Bearer {token}'} + base_url = f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}' + + # Try request the way the player handles it when behind a login + if video_info := self._download_json(f'{base_url}/data/secure', video_id, 'Trying auth token', + headers=headers, fatal=False): + video_extra = self._download_json(f'{base_url}', video_id, 'Retrieving extra attributes', + headers=headers, fatal=False) + + # Blackboard will allow redownloading from the same IP without authentication for a while, so if previous method fails, try this + else: + video_info = self._download_json(f'{base_url}/data', video_id, 'Trying fallback') + video_extra = 0 + + # Get metadata + duration = video_info.get('duration') / 1000 + title = video_info.get('name') + upload_date = video_info.get('created') + + # Get streams + stream_formats = [] + streams = video_info.get('extStreams') # Can also use video_info.get('streams') but I don't know its structure + + for current_stream in streams: + stream_formats.append({ + 'url': current_stream['streamUrl'], + 'container': mimetype2ext(current_stream.get('contentType')), + 'filesize': video_extra.get('storageSize', None), + 'aspect_ratio': video_info.get('aspectRatio', ''), + }) + + # Get subtitles + subtitles = {} + subs = video_info.get('subtitles') + for current_subs in subs: + lang_code = current_subs.get('lang') + subtitles.setdefault(lang_code, []).append({ + 'name': current_subs.get('label'), + 'url': current_subs['url'], + }) + + # Get chat + chats = video_info.get('chats') + for current_chat in chats: + subtitles.setdefault('live_chat', []).append({'url': current_chat['url']}) return { 'duration': duration, - 'formats': formats, + 'formats': stream_formats, 'id': video_id, 'timestamp': parse_iso8601(upload_date), + 'subtitles': subtitles, 'title': title, } From 1e93e34189408e4d04e59df7ffb19f66aab0156a Mon Sep 17 00:00:00 2001 From: flanter21 Date: Thu, 13 Feb 2025 18:44:11 +0000 Subject: [PATCH 2/4] [ie/BlackboardCollaborate] Add new tests --- yt_dlp/extractor/blackboardcollaborate.py | 50 ++++++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index a8832b288..4ebc179bd 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -30,9 +30,55 @@ class BlackboardCollaborateIE(InfoExtractor): 'id': '0a633b6a88824deb8c918f470b22b256', 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1', 'ext': 'mp4', - 'duration': 1896000, - 'timestamp': 1620331399, + 'duration': 1896, + 'timestamp': 1620333295, 'upload_date': '20210506', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/collab/ui/session/playback/load/4bde2dee104f40289a10f8e554270600', + 'md5': '108db6a8f83dcb0c2a07793649581865', + 'info_dict': { + 'id': '4bde2dee104f40289a10f8e554270600', + 'title': 'Meeting - Azerbaycanca erize formasi', + 'ext': 'mp4', + 'duration': 880, + 'timestamp': 1671176868, + 'upload_date': '20221216', + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/f83be390ecff46c0bf7dccb9dddcf5f6', + 'md5': 'e3b0b88ddf7847eae4b4c0e2d40b83a5', + 'info_dict': { + 'id': 'f83be390ecff46c0bf7dccb9dddcf5f6', + 'title': 'Keynote lecture by Laura Carvalho - recording_1', + 'ext': 'mp4', + 'duration': 5506, + 'timestamp': 1662721705, + 'upload_date': '20220909', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/c3e1e7c9e83d4cd9981c93c74888d496', + 'md5': 'fdb2d8c43d66fbc0b0b74ef5e604eb1f', + 'info_dict': { + 'id': 'c3e1e7c9e83d4cd9981c93c74888d496', + 'title': 'International Ally User Group - recording_18', + 'ext': 'mp4', + 'duration': 3479, + 'timestamp': 1721919621, + 'upload_date': '20240725', + 'subtitles': { + 'en': 'mincount:1', + 'live_chat': 'mincount:1', + }, }, }, { From 7196111afc7e29a3f4ccfac9c54a1d40e172a3da Mon Sep 17 00:00:00 2001 From: flanter21 Date: Sun, 23 Feb 2025 22:40:40 +0000 Subject: [PATCH 3/4] [ie/BlackboardCollaborate] Add support for bbcollab.com/launch URL format --- yt_dlp/extractor/_extractors.py | 5 ++++- yt_dlp/extractor/blackboardcollaborate.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 403e1f1f6..2ca3a5afe 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -275,7 +275,10 @@ BitChuteChannelIE, BitChuteIE, ) -from .blackboardcollaborate import BlackboardCollaborateIE +from .blackboardcollaborate import ( + BlackboardCollaborateIE, + BlackboardCollaborateLaunchIE, +) from .bleacherreport import ( BleacherReportCMSIE, BleacherReportIE, diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 4ebc179bd..97fe055de 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -160,3 +160,22 @@ def _real_extract(self, url): 'subtitles': subtitles, 'title': title, } + + +class BlackboardCollaborateLaunchIE(InfoExtractor): + _VALID_URL = r'https?://[a-z]+(?:-lti)?\.bbcollab\.com/launch/(?P[\w\.\-]+)' + + _TESTS = [ + { + 'url': 'https://au.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzQwNDE2NDgzLCJpYXQiOjE3NDA0MTYxODMsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3MzI4YzRjZTNmM2U0ZTcwYmY3MTY3N2RkZTgzMzk2NSIsImNvbnN1bWVySWQiOiJhM2Q3NGM0Y2QyZGU0MGJmODFkMjFlODNlMmEzNzM5MCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.xuELw4EafEwUMoYcCHidGn4Tw9O1QCbYHzYGJUl0kKk', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + token = self._match_valid_url(url)['token'] + video_id = traverse_obj(json.loads(base64.b64decode(token.split('.')[1] + '===')), ('resourceAccessTicket', 'resourceId')) + + redirect_url = self._request_webpage(url, video_id=video_id).url + return self.url_result(redirect_url, + ie=BlackboardCollaborateIE.ie_key(), video_id=video_id) From e34ce312bd421c466ff501e4b2fa1f8b3d73f852 Mon Sep 17 00:00:00 2001 From: flanter21 Date: Mon, 3 Mar 2025 17:01:17 +0000 Subject: [PATCH 4/4] [ie/BlackboardCollaborate] Implement code review suggestions --- yt_dlp/extractor/blackboardcollaborate.py | 66 ++++++++++------------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 97fe055de..eeea9ae26 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -3,8 +3,12 @@ from .common import InfoExtractor from ..utils import ( + int_or_none, mimetype2ext, parse_iso8601, + parse_qs, + str_or_none, + url_or_none, ) from ..utils.traversal import traverse_obj @@ -99,62 +103,50 @@ class BlackboardCollaborateIE(InfoExtractor): }, ] + def _call_api(self, region, video_id, api_call='', token=None, note='Downloading JSON metadata', fatal=False): + return self._download_json(f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/{api_call}', + video_id, note=note, + headers={'Authorization': f'Bearer {token}'} if token else '', fatal=fatal) + def _real_extract(self, url): - # Prepare for requests mobj = self._match_valid_url(url) region = mobj.group('region') video_id = mobj.group('id') - token = mobj.group('token') + token = mobj.group('token') or parse_qs(url).get('authToken') - headers = {'Authorization': f'Bearer {token}'} - base_url = f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}' - - # Try request the way the player handles it when behind a login - if video_info := self._download_json(f'{base_url}/data/secure', video_id, 'Trying auth token', - headers=headers, fatal=False): - video_extra = self._download_json(f'{base_url}', video_id, 'Retrieving extra attributes', - headers=headers, fatal=False) - - # Blackboard will allow redownloading from the same IP without authentication for a while, so if previous method fails, try this + if video_info := self._call_api(region, video_id, 'data/secure', token, 'Trying auth token'): + video_extra = self._call_api(region, video_id, token=token, note='Retrieving extra attributes') else: - video_info = self._download_json(f'{base_url}/data', video_id, 'Trying fallback') - video_extra = 0 + video_info = self._call_api(region, video_id, 'data', note='Trying fallback', fatal=True) + video_extra = {} - # Get metadata - duration = video_info.get('duration') / 1000 + duration = int_or_none(video_info.get('duration'), 1000) title = video_info.get('name') upload_date = video_info.get('created') - # Get streams - stream_formats = [] - streams = video_info.get('extStreams') # Can also use video_info.get('streams') but I don't know its structure + formats = traverse_obj(video_info, ('extStreams', ..., { + 'url': ('streamUrl', {url_or_none}), + 'container': ('contentType', {mimetype2ext}), + 'aspect_ratio': ('aspectRatio'), + })) - for current_stream in streams: - stream_formats.append({ - 'url': current_stream['streamUrl'], - 'container': mimetype2ext(current_stream.get('contentType')), - 'filesize': video_extra.get('storageSize', None), - 'aspect_ratio': video_info.get('aspectRatio', ''), - }) + for cur_format in formats: + cur_format['filesize'] = int_or_none(video_extra.get('storageSize')) - # Get subtitles subtitles = {} - subs = video_info.get('subtitles') - for current_subs in subs: + for current_subs in video_info.get('subtitles'): lang_code = current_subs.get('lang') subtitles.setdefault(lang_code, []).append({ - 'name': current_subs.get('label'), - 'url': current_subs['url'], + 'name': str_or_none(current_subs.get('label')), + 'url': url_or_none(current_subs['url']), }) - # Get chat - chats = video_info.get('chats') - for current_chat in chats: - subtitles.setdefault('live_chat', []).append({'url': current_chat['url']}) + for current_chat in video_info.get('chats'): + subtitles.setdefault('live_chat', []).append({'url': url_or_none(current_chat['url'])}) return { 'duration': duration, - 'formats': stream_formats, + 'formats': formats, 'id': video_id, 'timestamp': parse_iso8601(upload_date), 'subtitles': subtitles, @@ -163,7 +155,7 @@ def _real_extract(self, url): class BlackboardCollaborateLaunchIE(InfoExtractor): - _VALID_URL = r'https?://[a-z]+(?:-lti)?\.bbcollab\.com/launch/(?P[\w\.\-]+)' + _VALID_URL = r'https?://[a-z]+\.bbcollab\.com/launch/(?P[\w\.\-]+)' _TESTS = [ {