From 370347c3f895b3f4e0f1f1a7dce1dfb6eed5c58d Mon Sep 17 00:00:00 2001 From: kclauhk <78251477+kclauhk@users.noreply.github.com> Date: Fri, 27 Dec 2024 21:52:35 +0800 Subject: [PATCH] Deprioritize TranscriptUrl --- yt_dlp/extractor/mediasite.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index 8b7040c2b..09f957394 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -7,6 +7,7 @@ ExtractorError, determine_ext, float_or_none, + join_nonempty, mimetype2ext, smuggle_url, str_or_none, @@ -269,11 +270,11 @@ def _real_extract(self, url): formats.extend(stream_formats) # XXX: Presentation['Presenters'] - transcripts = presentation.get('Transcripts', {}) + transcripts = presentation.get('Transcripts', []) captions, subtitles = {}, {} for transcript in transcripts: lang_code = traverse_obj( - transcript, (('DetailedLanguageCode', 'LanguageCode'), {str}), get_all=False) + transcript, (('DetailedLanguageCode', 'LanguageCode'), {str}), get_all=False) or 'und' lang_name = transcript.get('Language') t = { 'url': transcript.get('CaptionsUrl'), @@ -283,15 +284,19 @@ def _real_extract(self, url): captions.setdefault(lang_code, []).append(t) else: subtitles.setdefault(lang_code, []).append(t) - if transcript_url := presentation.get('TranscriptUrl'): + if transcript_url := url_or_none(presentation.get('TranscriptUrl')): + if 'playbackTicket=' not in transcript_url: + transcript_url = join_nonempty( + transcript_url, traverse_obj(presentation, ('Streams', 0, 'SlidePlaybackTicketId', {str})), + delim='?playbackTicket=') if determine_ext(transcript_url) != 'txt': if len(transcripts) == 1: - (captions or subtitles).setdefault(lang_code, []).append({ + (captions or subtitles)[lang_code].insert(0, { 'url': transcript_url, 'name': lang_name, }) else: - subtitles.setdefault('und', []).append({'url': transcript_url}) + subtitles.setdefault('und', []).insert(0, {'url': transcript_url}) return { 'id': resource_id,