From c4d187c7a335de1276be026959973ea1f7b92549 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 28 Dec 2023 23:40:43 +0530 Subject: [PATCH 01/24] Embed thumbnail using mutagen for mp3 files, fix lyrics issue #5635 --- yt_dlp/postprocessor/embedthumbnail.py | 17 +++++++------ yt_dlp/postprocessor/ffmpeg.py | 33 ++++++++++++++++++++------ 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index d7be0b398..a3605e9ab 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -90,12 +90,15 @@ def run(self, info): success = True if info['ext'] == 'mp3': - options = [ - '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', - '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] - - self._report_run('ffmpeg', filename) - self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) + # Using ffmpeg to embed the thumbnail in mp3 files is messing up lyrics + # Using using mutagen instead + audio = mutagen.id3.ID3(filename) + if 'APIC' in audio:del audio['APIC'] + with open(thumbnail_filename, 'rb') as thumbfile: + audio['APIC'] = mutagen.id3.APIC( + encoding=3, mime='image/%s' % thumbnail_ext, type=3, + desc=u'Cover (front)', data=thumbfile.read()) + audio.save() elif info['ext'] in ['mkv', 'mka']: options = list(self.stream_copy_opts()) @@ -215,7 +218,7 @@ def run(self, info): else: raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov') - if success and temp_filename != filename: + if info['ext']!='mp3' and success and temp_filename != filename: os.replace(temp_filename, filename) self.try_utime(filename, mtime, mtime) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 7c904417b..c7cb69166 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -2,6 +2,7 @@ import contextvars import itertools import json +import mutagen import os import re import subprocess @@ -58,8 +59,6 @@ 'alac': ('m4a', None, ('-acodec', 'alac')), 'wav': ('wav', None, ('-f', 'wav')), } - - def create_mapping_re(supported): return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported))) @@ -586,6 +585,7 @@ def _options(target_ext): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): + AUDIO_EXTS = ('mp3','m4a','flac','opus','acc') SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka') def __init__(self, downloader=None, already_have_subtitle=False): @@ -594,8 +594,8 @@ def __init__(self, downloader=None, already_have_subtitle=False): @PostProcessor._restrict_to(images=False) def run(self, info): - if info['ext'] not in self.SUPPORTED_EXTS: - self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files') + if info['ext'] not in self.SUPPORTED_EXTS + self.AUDIO_EXTS: + self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS+self.AUDIO_EXTS)} files') return [], info subtitles = info.get('requested_subtitles') if not subtitles: @@ -637,7 +637,6 @@ def run(self, info): if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass': mp4_ass_warn = True self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues') - if not sub_langs: return [], info @@ -659,12 +658,32 @@ def run(self, info): temp_filename = prepend_extension(filename, 'temp') self.to_screen('Embedding subtitles in "%s"' % filename) - self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) - os.replace(temp_filename, filename) + if info['ext'] in self.AUDIO_EXTS: + self.embed_lyrics(input_files) + else: + self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) + os.replace(temp_filename, filename) files_to_delete = [] if self._already_have_subtitle else sub_filenames return files_to_delete, info + def embed_lyrics(self, input_files): + audio_file = input_files[0] + subs = input_files[1] + if not subs.endswith('.lrc'): + self.report_error('LRC subtitles required. Use "--convert-subs lrc" to convert') + else: + with open(subs, 'r', encoding='utf-8') as f: + lyrics=f.read().strip() + if audio_file.endswith('.mp3'): + audio = mutagen.id3.ID3(audio_file) + audio.add(mutagen.id3.USLT(encoding=3, lang='eng', desc='', text=lyrics)) + audio.save() + else: + metadata = mutagen.File(audio_file) + metadata['©lyr' if audio_file.endswith('.m4a') else 'lyrics'] = [lyrics] + metadata.save() + class FFmpegMetadataPP(FFmpegPostProcessor): From e564c56eac868b0d19372d2d62ace86885de1ab6 Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 29 Dec 2023 00:17:44 +0530 Subject: [PATCH 02/24] changed report_error to PostProcessingError --- yt_dlp/postprocessor/ffmpeg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index c7cb69166..a76f2828f 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -585,7 +585,7 @@ def _options(target_ext): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): - AUDIO_EXTS = ('mp3','m4a','flac','opus','acc') + AUDIO_EXTS = ('mp3','m4a','flac','opus') SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka') def __init__(self, downloader=None, already_have_subtitle=False): @@ -671,7 +671,7 @@ def embed_lyrics(self, input_files): audio_file = input_files[0] subs = input_files[1] if not subs.endswith('.lrc'): - self.report_error('LRC subtitles required. Use "--convert-subs lrc" to convert') + raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') else: with open(subs, 'r', encoding='utf-8') as f: lyrics=f.read().strip() From ebed5745e837752fe3247b0e6b017a49e34acf68 Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 29 Dec 2023 00:38:17 +0530 Subject: [PATCH 03/24] Import mutagen from dependencies module --- yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index a76f2828f..05ce9f563 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -2,7 +2,6 @@ import contextvars import itertools import json -import mutagen import os import re import subprocess @@ -10,6 +9,7 @@ from .common import PostProcessor from ..compat import functools, imghdr +from ..dependencies import mutagen from ..utils import ( MEDIA_EXTENSIONS, ISO639Utils, From 8297e51b34cebe71244cc27fd9dad4a82fffa1ab Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 29 Dec 2023 02:05:07 +0530 Subject: [PATCH 04/24] trying to correct formatting --- yt_dlp/postprocessor/embedthumbnail.py | 5 +++-- yt_dlp/postprocessor/ffmpeg.py | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index a3605e9ab..48c8abfed 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -93,7 +93,8 @@ def run(self, info): # Using ffmpeg to embed the thumbnail in mp3 files is messing up lyrics # Using using mutagen instead audio = mutagen.id3.ID3(filename) - if 'APIC' in audio:del audio['APIC'] + if 'APIC' in audio: + del audio['APIC'] with open(thumbnail_filename, 'rb') as thumbfile: audio['APIC'] = mutagen.id3.APIC( encoding=3, mime='image/%s' % thumbnail_ext, type=3, @@ -218,7 +219,7 @@ def run(self, info): else: raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov') - if info['ext']!='mp3' and success and temp_filename != filename: + if info['ext'] != 'mp3' and success and temp_filename != filename: os.replace(temp_filename, filename) self.try_utime(filename, mtime, mtime) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 05ce9f563..a88009a51 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -59,6 +59,8 @@ 'alac': ('m4a', None, ('-acodec', 'alac')), 'wav': ('wav', None, ('-f', 'wav')), } + + def create_mapping_re(supported): return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported))) @@ -585,7 +587,7 @@ def _options(target_ext): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): - AUDIO_EXTS = ('mp3','m4a','flac','opus') + AUDIO_EXTS = ('mp3', 'm4a', 'flac', 'opus') SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka') def __init__(self, downloader=None, already_have_subtitle=False): @@ -674,7 +676,7 @@ def embed_lyrics(self, input_files): raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') else: with open(subs, 'r', encoding='utf-8') as f: - lyrics=f.read().strip() + lyrics = f.read().strip() if audio_file.endswith('.mp3'): audio = mutagen.id3.ID3(audio_file) audio.add(mutagen.id3.USLT(encoding=3, lang='eng', desc='', text=lyrics)) From 54605db1faf179cb6c2089b3601a3eb992807e95 Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 29 Dec 2023 13:56:17 +0530 Subject: [PATCH 05/24] Refactor thumbnail embedding logic in embedthumbnail.py --- yt_dlp/postprocessor/embedthumbnail.py | 37 ++++++++++++++++++-------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 48c8abfed..b0b613f25 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -90,16 +90,31 @@ def run(self, info): success = True if info['ext'] == 'mp3': - # Using ffmpeg to embed the thumbnail in mp3 files is messing up lyrics - # Using using mutagen instead - audio = mutagen.id3.ID3(filename) - if 'APIC' in audio: - del audio['APIC'] - with open(thumbnail_filename, 'rb') as thumbfile: - audio['APIC'] = mutagen.id3.APIC( - encoding=3, mime='image/%s' % thumbnail_ext, type=3, - desc=u'Cover (front)', data=thumbfile.read()) - audio.save() + # Method 1: Use mutagen + # Prioritize mutagen over ffmpeg since ffmpeg messes up the lyrics data + if mutagen: + try: + self._report_run('mutagen', filename) + audio = mutagen.id3.ID3(filename) + if 'APIC' in audio: + del audio['APIC'] + with open(thumbnail_filename, 'rb') as thumbfile: + audio['APIC'] = mutagen.id3.APIC( + encoding=3, mime='image/%s' % thumbnail_ext, type=3, + desc=u'Cover (front)', data=thumbfile.read()) + audio.save() + temp_filename = filename # Mutagen saves to the original file + except Exception as err: + self.report_warning('unable to embed using mutagen; %s' % error_to_compat_str(err)) + success = False + # Method 2: Use ffmpeg + else: + options = [ + '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', + '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] + + self._report_run('ffmpeg', filename) + self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) elif info['ext'] in ['mkv', 'mka']: options = list(self.stream_copy_opts()) @@ -219,7 +234,7 @@ def run(self, info): else: raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov') - if info['ext'] != 'mp3' and success and temp_filename != filename: + if success and temp_filename != filename: os.replace(temp_filename, filename) self.try_utime(filename, mtime, mtime) From 8c31540e3ab91da858cc0ca742f0e9e2117a0d9d Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 29 Dec 2023 13:58:04 +0530 Subject: [PATCH 06/24] linting --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index b0b613f25..cfa51437d 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -103,7 +103,7 @@ def run(self, info): encoding=3, mime='image/%s' % thumbnail_ext, type=3, desc=u'Cover (front)', data=thumbfile.read()) audio.save() - temp_filename = filename # Mutagen saves to the original file + temp_filename = filename # Mutagen saves to the original file except Exception as err: self.report_warning('unable to embed using mutagen; %s' % error_to_compat_str(err)) success = False From 79bb8f51e45ba0d094855930a3e112eb592e4386 Mon Sep 17 00:00:00 2001 From: fjueic <89282645+fjueic@users.noreply.github.com> Date: Fri, 29 Dec 2023 07:37:00 -0800 Subject: [PATCH 07/24] Update yt_dlp/postprocessor/embedthumbnail.py Co-authored-by: Simon Sawicki --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index cfa51437d..fb778e233 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -105,7 +105,7 @@ def run(self, info): audio.save() temp_filename = filename # Mutagen saves to the original file except Exception as err: - self.report_warning('unable to embed using mutagen; %s' % error_to_compat_str(err)) + self.report_warning(f'unable to embed using mutagen; {err}') success = False # Method 2: Use ffmpeg else: From 51af60e6d71387682cff55ba1aeb011b999ccbf8 Mon Sep 17 00:00:00 2001 From: fjueic <89282645+fjueic@users.noreply.github.com> Date: Fri, 29 Dec 2023 07:38:41 -0800 Subject: [PATCH 08/24] Update yt_dlp/postprocessor/ffmpeg.py Co-authored-by: Simon Sawicki --- yt_dlp/postprocessor/ffmpeg.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index a88009a51..7107574a7 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -674,17 +674,17 @@ def embed_lyrics(self, input_files): subs = input_files[1] if not subs.endswith('.lrc'): raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') + + with open(subs, 'r', encoding='utf-8') as f: + lyrics = f.read().strip() + if audio_file.endswith('.mp3'): + audio = mutagen.id3.ID3(audio_file) + audio.add(mutagen.id3.USLT(encoding=3, lang='eng', desc='', text=lyrics)) + audio.save() else: - with open(subs, 'r', encoding='utf-8') as f: - lyrics = f.read().strip() - if audio_file.endswith('.mp3'): - audio = mutagen.id3.ID3(audio_file) - audio.add(mutagen.id3.USLT(encoding=3, lang='eng', desc='', text=lyrics)) - audio.save() - else: - metadata = mutagen.File(audio_file) - metadata['©lyr' if audio_file.endswith('.m4a') else 'lyrics'] = [lyrics] - metadata.save() + metadata = mutagen.File(audio_file) + metadata['©lyr' if audio_file.endswith('.m4a') else 'lyrics'] = [lyrics] + metadata.save() class FFmpegMetadataPP(FFmpegPostProcessor): From 823543c2a4e31672560dfde59bd61105bdf655e1 Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 29 Dec 2023 22:11:18 +0530 Subject: [PATCH 09/24] Fix encoding issues in embedthumbnail.py and embed_lyrics() --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index fb778e233..5fd92fd34 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -100,7 +100,7 @@ def run(self, info): del audio['APIC'] with open(thumbnail_filename, 'rb') as thumbfile: audio['APIC'] = mutagen.id3.APIC( - encoding=3, mime='image/%s' % thumbnail_ext, type=3, + encoding=mutagen.id3.Encoding.UTF8, mime='image/%s' % thumbnail_ext, type=3, desc=u'Cover (front)', data=thumbfile.read()) audio.save() temp_filename = filename # Mutagen saves to the original file diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 7107574a7..7baff0a67 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -672,6 +672,8 @@ def run(self, info): def embed_lyrics(self, input_files): audio_file = input_files[0] subs = input_files[1] + if len(input_files) > 2: + self.report_warning('More than one subtitle file found. Only one will be embedded') if not subs.endswith('.lrc'): raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') @@ -679,7 +681,7 @@ def embed_lyrics(self, input_files): lyrics = f.read().strip() if audio_file.endswith('.mp3'): audio = mutagen.id3.ID3(audio_file) - audio.add(mutagen.id3.USLT(encoding=3, lang='eng', desc='', text=lyrics)) + audio.add(mutagen.id3.USLT(encoding=mutagen.id3.Encoding.UTF8, lang='und', desc='', text=lyrics)) audio.save() else: metadata = mutagen.File(audio_file) From e9a49de89cb68c1f674d53003a6b07643edb2ab0 Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 29 Dec 2023 22:37:54 +0530 Subject: [PATCH 10/24] added the newline back which i removed by mistake --- yt_dlp/postprocessor/ffmpeg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 7baff0a67..e69ec670e 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -639,6 +639,7 @@ def run(self, info): if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass': mp4_ass_warn = True self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues') + if not sub_langs: return [], info From fb3ec5f67eca05f18aaee2f82b7c2a5fcf55f024 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sat, 30 Dec 2023 15:52:28 +0530 Subject: [PATCH 11/24] Fix embedding of lyrics in audio files --- yt_dlp/postprocessor/embedthumbnail.py | 4 ++- yt_dlp/postprocessor/ffmpeg.py | 37 +++++++++++++------------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 5fd92fd34..a1a21c368 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -107,8 +107,10 @@ def run(self, info): except Exception as err: self.report_warning(f'unable to embed using mutagen; {err}') success = False - # Method 2: Use ffmpeg else: + success = False + # Method 2: Use ffmpeg + if not success: options = [ '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index e69ec670e..1219369e9 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -587,8 +587,8 @@ def _options(target_ext): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): - AUDIO_EXTS = ('mp3', 'm4a', 'flac', 'opus') - SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka') + SUPPORTS_LYRICS = ('mp3', 'm4a', 'flac', 'opus') + SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka', *SUPPORTS_LYRICS) def __init__(self, downloader=None, already_have_subtitle=False): super().__init__(downloader) @@ -596,8 +596,8 @@ def __init__(self, downloader=None, already_have_subtitle=False): @PostProcessor._restrict_to(images=False) def run(self, info): - if info['ext'] not in self.SUPPORTED_EXTS + self.AUDIO_EXTS: - self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS+self.AUDIO_EXTS)} files') + if info['ext'] not in self.SUPPORTED_EXTS: + self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files') return [], info subtitles = info.get('requested_subtitles') if not subtitles: @@ -661,8 +661,8 @@ def run(self, info): temp_filename = prepend_extension(filename, 'temp') self.to_screen('Embedding subtitles in "%s"' % filename) - if info['ext'] in self.AUDIO_EXTS: - self.embed_lyrics(input_files) + if info['ext'] in self.SUPPORTS_LYRICS: + self.embed_lyrics(info['filepath'],sub_dict=info['requested_subtitles']) else: self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) os.replace(temp_filename, filename) @@ -670,23 +670,24 @@ def run(self, info): files_to_delete = [] if self._already_have_subtitle else sub_filenames return files_to_delete, info - def embed_lyrics(self, input_files): - audio_file = input_files[0] - subs = input_files[1] - if len(input_files) > 2: - self.report_warning('More than one subtitle file found. Only one will be embedded') - if not subs.endswith('.lrc'): + def embed_lyrics(self, audio_file,sub_dict): + if len(sub_dict) > 1: + self.report_warning('More than one subtitle file found. Your media player will likely be unable to display all of them.') + + if sub_dict[list(sub_dict.keys())[0]]['ext'] != 'lrc': raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') - with open(subs, 'r', encoding='utf-8') as f: - lyrics = f.read().strip() + lyrics_list = [] + for lyrics in sub_dict.keys(): + lyrics_list.append(sub_dict[lyrics]['data']) if audio_file.endswith('.mp3'): - audio = mutagen.id3.ID3(audio_file) - audio.add(mutagen.id3.USLT(encoding=mutagen.id3.Encoding.UTF8, lang='und', desc='', text=lyrics)) - audio.save() + for lyrics in lyrics_list: + audio = mutagen.id3.ID3(audio_file) + audio.add(mutagen.id3.USLT(encoding=3, lang='und', desc='', text=lyrics)) + audio.save() else: metadata = mutagen.File(audio_file) - metadata['©lyr' if audio_file.endswith('.m4a') else 'lyrics'] = [lyrics] + metadata['©lyr' if audio_file.endswith('.m4a') else 'lyrics'] = lyrics_list metadata.save() From 83d4013ccb37c36790e6ce4eeed8b5598e84f16e Mon Sep 17 00:00:00 2001 From: Rohit Date: Sat, 30 Dec 2023 16:00:08 +0530 Subject: [PATCH 12/24] linting --- yt_dlp/postprocessor/ffmpeg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 1219369e9..627958e60 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -662,7 +662,7 @@ def run(self, info): temp_filename = prepend_extension(filename, 'temp') self.to_screen('Embedding subtitles in "%s"' % filename) if info['ext'] in self.SUPPORTS_LYRICS: - self.embed_lyrics(info['filepath'],sub_dict=info['requested_subtitles']) + self.embed_lyrics(info['filepath'], sub_dict=info['requested_subtitles']) else: self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) os.replace(temp_filename, filename) @@ -670,7 +670,7 @@ def run(self, info): files_to_delete = [] if self._already_have_subtitle else sub_filenames return files_to_delete, info - def embed_lyrics(self, audio_file,sub_dict): + def embed_lyrics(self, audio_file, sub_dict): if len(sub_dict) > 1: self.report_warning('More than one subtitle file found. Your media player will likely be unable to display all of them.') From 6729562666392f22430e170d7645ede5164c96f4 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 17 Jan 2024 21:58:25 +0530 Subject: [PATCH 13/24] asked changes --- yt_dlp/postprocessor/embedthumbnail.py | 2 -- yt_dlp/postprocessor/ffmpeg.py | 12 +++++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index a1a21c368..74eff81ef 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -96,8 +96,6 @@ def run(self, info): try: self._report_run('mutagen', filename) audio = mutagen.id3.ID3(filename) - if 'APIC' in audio: - del audio['APIC'] with open(thumbnail_filename, 'rb') as thumbfile: audio['APIC'] = mutagen.id3.APIC( encoding=mutagen.id3.Encoding.UTF8, mime='image/%s' % thumbnail_ext, type=3, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 627958e60..996c821f8 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -674,17 +674,15 @@ def embed_lyrics(self, audio_file, sub_dict): if len(sub_dict) > 1: self.report_warning('More than one subtitle file found. Your media player will likely be unable to display all of them.') - if sub_dict[list(sub_dict.keys())[0]]['ext'] != 'lrc': + if not all(sub['ext'] == 'lrc' for sub in sub_dict.values()): raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') - lyrics_list = [] - for lyrics in sub_dict.keys(): - lyrics_list.append(sub_dict[lyrics]['data']) + lyrics_list = [sub['data'] for sub in sub_dict.values()] if audio_file.endswith('.mp3'): + audio = mutagen.id3.ID3(audio_file) for lyrics in lyrics_list: - audio = mutagen.id3.ID3(audio_file) - audio.add(mutagen.id3.USLT(encoding=3, lang='und', desc='', text=lyrics)) - audio.save() + audio.add(mutagen.id3.USLT(encoding=mutagen.id3.Encoding.UTF8, lang='und', text=lyrics)) + audio.save() else: metadata = mutagen.File(audio_file) metadata['©lyr' if audio_file.endswith('.m4a') else 'lyrics'] = lyrics_list From 428277de52e6130ec10387a3e79ddb7e76f014c5 Mon Sep 17 00:00:00 2001 From: fjueic <89282645+fjueic@users.noreply.github.com> Date: Wed, 17 Jan 2024 22:19:52 +0530 Subject: [PATCH 14/24] Update yt_dlp/postprocessor/embedthumbnail.py Co-authored-by: Simon Sawicki --- yt_dlp/postprocessor/embedthumbnail.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 74eff81ef..40f924a8d 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -98,8 +98,8 @@ def run(self, info): audio = mutagen.id3.ID3(filename) with open(thumbnail_filename, 'rb') as thumbfile: audio['APIC'] = mutagen.id3.APIC( - encoding=mutagen.id3.Encoding.UTF8, mime='image/%s' % thumbnail_ext, type=3, - desc=u'Cover (front)', data=thumbfile.read()) + encoding=mutagen.id3.Encoding.UTF8, mime=f'image/{thumbnail_ext}', + type=mutagen.id3.PictureType.COVER_FRONT, desc='Cover (front)', data=thumbfile.read()) audio.save() temp_filename = filename # Mutagen saves to the original file except Exception as err: From 34fa76f93f7ad5f35c68fbb06530c4904eb043c2 Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 19 Jan 2024 23:16:50 +0530 Subject: [PATCH 15/24] --embed-subs usage update --- README.md | 6 ++++-- yt_dlp/options.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 06aceec02..5d7e1bf2b 100644 --- a/README.md +++ b/README.md @@ -1000,8 +1000,10 @@ ## Post-Processing Options: post-processing (default) --post-overwrites Overwrite post-processed files (default) --no-post-overwrites Do not overwrite post-processed files - --embed-subs Embed subtitles in the video (only for mp4, - webm and mkv videos) + --embed-subs Embed subtitles in downloaded media. + This option is available for video (mp4, webm, mkv) + and audio (m4a, mp3, ogg, flac). + When embedding subtitles in audio files, use --convert-subtitles lrc --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index e9d927717..d6cbdaf2b 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1582,7 +1582,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='Embed subtitles in the video (only for mp4, webm and mkv videos)') + help='Embed subtitles in downloaded media. This option is available for video (mp4, webm, mkv) and audio (m4a, mp3, ogg, flac). ' + 'When embedding subtitles in audio files, use --convert-subtitles lrc') postproc.add_option( '--no-embed-subs', action='store_false', dest='embedsubtitles', From 54792c9da84bccfed4425d71ddfcce267f3bf798 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 4 Feb 2024 02:32:31 +0530 Subject: [PATCH 16/24] readme using make_readme.py, ffmpeg waring first, option.py update --- README.md | 7 ++++--- yt_dlp/options.py | 6 ++++-- yt_dlp/postprocessor/ffmpeg.py | 5 ++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5d7e1bf2b..979d10fbe 100644 --- a/README.md +++ b/README.md @@ -1001,9 +1001,10 @@ ## Post-Processing Options: --post-overwrites Overwrite post-processed files (default) --no-post-overwrites Do not overwrite post-processed files --embed-subs Embed subtitles in downloaded media. - This option is available for video (mp4, webm, mkv) - and audio (m4a, mp3, ogg, flac). - When embedding subtitles in audio files, use --convert-subtitles lrc + Available for video (mp4, webm, mkv) and + audio (m4a, mp3, ogg, flac). Use --convert- + subtitles lrc when embedding subtitles in + audio files --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index d6cbdaf2b..3fa52c8ca 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1582,8 +1582,10 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='Embed subtitles in downloaded media. This option is available for video (mp4, webm, mkv) and audio (m4a, mp3, ogg, flac). ' - 'When embedding subtitles in audio files, use --convert-subtitles lrc') + help=( + 'Embed subtitles in downloaded media. ' + 'Available for video (mp4, webm, mkv) and audio (m4a, mp3, ogg, flac). ' + 'Use --convert-subtitles lrc when embedding subtitles in audio files')) postproc.add_option( '--no-embed-subs', action='store_false', dest='embedsubtitles', diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 996c821f8..9a685c5a5 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -671,11 +671,10 @@ def run(self, info): return files_to_delete, info def embed_lyrics(self, audio_file, sub_dict): - if len(sub_dict) > 1: - self.report_warning('More than one subtitle file found. Your media player will likely be unable to display all of them.') - if not all(sub['ext'] == 'lrc' for sub in sub_dict.values()): raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') + if len(sub_dict) > 1: + self.report_warning('More than one subtitle file found. Your media player will likely be unable to display all of them.') lyrics_list = [sub['data'] for sub in sub_dict.values()] if audio_file.endswith('.mp3'): From b18cc9c633ae73ae69f1a16fdf682bd5ac58444c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 20 Feb 2024 09:03:28 +0530 Subject: [PATCH 17/24] Rewrite --- README.md | 4 +- yt_dlp/options.py | 3 +- yt_dlp/postprocessor/embedthumbnail.py | 11 +-- yt_dlp/postprocessor/ffmpeg.py | 112 +++++++++++++------------ yt_dlp/utils/_utils.py | 2 + 5 files changed, 70 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 979d10fbe..c621e425a 100644 --- a/README.md +++ b/README.md @@ -1002,9 +1002,7 @@ ## Post-Processing Options: --no-post-overwrites Do not overwrite post-processed files --embed-subs Embed subtitles in downloaded media. Available for video (mp4, webm, mkv) and - audio (m4a, mp3, ogg, flac). Use --convert- - subtitles lrc when embedding subtitles in - audio files + "lrc" in audio (m4a, mp3, ogg, flac) --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 3fa52c8ca..ad4e2190c 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1584,8 +1584,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): action='store_true', dest='embedsubtitles', default=False, help=( 'Embed subtitles in downloaded media. ' - 'Available for video (mp4, webm, mkv) and audio (m4a, mp3, ogg, flac). ' - 'Use --convert-subtitles lrc when embedding subtitles in audio files')) + 'Available for video (mp4, webm, mkv) and "lrc" in audio (m4a, mp3, ogg, flac)')) postproc.add_option( '--no-embed-subs', action='store_false', dest='embedsubtitles', diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 40f924a8d..4abab40d3 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -91,8 +91,10 @@ def run(self, info): success = True if info['ext'] == 'mp3': # Method 1: Use mutagen - # Prioritize mutagen over ffmpeg since ffmpeg messes up the lyrics data - if mutagen: + if not mutagen: + self.to_screen('mutagen not was found. Falling back to ffmpeg. Lyrics may be corrupted') + success = False + else: try: self._report_run('mutagen', filename) audio = mutagen.id3.ID3(filename) @@ -101,12 +103,11 @@ def run(self, info): encoding=mutagen.id3.Encoding.UTF8, mime=f'image/{thumbnail_ext}', type=mutagen.id3.PictureType.COVER_FRONT, desc='Cover (front)', data=thumbfile.read()) audio.save() - temp_filename = filename # Mutagen saves to the original file + temp_filename = filename except Exception as err: self.report_warning(f'unable to embed using mutagen; {err}') success = False - else: - success = False + # Method 2: Use ffmpeg if not success: options = [ diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 9a685c5a5..41dd1df9a 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -596,16 +596,16 @@ def __init__(self, downloader=None, already_have_subtitle=False): @PostProcessor._restrict_to(images=False) def run(self, info): - if info['ext'] not in self.SUPPORTED_EXTS: + filename, ext = info['filepath'], info['ext'] + if ext not in self.SUPPORTED_EXTS: self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files') return [], info + subtitles = info.get('requested_subtitles') if not subtitles: self.to_screen('There aren\'t any subtitles to embed') return [], info - filename = info['filepath'] - # Disabled temporarily. There needs to be a way to override this # in case of duration actually mismatching in extractor # See: https://github.com/yt-dlp/yt-dlp/issues/1870, https://github.com/yt-dlp/yt-dlp/issues/1385 @@ -616,76 +616,84 @@ def run(self, info): return [], info ''' - ext = info['ext'] - sub_langs, sub_names, sub_filenames = [], [], [] - webm_vtt_warn = False - mp4_ass_warn = False + warnings = set() + def warn_once(msg): + if msg not in warnings: + warnings.add(msg) + self.report_warning(msg) + + subtitles_to_embed = {} for lang, sub_info in subtitles.items(): if not os.path.exists(sub_info.get('filepath', '')): self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') - continue - sub_ext = sub_info['ext'] - if sub_ext == 'json': - self.report_warning('JSON subtitles cannot be embedded') - elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': - sub_langs.append(lang) - sub_names.append(sub_info.get('name')) - sub_filenames.append(sub_info['filepath']) + elif sub_info['ext'] == 'json': + warn_once('JSON subtitles cannot be embedded') + elif ext == 'webm' and sub_info['ext'] != 'vtt': + warn_once('Only WebVTT subtitles can be embedded in webm files') + elif ext in self.SUPPORTS_LYRICS and sub_info['ext'] != 'lrc': + warn_once(f'Only lrc subtitles can be embedded in {ext} files') + elif ext in self.SUPPORTS_LYRICS and not mutagen: + raise PostProcessingError( + f'[{self.PP_NAME}] module mutagen was not found. Please install using `python -m pip install mutagen`') else: - if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': - webm_vtt_warn = True - self.report_warning('Only WebVTT subtitles can be embedded in webm files') - if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass': - mp4_ass_warn = True - self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues') + if ext == 'mp4' and sub_info['ext'] == 'ass': + warn_once('ASS subtitles cannot be properly embedded in mp4 files; expect issues') + subtitles_to_embed[lang] = sub_info - if not sub_langs: + if not subtitles_to_embed: return [], info - input_files = [filename] + sub_filenames + sub_files = [sub['filepath'] for sub in subtitles_to_embed.values()] + files_to_delete = [] if self._already_have_subtitle else sub_files + + if ext in self.SUPPORTS_LYRICS: + self._embed_lyrics(subtitles_to_embed, info['filepath'], ext) + return files_to_delete, info opts = [ - *self.stream_copy_opts(ext=info['ext']), + *self.stream_copy_opts(ext=ext), # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', ] - for i, (lang, name) in enumerate(zip(sub_langs, sub_names)): - opts.extend(['-map', '%d:0' % (i + 1)]) - lang_code = ISO639Utils.short2long(lang) or lang - opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) - if name: - opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name, - '-metadata:s:s:%d' % i, 'title=%s' % name]) + for i, (lang, sub) in enumerate(subtitles_to_embed.items()): + lang = ISO639Utils.short2long(lang) or lang + opts.extend(['-map', f'{i + 1}:0', f'-metadata:s:s:{i}', f'language={lang}']) + if name := sub['name']: + opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}', + f'-metadata:s:s:{i}', f'title={name}']) temp_filename = prepend_extension(filename, 'temp') - self.to_screen('Embedding subtitles in "%s"' % filename) - if info['ext'] in self.SUPPORTS_LYRICS: - self.embed_lyrics(info['filepath'], sub_dict=info['requested_subtitles']) - else: - self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) - os.replace(temp_filename, filename) + self.to_screen(f'Embedding subtitles in "{filename}"') + self.run_ffmpeg_multiple_files([filename, *sub_files], temp_filename, opts) + os.replace(temp_filename, filename) - files_to_delete = [] if self._already_have_subtitle else sub_filenames return files_to_delete, info - def embed_lyrics(self, audio_file, sub_dict): - if not all(sub['ext'] == 'lrc' for sub in sub_dict.values()): - raise PostProcessingError('LRC subtitles required. Use "--convert-subs lrc" to convert') - if len(sub_dict) > 1: - self.report_warning('More than one subtitle file found. Your media player will likely be unable to display all of them.') + def _embed_lyrics(self, subtitles, filename, ext): + assert mutagen and ext in self.SUPPORTS_LYRICS and all(sub['ext'] == 'lrc' for sub in subtitles.values()) + self.to_screen(f'Embedding lyrics in "{filename}"') + if len(subtitles) > 1: + self.report_warning( + f'Your media player may be unable to display multiple subtitles in {ext}') - lyrics_list = [sub['data'] for sub in sub_dict.values()] - if audio_file.endswith('.mp3'): - audio = mutagen.id3.ID3(audio_file) - for lyrics in lyrics_list: - audio.add(mutagen.id3.USLT(encoding=mutagen.id3.Encoding.UTF8, lang='und', text=lyrics)) - audio.save() + for lang, sub in subtitles.items(): + if not sub.get('data'): + with open(sub['filepath'], encoding='utf-8') as f: + sub['data'] = f.read() + + if ext == 'mp3': + metadata = mutagen.id3.ID3(filename) + for lang, sub in subtitles.items(): + metadata.add(mutagen.id3.USLT( + encoding=mutagen.id3.Encoding.UTF8, + lang=ISO639Utils.short2long(lang) or 'und', + text=sub['data'])) else: - metadata = mutagen.File(audio_file) - metadata['©lyr' if audio_file.endswith('.m4a') else 'lyrics'] = lyrics_list - metadata.save() + metadata = mutagen.File(filename) + metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()] + metadata.save() class FFmpegMetadataPP(FFmpegPostProcessor): diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 361617c02..54cac7e6b 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -3733,6 +3733,8 @@ class ISO639Utils: @classmethod def short2long(cls, code): """Convert language code from ISO 639-1 to ISO 639-2/T""" + if code in cls._lang_map.values(): + return code return cls._lang_map.get(code[:2]) @classmethod From f1129a54fd7c014e3aae7466c37660efe9ae260f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 20 Feb 2024 08:57:02 +0530 Subject: [PATCH 18/24] Add compat-option --- README.md | 2 +- yt_dlp/options.py | 3 ++- yt_dlp/postprocessor/embedthumbnail.py | 12 ++++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c621e425a..ce02017e4 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ ### Differences in default behavior * Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date. * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this -* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead +* Thumbnail embedding in `mp4`/`mp3` are done with `mutagen` if possible. Use `--compat-options no-embed-thumbnail-mutagen` to force the use of `ffmpeg`/`AtomicParsley` instead * Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` diff --git a/yt_dlp/options.py b/yt_dlp/options.py index ad4e2190c..9de301a56 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -468,11 +468,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): 'allowed_values': { 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter', - 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress', + 'no-attach-info-json', 'no-embed-thumbnail-mutagen', 'no-external-downloader-progress', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', 'prefer-legacy-http-handler', 'manifest-filesize-approx' }, 'aliases': { + 'embed-thumbnail-atomicparsley': ['no-embed-thumbnail-mutagen'], # compat 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 4abab40d3..8712c2e3f 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -88,10 +88,15 @@ def run(self, info): mtime = os.stat(encodeFilename(filename)).st_mtime + avoid_mutagen = any( + opt in self.get_param('compat_opts', []) + for opt in ('no-embed-thumbnail-mutagen', 'embed-thumbnail-atomicparsley')) success = True if info['ext'] == 'mp3': # Method 1: Use mutagen - if not mutagen: + if avoid_mutagen: + success = False + elif not mutagen: self.to_screen('mutagen not was found. Falling back to ffmpeg. Lyrics may be corrupted') success = False else: @@ -135,9 +140,8 @@ def run(self, info): self.run_ffmpeg(filename, temp_filename, options) elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']: - prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', []) # Method 1: Use mutagen - if not mutagen or prefer_atomicparsley: + if avoid_mutagen or not mutagen: success = False else: try: @@ -166,7 +170,7 @@ def run(self, info): self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg') success = False else: - if not prefer_atomicparsley: + if not avoid_mutagen: self.to_screen('mutagen was not found. Falling back to AtomicParsley') cmd = [encodeFilename(atomicparsley, True), encodeFilename(filename, True), From 06d20ff8bd42420a7ec404ccf51d57be780a4d82 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 25 Feb 2024 22:06:41 +0530 Subject: [PATCH 19/24] SYLT usage --- yt_dlp/postprocessor/ffmpeg.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 41dd1df9a..d97930655 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -683,13 +683,27 @@ def _embed_lyrics(self, subtitles, filename, ext): with open(sub['filepath'], encoding='utf-8') as f: sub['data'] = f.read() + def totime(time): + time = time.split(":") + return int((int(time[0])*60 + float(time[1]))*1000) + def convert_lrc_to_sylt(lrc): + lrc = lrc.split("\n") + lrc = [i.strip() for i in lrc] + lrc = [i for i in lrc if i] + lrc = [i for i in lrc if i[-1] != "]"] + lrc = [i.split("]") for i in lrc] + lrc = [[i[0][1:], i[1]] for i in lrc] + lrc = [(i[1], totime(i[0])) for i in lrc] + return lrc if ext == 'mp3': metadata = mutagen.id3.ID3(filename) for lang, sub in subtitles.items(): - metadata.add(mutagen.id3.USLT( + metadata.add(mutagen.id3.SYLT( encoding=mutagen.id3.Encoding.UTF8, lang=ISO639Utils.short2long(lang) or 'und', - text=sub['data'])) + format=2, + type=1, + text=convert_lrc_to_sylt(sub['data']))) else: metadata = mutagen.File(filename) metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()] From bd3676f6e5482d0e3d3ad7ebb185a8e5d32a9493 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 27 Feb 2024 08:51:37 +0530 Subject: [PATCH 20/24] Rename compat option --- README.md | 2 +- yt_dlp/options.py | 4 ++-- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ce02017e4..7395b7d39 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ ### Differences in default behavior * Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date. * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this -* Thumbnail embedding in `mp4`/`mp3` are done with `mutagen` if possible. Use `--compat-options no-embed-thumbnail-mutagen` to force the use of `ffmpeg`/`AtomicParsley` instead +* Thumbnail embedding in `mp4`/`mp3` are done with `mutagen` if possible. Use `--compat-options avoid-mutagen` to force the use of `ffmpeg`/`AtomicParsley` instead * Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 9de301a56..462d8f2a6 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -468,12 +468,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): 'allowed_values': { 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter', - 'no-attach-info-json', 'no-embed-thumbnail-mutagen', 'no-external-downloader-progress', + 'no-attach-info-json', 'avoid-mutagen', 'no-external-downloader-progress', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', 'prefer-legacy-http-handler', 'manifest-filesize-approx' }, 'aliases': { - 'embed-thumbnail-atomicparsley': ['no-embed-thumbnail-mutagen'], # compat + 'embed-thumbnail-atomicparsley': ['avoid-mutagen'], # compat 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 8712c2e3f..c858d4f05 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -90,7 +90,7 @@ def run(self, info): avoid_mutagen = any( opt in self.get_param('compat_opts', []) - for opt in ('no-embed-thumbnail-mutagen', 'embed-thumbnail-atomicparsley')) + for opt in ('avoid-mutagen', 'embed-thumbnail-atomicparsley')) success = True if info['ext'] == 'mp3': # Method 1: Use mutagen From 3f30a6b78ed331f843c8a6b6420371476a36b479 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 27 Feb 2024 08:42:44 +0530 Subject: [PATCH 21/24] Abstract out lrc parsing --- yt_dlp/postprocessor/ffmpeg.py | 21 +++++--------------- yt_dlp/utils/subtitles.py | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 16 deletions(-) create mode 100644 yt_dlp/utils/subtitles.py diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index d97930655..59ca879cf 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -33,6 +33,7 @@ variadic, write_json_file, ) +from ..utils.subtitles import Subtitle, parse_lrc EXT_TO_OUT_FORMATS = { 'aac': 'adts', @@ -683,27 +684,15 @@ def _embed_lyrics(self, subtitles, filename, ext): with open(sub['filepath'], encoding='utf-8') as f: sub['data'] = f.read() - def totime(time): - time = time.split(":") - return int((int(time[0])*60 + float(time[1]))*1000) - def convert_lrc_to_sylt(lrc): - lrc = lrc.split("\n") - lrc = [i.strip() for i in lrc] - lrc = [i for i in lrc if i] - lrc = [i for i in lrc if i[-1] != "]"] - lrc = [i.split("]") for i in lrc] - lrc = [[i[0][1:], i[1]] for i in lrc] - lrc = [(i[1], totime(i[0])) for i in lrc] - return lrc if ext == 'mp3': metadata = mutagen.id3.ID3(filename) for lang, sub in subtitles.items(): metadata.add(mutagen.id3.SYLT( - encoding=mutagen.id3.Encoding.UTF8, + encoding=mutagen.id3.Encoding.UTF8, format=2, type=1, lang=ISO639Utils.short2long(lang) or 'und', - format=2, - type=1, - text=convert_lrc_to_sylt(sub['data']))) + text=[(line.text, int(line.start * 1000)) + for line in parse_lrc(sub['data']) + if isinstance(line, Subtitle)])) else: metadata = mutagen.File(filename) metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()] diff --git a/yt_dlp/utils/subtitles.py b/yt_dlp/utils/subtitles.py new file mode 100644 index 000000000..784acecde --- /dev/null +++ b/yt_dlp/utils/subtitles.py @@ -0,0 +1,36 @@ +import re +from dataclasses import dataclass +from typing import TypeAlias + +Seconds: TypeAlias = float + + +@dataclass +class Metadata: + name: str + value: str + + +@dataclass +class Subtitle: + text: str + start: Seconds + end: Seconds = None + + +def parse_lrc(text): + for line in text.split('\n'): + times = [] + while mobj := re.fullmatch(r'\[(?P