diff --git a/README.md b/README.md index 634cff5a5..eb7ed6daa 100644 --- a/README.md +++ b/README.md @@ -934,8 +934,9 @@ ## Post-Processing Options: post-processing (default) --post-overwrites Overwrite post-processed files (default) --no-post-overwrites Do not overwrite post-processed files - --embed-subs Embed subtitles in the video (only for mp4, - webm and mkv videos) + --embed-subs Embed subtitles in downloaded media. + Available for video (mp4, webm, mkv) and + "lrc" in audio (m4a, mp3, ogg, flac) --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 91c2635a7..9faf23280 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -491,11 +491,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): 'allowed_values': { 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter', - 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress', + 'no-attach-info-json', 'avoid-mutagen', 'no-external-downloader-progress', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', 'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort', }, 'aliases': { + 'embed-thumbnail-atomicparsley': ['avoid-mutagen'], # compat 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'], '2021': ['2022', 
'no-certifi', 'filename-sanitization'], @@ -1631,7 +1632,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='Embed subtitles in the video (only for mp4, webm and mkv videos)') + help=( + 'Embed subtitles in downloaded media. ' + 'Available for video (mp4, webm, mkv) and "lrc" in audio (m4a, mp3, ogg, flac)')) postproc.add_option( '--no-embed-subs', action='store_false', dest='embedsubtitles', diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index d8ba220ca..857a4771f 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -86,14 +86,39 @@ def run(self, info): mtime = os.stat(filename).st_mtime + avoid_mutagen = any( + opt in self.get_param('compat_opts', []) + for opt in ('avoid-mutagen', 'embed-thumbnail-atomicparsley')) success = True if info['ext'] == 'mp3': - options = [ - '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', - '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] + # Method 1: Use mutagen + if avoid_mutagen: + success = False + elif not mutagen: + self.to_screen('mutagen was not found. Falling back to ffmpeg. 
Lyrics may be corrupted') + success = False + else: + try: + self._report_run('mutagen', filename) + audio = mutagen.id3.ID3(filename) + with open(thumbnail_filename, 'rb') as thumbfile: + audio['APIC'] = mutagen.id3.APIC( + encoding=mutagen.id3.Encoding.UTF8, mime=f'image/{thumbnail_ext}', + type=mutagen.id3.PictureType.COVER_FRONT, desc='Cover (front)', data=thumbfile.read()) + audio.save() + temp_filename = filename + except Exception as err: + self.report_warning(f'unable to embed using mutagen; {err}') + success = False - self._report_run('ffmpeg', filename) - self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) + # Method 2: Use ffmpeg + if not success: + options = [ + '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', + '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] + + self._report_run('ffmpeg', filename) + self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) elif info['ext'] in ['mkv', 'mka']: options = list(self.stream_copy_opts()) @@ -113,9 +138,8 @@ def run(self, info): self.run_ffmpeg(filename, temp_filename, options) elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']: - prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', []) # Method 1: Use mutagen - if not mutagen or prefer_atomicparsley: + if avoid_mutagen or not mutagen: success = False else: self._report_run('mutagen', filename) @@ -151,7 +175,7 @@ def run(self, info): self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg') success = False else: - if not prefer_atomicparsley: + if not avoid_mutagen: self.to_screen('mutagen was not found. 
Falling back to AtomicParsley') cmd = [atomicparsley, filename, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index e59e9832b..e16bf189a 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -9,6 +9,7 @@ import time from .common import PostProcessor +from ..dependencies import mutagen from ..compat import imghdr from ..utils import ( MEDIA_EXTENSIONS, @@ -32,6 +33,7 @@ variadic, write_json_file, ) +from ..utils.subtitles import Subtitle, parse_lrc EXT_TO_OUT_FORMATS = { 'aac': 'adts', @@ -586,7 +588,8 @@ def _options(target_ext): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): - SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka') + SUPPORTS_LYRICS = ('mp3', 'm4a', 'flac', 'opus') + SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka', *SUPPORTS_LYRICS) def __init__(self, downloader=None, already_have_subtitle=False): super().__init__(downloader) @@ -594,9 +597,11 @@ def __init__(self, downloader=None, already_have_subtitle=False): @PostProcessor._restrict_to(images=False) def run(self, info): - if info['ext'] not in self.SUPPORTED_EXTS: + ext = info['ext'] + if ext not in self.SUPPORTED_EXTS: self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files') return [], info + subtitles = info.get('requested_subtitles') if not subtitles: self.to_screen('There aren\'t any subtitles to embed') @@ -614,57 +619,87 @@ def run(self, info): return [], info ''' - ext = info['ext'] - sub_langs, sub_names, sub_filenames = [], [], [] - webm_vtt_warn = False - mp4_ass_warn = False + warnings = set() + def warn_once(msg): + if msg not in warnings: + warnings.add(msg) + self.report_warning(msg) + + subtitles_to_embed = {} for lang, sub_info in subtitles.items(): if not os.path.exists(sub_info.get('filepath', '')): self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') - continue - sub_ext = sub_info['ext'] - if sub_ext == 'json': - 
self.report_warning('JSON subtitles cannot be embedded') - elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'): - sub_langs.append(lang) - sub_names.append(sub_info.get('name')) - sub_filenames.append(sub_info['filepath']) + elif sub_info['ext'] == 'json': + warn_once('JSON subtitles cannot be embedded') + elif ext == 'webm' and sub_info['ext'] != 'vtt': + warn_once('Only WebVTT subtitles can be embedded in webm files') + elif ext in self.SUPPORTS_LYRICS and sub_info['ext'] != 'lrc': + warn_once(f'Only lrc subtitles can be embedded in {ext} files') + elif ext in self.SUPPORTS_LYRICS and not mutagen: + raise PostProcessingError( + f'[{self.PP_NAME}] module mutagen was not found. Please install using `python -m pip install mutagen`') else: - if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': - webm_vtt_warn = True - self.report_warning('Only WebVTT subtitles can be embedded in webm files') - if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass': - mp4_ass_warn = True - self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues') + if ext == 'mp4' and sub_info['ext'] == 'ass': + warn_once('ASS subtitles cannot be properly embedded in mp4 files; expect issues') + subtitles_to_embed[lang] = sub_info - if not sub_langs: + if not subtitles_to_embed: return [], info - input_files = [filename, *sub_filenames] + sub_files = [sub['filepath'] for sub in subtitles_to_embed.values()] + files_to_delete = [] if self._already_have_subtitle else sub_files + + if ext in self.SUPPORTS_LYRICS: + self._embed_lyrics(subtitles_to_embed, info['filepath'], ext) + return files_to_delete, info opts = [ - *self.stream_copy_opts(ext=info['ext']), + *self.stream_copy_opts(ext=ext), # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', ] - for i, (lang, name) in enumerate(zip(sub_langs, sub_names)): - opts.extend(['-map', f'{i + 1}:0']) - lang_code = ISO639Utils.short2long(lang) or lang - 
opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}']) - if name: + for i, (lang, sub) in enumerate(subtitles_to_embed.items()): + lang = ISO639Utils.short2long(lang) or lang + opts.extend(['-map', f'{i + 1}:0', f'-metadata:s:s:{i}', f'language={lang}']) + if name := sub.get('name'): opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}', f'-metadata:s:s:{i}', f'title={name}']) temp_filename = prepend_extension(filename, 'temp') self.to_screen(f'Embedding subtitles in "{filename}"') - self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) + self.run_ffmpeg_multiple_files([filename, *sub_files], temp_filename, opts) os.replace(temp_filename, filename) - files_to_delete = [] if self._already_have_subtitle else sub_filenames return files_to_delete, info + def _embed_lyrics(self, subtitles, filename, ext): + assert mutagen and ext in self.SUPPORTS_LYRICS and all(sub['ext'] == 'lrc' for sub in subtitles.values()) + self.to_screen(f'Embedding lyrics in "{filename}"') + if len(subtitles) > 1: + self.report_warning( + f'Your media player may be unable to display multiple subtitles in {ext}', only_once=True) + + for sub in subtitles.values(): + if not sub.get('data'): + with open(sub['filepath'], encoding='utf-8') as f: + sub['data'] = f.read() + + if ext == 'mp3': + metadata = mutagen.id3.ID3(filename) + for lang, sub in subtitles.items(): + metadata.add(mutagen.id3.SYLT( + encoding=mutagen.id3.Encoding.UTF8, format=2, type=1, + lang=ISO639Utils.short2long(lang) or 'und', + text=[(line.text, int(line.start * 1000)) + for line in parse_lrc(sub['data']) + if isinstance(line, Subtitle)])) + else: + metadata = mutagen.File(filename) + metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()] + metadata.save() + class FFmpegMetadataPP(FFmpegPostProcessor): diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 4093c238c..f7db7a810 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -3806,6 +3806,8 @@ 
class ISO639Utils: @classmethod def short2long(cls, code): """Convert language code from ISO 639-1 to ISO 639-2/T""" + if code in cls._lang_map.values(): + return code return cls._lang_map.get(code[:2]) @classmethod diff --git a/yt_dlp/utils/subtitles.py b/yt_dlp/utils/subtitles.py new file mode 100644 index 000000000..663e3e6e9 --- /dev/null +++ b/yt_dlp/utils/subtitles.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass + +Seconds = float + + +@dataclass +class Metadata: + name: str + value: str + + +@dataclass +class Subtitle: + text: str + start: Seconds + end: Seconds | None = None + + +def parse_lrc(text): + for line in text.split('\n'): + times = [] + while mobj := re.fullmatch(r'\[(?P