Merge a36d6df3f6 into 05c8023a27

2025-03-09 12:50:23 -05:00 · 2025-03-07 23:03:32 +01:00 · 2025-03-07 23:03:32 +01:00 · 5380818509
commit 5380818509
parent 05c8023a27 a36d6df3f6
6 changed files with 144 additions and 42 deletions
--- a/README.md
+++ b/README.md
@ -934,8 +934,9 @@ ## Post-Processing Options:
                                    post-processing (default)
    --post-overwrites               Overwrite post-processed files (default)
    --no-post-overwrites            Do not overwrite post-processed files
-    --embed-subs                    Embed subtitles in the video (only for mp4,
-                                    webm and mkv videos)
+    --embed-subs                    Embed subtitles in downloaded media.
+                                    Available for video (mp4, webm, mkv) and
+                                    "lrc" in audio (m4a, mp3, ogg, flac)
    --no-embed-subs                 Do not embed subtitles (default)
    --embed-thumbnail               Embed thumbnail in the video as cover art
    --no-embed-thumbnail            Do not embed thumbnail (default)
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@ -491,11 +491,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
            'allowed_values': {
                'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
                'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
-                'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
+                'no-attach-info-json', 'avoid-mutagen', 'no-external-downloader-progress',
                'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
                'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
                'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
            }, 'aliases': {
+                'embed-thumbnail-atomicparsley': ['avoid-mutagen'],  # compat
                'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
                'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
                '2021': ['2022', 'no-certifi', 'filename-sanitization'],
@ -1631,7 +1632,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
    postproc.add_option(
        '--embed-subs',
        action='store_true', dest='embedsubtitles', default=False,
-        help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
+        help=(
+            'Embed subtitles in downloaded media. '
+            'Available for video (mp4, webm, mkv) and "lrc" in audio (m4a, mp3, ogg, flac)'))
    postproc.add_option(
        '--no-embed-subs',
        action='store_false', dest='embedsubtitles',
--- a/yt_dlp/postprocessor/embedthumbnail.py
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@ -86,14 +86,39 @@ def run(self, info):

        mtime = os.stat(filename).st_mtime

+        avoid_mutagen = any(
+            opt in self.get_param('compat_opts', [])
+            for opt in ('avoid-mutagen', 'embed-thumbnail-atomicparsley'))
        success = True
        if info['ext'] == 'mp3':
-            options = [
-                '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
-                '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
+            # Method 1: Use mutagen
+            if avoid_mutagen:
+                success = False
+            elif not mutagen:
+                self.to_screen('mutagen was not found. Falling back to ffmpeg. Lyrics may be corrupted')
+                success = False
+            else:
+                try:
+                    self._report_run('mutagen', filename)
+                    audio = mutagen.id3.ID3(filename)
+                    with open(thumbnail_filename, 'rb') as thumbfile:
+                        audio['APIC'] = mutagen.id3.APIC(
+                            encoding=mutagen.id3.Encoding.UTF8, mime=f'image/{thumbnail_ext}',
+                            type=mutagen.id3.PictureType.COVER_FRONT, desc='Cover (front)', data=thumbfile.read())
+                    audio.save()
+                    temp_filename = filename  # the tags were saved into the input file itself, so there is no separate output
+                except Exception as err:
+                    self.report_warning(f'unable to embed using mutagen; {err}')
+                    success = False  # any mutagen failure hands over to the ffmpeg method that follows

-            self._report_run('ffmpeg', filename)
-            self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+            # Method 2: Use ffmpeg
+            if not success:
+                options = [
+                    '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
+                    '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
+
+                self._report_run('ffmpeg', filename)
+                self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)

        elif info['ext'] in ['mkv', 'mka']:
            options = list(self.stream_copy_opts())
@ -113,9 +138,8 @@ def run(self, info):
            self.run_ffmpeg(filename, temp_filename, options)

        elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
-            prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
            # Method 1: Use mutagen
-            if not mutagen or prefer_atomicparsley:
+            if avoid_mutagen or not mutagen:
                success = False
            else:
                self._report_run('mutagen', filename)
@ -151,7 +175,7 @@ def run(self, info):
                    self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
                    success = False
                else:
-                    if not prefer_atomicparsley:
+                    if not avoid_mutagen:
                        self.to_screen('mutagen was not found. Falling back to AtomicParsley')
                    cmd = [atomicparsley,
                           filename,
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@ -9,6 +9,7 @@
 import time

 from .common import PostProcessor
+from ..dependencies import mutagen
 from ..compat import imghdr
 from ..utils import (
    MEDIA_EXTENSIONS,
@ -32,6 +33,7 @@
    variadic,
    write_json_file,
 )
+from ..utils.subtitles import Subtitle, parse_lrc

 EXT_TO_OUT_FORMATS = {
    'aac': 'adts',
@ -586,7 +588,8 @@ def _options(target_ext):


 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
-    SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka')
+    SUPPORTS_LYRICS = ('mp3', 'm4a', 'flac', 'opus')
+    SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka', *SUPPORTS_LYRICS)

    def __init__(self, downloader=None, already_have_subtitle=False):
        super().__init__(downloader)
@ -594,9 +597,11 @@ def __init__(self, downloader=None, already_have_subtitle=False):

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
-        if info['ext'] not in self.SUPPORTED_EXTS:
+        ext = info['ext']
+        if ext not in self.SUPPORTED_EXTS:
            self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
            return [], info
+
        subtitles = info.get('requested_subtitles')
        if not subtitles:
            self.to_screen('There aren\'t any subtitles to embed')
@ -614,57 +619,87 @@ def run(self, info):
            return [], info
        '''

-        ext = info['ext']
-        sub_langs, sub_names, sub_filenames = [], [], []
-        webm_vtt_warn = False
-        mp4_ass_warn = False
+        warnings = set()

+        def warn_once(msg):
+            if msg not in warnings:
+                warnings.add(msg)
+                self.report_warning(msg)
+
+        subtitles_to_embed = {}
        for lang, sub_info in subtitles.items():
            if not os.path.exists(sub_info.get('filepath', '')):
                self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
-                continue
-            sub_ext = sub_info['ext']
-            if sub_ext == 'json':
-                self.report_warning('JSON subtitles cannot be embedded')
-            elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
-                sub_langs.append(lang)
-                sub_names.append(sub_info.get('name'))
-                sub_filenames.append(sub_info['filepath'])
+            elif sub_info['ext'] == 'json':
+                warn_once('JSON subtitles cannot be embedded')
+            elif ext == 'webm' and sub_info['ext'] != 'vtt':
+                warn_once('Only WebVTT subtitles can be embedded in webm files')
+            elif ext in self.SUPPORTS_LYRICS and sub_info['ext'] != 'lrc':
+                warn_once(f'Only lrc subtitles can be embedded in {ext} files')
+            elif ext in self.SUPPORTS_LYRICS and not mutagen:
+                raise PostProcessingError(
+                    f'[{self.PP_NAME}] module mutagen was not found. Please install using `python -m pip install mutagen`')
            else:
-                if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
-                    webm_vtt_warn = True
-                    self.report_warning('Only WebVTT subtitles can be embedded in webm files')
-            if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
-                mp4_ass_warn = True
-                self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
+                if ext == 'mp4' and sub_info['ext'] == 'ass':
+                    warn_once('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
+                subtitles_to_embed[lang] = sub_info

-        if not sub_langs:
+        if not subtitles_to_embed:
            return [], info

-        input_files = [filename, *sub_filenames]
+        sub_files = [sub['filepath'] for sub in subtitles_to_embed.values()]
+        files_to_delete = [] if self._already_have_subtitle else sub_files
+
+        if ext in self.SUPPORTS_LYRICS:
+            self._embed_lyrics(subtitles_to_embed, info['filepath'], ext)
+            return files_to_delete, info

        opts = [
-            *self.stream_copy_opts(ext=info['ext']),
+            *self.stream_copy_opts(ext=ext),
            # Don't copy the existing subtitles, we may be running the
            # postprocessor a second time
            '-map', '-0:s',
        ]
-        for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
-            opts.extend(['-map', f'{i + 1}:0'])
-            lang_code = ISO639Utils.short2long(lang) or lang
-            opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}'])
-            if name:
+        for i, (lang, sub) in enumerate(subtitles_to_embed.items()):
+            lang = ISO639Utils.short2long(lang) or lang
+            opts.extend(['-map', f'{i + 1}:0', f'-metadata:s:s:{i}', f'language={lang}'])
+            if name := sub.get('name'):  # 'name' may be absent; the replaced code also read it with .get()
                opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}',
                             f'-metadata:s:s:{i}', f'title={name}'])

        temp_filename = prepend_extension(filename, 'temp')
        self.to_screen(f'Embedding subtitles in "{filename}"')
-        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
+        self.run_ffmpeg_multiple_files([filename, *sub_files], temp_filename, opts)
        os.replace(temp_filename, filename)

-        files_to_delete = [] if self._already_have_subtitle else sub_filenames
        return files_to_delete, info

+    def _embed_lyrics(self, subtitles, filename, ext):
+        """Write the given lrc subtitles into the audio file's tags with mutagen (SYLT frames for mp3, one lyrics tag otherwise)"""
+        assert mutagen and ext in self.SUPPORTS_LYRICS and all(sub['ext'] == 'lrc' for sub in subtitles.values())
+        self.to_screen(f'Embedding lyrics in "{filename}"')
+        if len(subtitles) > 1:
+            self.report_warning(
+                f'Your media player may be unable to display multiple subtitles in {ext}', only_once=True)
+        for sub in subtitles.values():
+            if not sub.get('data'):  # read the lrc text from disk when it is not already held in the dict
+                with open(sub['filepath'], encoding='utf-8') as f:
+                    sub['data'] = f.read()
+        # mp3 gets one SYLT frame per language built from the timed lines only; other formats store the raw lrc text
+        if ext == 'mp3':
+            metadata = mutagen.id3.ID3(filename)
+            for lang, sub in subtitles.items():
+                metadata.add(mutagen.id3.SYLT(
+                    encoding=mutagen.id3.Encoding.UTF8, format=2, type=1,  # NOTE(review): presumably ms timestamps / lyrics content - confirm
+                    lang=ISO639Utils.short2long(lang) or 'und',  # 'und' when the language code cannot be converted
+                    text=[(line.text, int(line.start * 1000))  # (text, start time in whole milliseconds)
+                          for line in parse_lrc(sub['data'])
+                          if isinstance(line, Subtitle)]))  # skips the non-Subtitle items parse_lrc yields
+        else:
+            metadata = mutagen.File(filename)
+            metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()]
+        metadata.save()
+

 class FFmpegMetadataPP(FFmpegPostProcessor):

--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -3806,6 +3806,8 @@ class ISO639Utils:
    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
+        if code in cls._lang_map.values():  # code is already one of the map's output values, so return it unchanged
+            return code
        return cls._lang_map.get(code[:2])

    @classmethod
--- a/yt_dlp/utils/subtitles.py
+++ b/yt_dlp/utils/subtitles.py
@ -0,0 +1,37 @@
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+Seconds = float
+
+
+@dataclass
+class Metadata:  # a "[name:value]" tag line, as yielded by parse_lrc
+    name: str  # text between "[" and the first ":"
+    value: str  # text between that ":" and the closing "]", whitespace-stripped by parse_lrc
+
+
+@dataclass
+class Subtitle:  # one timed line of text
+    text: str
+    start: Seconds  # start time in seconds
+    end: Seconds | None = None  # optional end time; parse_lrc does not set it
+
+
+def parse_lrc(text):
+    """Yield Subtitle and Metadata items parsed from LRC text; unparseable lines are yielded as ValueError objects"""
+    for line in text.splitlines():  # splitlines() also drops the "\r" of CRLF input
+        times = []  # a line may carry several leading [time] tags that share one text
+        while mobj := re.fullmatch(r'\[(?P<time>((\d+:)?\d+:)?\d+(\.\d+)?)\](?P<content>.*)', line):
+            times.append(sum(  # colon-separated fields weighted by 60**i, counted from the rightmost one
+                float(t) * 60**i for i, t in enumerate(reversed(mobj.group('time').split(':')))))
+            line = mobj.group('content')
+        for t in times:
+            yield Subtitle(start=t, text=line.strip())
+
+        if not times:
+            if mobj := re.fullmatch(r'\[(?P<name>[^\]:]+):(?P<value>[^\]]+)\]', line):
+                yield Metadata(mobj.group('name'), mobj.group('value').strip())
+            elif line.strip():
+                yield ValueError(line)