From 7a52926a4376f96b72cec30e1a53e96755292cb7 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sat, 14 Dec 2024 18:41:55 -0500 Subject: [PATCH 1/7] Add mutagen metadata postprocessor --- yt_dlp/__init__.py | 5 + yt_dlp/options.py | 4 + yt_dlp/postprocessor/__init__.py | 1 + yt_dlp/postprocessor/mutagen.py | 230 +++++++++++++++++++++++++++++++ 4 files changed, 240 insertions(+) create mode 100644 yt_dlp/postprocessor/mutagen.py diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 20111175b..eaf0e1179 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -691,6 +691,10 @@ def get_postprocessors(opts): 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, } + if opts.prefer_mutagen: + yield { + 'key': 'Mutagen', + } # Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata @@ -917,6 +921,7 @@ def parse_options(argv=None): 'bidi_workaround': opts.bidi_workaround, 'debug_printtraffic': opts.debug_printtraffic, 'prefer_ffmpeg': opts.prefer_ffmpeg, + 'prefer_mutagen': opts.prefer_mutagen, 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'dynamic_mpd': opts.dynamic_mpd, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 930d9d4be..456f00572 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1800,6 +1800,10 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): '"after_video" (after downloading and processing all formats of a video), ' 'or "playlist" (at end of playlist). ' 'This option can be used multiple times to add different postprocessors')) + postproc.add_option( + '--prefer-mutagen', '--no-prefer-mutagen', + action='store_true', dest='prefer_mutagen', + help=optparse.SUPPRESS_HELP) sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=( 'Make chapter entries for, or remove various segments (sponsor, introductions, etc.) 
' diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index 7b1620544..dba3764eb 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -30,6 +30,7 @@ ) from .modify_chapters import ModifyChaptersPP from .movefilesafterdownload import MoveFilesAfterDownloadPP +from .mutagen import MutagenPP from .sponskrub import SponSkrubPP from .sponsorblock import SponsorBlockPP from .xattrpp import XAttrMetadataPP diff --git a/yt_dlp/postprocessor/mutagen.py b/yt_dlp/postprocessor/mutagen.py new file mode 100644 index 000000000..9901063a1 --- /dev/null +++ b/yt_dlp/postprocessor/mutagen.py @@ -0,0 +1,230 @@ +from __future__ import annotations +import collections +from functools import singledispatchmethod +import os +import re +from typing import TypedDict + +from yt_dlp.compat import imghdr +from yt_dlp.utils._utils import PostProcessingError, variadic +from ..dependencies import mutagen + +if mutagen: + import mutagen + from mutagen import ( + FileType, + aiff, + dsdiff, + dsf, + flac, + id3, + mp3, + mp4, + oggopus, + oggspeex, + oggtheora, + oggvorbis, + trueaudio, + wave, + ) + +from yt_dlp.postprocessor.common import PostProcessor + + +class MutagenPPError(PostProcessingError): + pass + + +class MutagenPP(PostProcessor): + def __init__(self, downloader=None): + PostProcessor.__init__(self, downloader) + + class MetadataInfo(TypedDict): + title: str | None + date: str | None + description: str | None + synopsis: str | None + purl: str | None + comment: str | None + track: str | None + artist: str | None + composer: str | None + genre: str | None + album: str | None + album_artist: str | None + disc: str | None + show: str | None + season_number: str | None + episode_id: str | None + episode_sort: str | None + + @singledispatchmethod + @staticmethod + def _assemble_metadata(file: FileType, meta: MetadataInfo) -> None: + raise MutagenPPError(f'Filetype {file.__class__.__name__} is not currently supported') + + 
@staticmethod + def _set_metadata(file: FileType, meta: MetadataInfo, file_name: str, meta_name: str): + if meta[meta_name]: + file[file_name] = meta[meta_name] + + @_assemble_metadata.register(oggvorbis.OggVorbis) + @_assemble_metadata.register(oggtheora.OggTheora) + @_assemble_metadata.register(oggspeex.OggSpeex) + @_assemble_metadata.register(oggopus.OggOpus) + @_assemble_metadata.register(flac.FLAC) + @staticmethod + def _(file: oggopus.OggOpus, meta: MetadataInfo) -> None: + MutagenPP._set_metadata(file, meta, 'artist', 'artist') + MutagenPP._set_metadata(file, meta, 'title', 'title') + MutagenPP._set_metadata(file, meta, 'genre', 'genre') + MutagenPP._set_metadata(file, meta, 'date', 'date') + MutagenPP._set_metadata(file, meta, 'album', 'album') + MutagenPP._set_metadata(file, meta, 'albumartist', 'album_artist') + MutagenPP._set_metadata(file, meta, 'description', 'description') + MutagenPP._set_metadata(file, meta, 'comment', 'comment') + MutagenPP._set_metadata(file, meta, 'composer', 'composer') + MutagenPP._set_metadata(file, meta, 'tracknumber', 'track') + + # https://getmusicbee.com/forum/index.php?topic=39759.0 + MutagenPP._set_metadata(file, meta, 'WWWAUDIOFILE', 'purl') + + @_assemble_metadata.register(trueaudio.TrueAudio) + @_assemble_metadata.register(dsf.DSF) + @_assemble_metadata.register(dsdiff.DSDIFF) + @_assemble_metadata.register(aiff.AIFF) + @_assemble_metadata.register(mp3.MP3) + @_assemble_metadata.register(wave.WAVE) + @staticmethod + def _(file: wave.WAVE, meta: MetadataInfo) -> None: + + def _set_metadata(file_name: str, meta_name: str): + if meta[meta_name]: + id3_class = getattr(id3, file_name) + file[file_name] = id3_class(encoding=id3.Encoding.UTF8, text=meta[meta_name]) + + _set_metadata('TIT2', 'title') + _set_metadata('TPE1', 'artist') + _set_metadata('COMM', 'description') + _set_metadata('TCON', 'genre') + _set_metadata('WFED', 'purl') + _set_metadata('WOAF', 'purl') + _set_metadata('TDAT', 'date') + _set_metadata('TALB', 
'album') + _set_metadata('TPE2', 'album_artist') + _set_metadata('TRCK', 'track') + _set_metadata('TCOM', 'composer') + _set_metadata('TPOS', 'disc') + + @_assemble_metadata.register(mp4.MP4) + @staticmethod + def _(file: mp4.MP4, meta: MetadataInfo) -> None: + MutagenPP._set_metadata(file, meta, '\251ART', 'artist') + MutagenPP._set_metadata(file, meta, '\251nam', 'title') + MutagenPP._set_metadata(file, meta, '\251gen', 'genre') + MutagenPP._set_metadata(file, meta, '\251day', 'date') + MutagenPP._set_metadata(file, meta, '\251alb', 'album') + MutagenPP._set_metadata(file, meta, 'aART', 'album_artist') + MutagenPP._set_metadata(file, meta, '\251cmt', 'description') + MutagenPP._set_metadata(file, meta, '\251wrt', 'composer') + MutagenPP._set_metadata(file, meta, 'disk', 'disc') + MutagenPP._set_metadata(file, meta, 'tvsh', 'show') + MutagenPP._set_metadata(file, meta, 'tvsn', 'season_number') + MutagenPP._set_metadata(file, meta, 'egid', 'episode_id') + MutagenPP._set_metadata(file, meta, 'tven', 'episode_sort') + + if meta['purl']: + # https://getmusicbee.com/forum/index.php?topic=39759.0 + file['----:com.apple.iTunes:WWWAUDIOFILE'] = meta['purl'].encode() + file['purl'] = meta['purl'].encode() + + if meta['track']: + file['trkn'] = [(meta['track'], 0)] + + def _get_cover_art_file(self, info) -> str | None: + idx = next((-i for i, t in enumerate(info['thumbnails'][::-1], 1) if t.get('filepath')), None) + if idx is None: + return None + thumbnail_filename = info['thumbnails'][idx]['filepath'] + if not os.path.exists(thumbnail_filename): + self.report_warning('Skipping embedding the cover art because the file is missing.') + return None + return thumbnail_filename + + def _get_metadata_from_info(self, info) -> MetadataInfo: + meta_prefix = 'meta' + metadata: dict[str, self.MetadataInfo] = collections.defaultdict( + lambda: collections.defaultdict(lambda: None), + ) + + def add(meta_list, info_list=None): + value = next(( + info[key] for key in [f'{meta_prefix}_', 
*variadic(info_list or meta_list)] + if info.get(key) is not None), None) + if value not in ('', None): + value = ', '.join(map(str, variadic(value))) + value = value.replace('\0', '') # nul character cannot be passed in command line + metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) + + add('title', ('track', 'title')) + add('date', 'upload_date') + add(('description', 'synopsis'), 'description') + add(('purl', 'comment'), 'webpage_url') + add('track', 'track_number') + add('artist', ('artist', 'artists', 'creator', 'creators', 'uploader', 'uploader_id')) + add('composer', ('composer', 'composers')) + add('genre', ('genre', 'genres')) + add('album') + add('album_artist', ('album_artist', 'album_artists')) + add('disc', 'disc_number') + add('show', 'series') + add('season_number') + add('episode_id', ('episode', 'episode_id')) + add('episode_sort', 'episode_number') + if 'embed-metadata' in self.get_param('compat_opts', []): + add('comment', 'description') + metadata['common'].pop('synopsis', None) + + meta_regex = rf'{re.escape(meta_prefix)}(?P<i>\d+)?_(?P<key>.+)' + for key, value in info.items(): + mobj = re.fullmatch(meta_regex, key) + if value is not None and mobj: + metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '') + + cover_art = self._get_cover_art_file(info) + if cover_art: + try: + with open(cover_art, 'rb') as cover_file: + cover_data = cover_file.read() + type_ = imghdr.what(h=cover_data) + if not type_: + raise ValueError('could not determine image type') + elif type_ not in ('jpeg', 'png'): + raise ValueError(f'incompatible image type: {type_}') + metadata['common']['cover_art_data'] = cover_data + metadata['common']['cover_art_type'] = type_ + except Exception as err: + self.report_warning(f'Skipping embedding cover art due to error; {err}') + + return metadata['common'] + + @PostProcessor._restrict_to(video=False, images=False) + def run(self, info): + if not mutagen: + raise 
MutagenPPError('module mutagen was not found. Please install using `python3 -m pip install mutagen`') + filename = info['filepath'] + metadata = self._get_metadata_from_info(info) + if not metadata: + self.to_screen('There isn\'t any metadata to add') + return [], info + + self.to_screen(f'Adding metadata to "{filename}"') + try: + f = mutagen.File(filename) + metadata = self._get_metadata_from_info(info) + self._assemble_metadata(f, metadata) + f.save() + except Exception as err: + raise MutagenPPError(f'Unable to embed metadata; {err}') + + return [], info From ffa22df1f24f9abc53e363b7d1dc9238cec6c277 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sat, 14 Dec 2024 19:00:05 -0500 Subject: [PATCH 2/7] Fix error if mutagen is not installed --- yt_dlp/postprocessor/mutagen.py | 153 ++++++++++++++++---------------- 1 file changed, 77 insertions(+), 76 deletions(-) diff --git a/yt_dlp/postprocessor/mutagen.py b/yt_dlp/postprocessor/mutagen.py index 9901063a1..3b4b0ae8f 100644 --- a/yt_dlp/postprocessor/mutagen.py +++ b/yt_dlp/postprocessor/mutagen.py @@ -58,88 +58,89 @@ class MetadataInfo(TypedDict): episode_id: str | None episode_sort: str | None - @singledispatchmethod - @staticmethod - def _assemble_metadata(file: FileType, meta: MetadataInfo) -> None: - raise MutagenPPError(f'Filetype {file.__class__.__name__} is not currently supported') + if mutagen: + @singledispatchmethod + @staticmethod + def _assemble_metadata(file: FileType, meta: MetadataInfo) -> None: + raise MutagenPPError(f'Filetype {file.__class__.__name__} is not currently supported') - @staticmethod - def _set_metadata(file: FileType, meta: MetadataInfo, file_name: str, meta_name: str): - if meta[meta_name]: - file[file_name] = meta[meta_name] - - @_assemble_metadata.register(oggvorbis.OggVorbis) - @_assemble_metadata.register(oggtheora.OggTheora) - @_assemble_metadata.register(oggspeex.OggSpeex) - @_assemble_metadata.register(oggopus.OggOpus) - @_assemble_metadata.register(flac.FLAC) - @staticmethod - 
def _(file: oggopus.OggOpus, meta: MetadataInfo) -> None: - MutagenPP._set_metadata(file, meta, 'artist', 'artist') - MutagenPP._set_metadata(file, meta, 'title', 'title') - MutagenPP._set_metadata(file, meta, 'genre', 'genre') - MutagenPP._set_metadata(file, meta, 'date', 'date') - MutagenPP._set_metadata(file, meta, 'album', 'album') - MutagenPP._set_metadata(file, meta, 'albumartist', 'album_artist') - MutagenPP._set_metadata(file, meta, 'description', 'description') - MutagenPP._set_metadata(file, meta, 'comment', 'comment') - MutagenPP._set_metadata(file, meta, 'composer', 'composer') - MutagenPP._set_metadata(file, meta, 'tracknumber', 'track') - - # https://getmusicbee.com/forum/index.php?topic=39759.0 - MutagenPP._set_metadata(file, meta, 'WWWAUDIOFILE', 'purl') - - @_assemble_metadata.register(trueaudio.TrueAudio) - @_assemble_metadata.register(dsf.DSF) - @_assemble_metadata.register(dsdiff.DSDIFF) - @_assemble_metadata.register(aiff.AIFF) - @_assemble_metadata.register(mp3.MP3) - @_assemble_metadata.register(wave.WAVE) - @staticmethod - def _(file: wave.WAVE, meta: MetadataInfo) -> None: - - def _set_metadata(file_name: str, meta_name: str): + @staticmethod + def _set_metadata(file: FileType, meta: MetadataInfo, file_name: str, meta_name: str): if meta[meta_name]: - id3_class = getattr(id3, file_name) - file[file_name] = id3_class(encoding=id3.Encoding.UTF8, text=meta[meta_name]) + file[file_name] = meta[meta_name] - _set_metadata('TIT2', 'title') - _set_metadata('TPE1', 'artist') - _set_metadata('COMM', 'description') - _set_metadata('TCON', 'genre') - _set_metadata('WFED', 'purl') - _set_metadata('WOAF', 'purl') - _set_metadata('TDAT', 'date') - _set_metadata('TALB', 'album') - _set_metadata('TPE2', 'album_artist') - _set_metadata('TRCK', 'track') - _set_metadata('TCOM', 'composer') - _set_metadata('TPOS', 'disc') + @_assemble_metadata.register(oggvorbis.OggVorbis) + @_assemble_metadata.register(oggtheora.OggTheora) + 
@_assemble_metadata.register(oggspeex.OggSpeex) + @_assemble_metadata.register(oggopus.OggOpus) + @_assemble_metadata.register(flac.FLAC) + @staticmethod + def _(file: oggopus.OggOpus, meta: MetadataInfo) -> None: + MutagenPP._set_metadata(file, meta, 'artist', 'artist') + MutagenPP._set_metadata(file, meta, 'title', 'title') + MutagenPP._set_metadata(file, meta, 'genre', 'genre') + MutagenPP._set_metadata(file, meta, 'date', 'date') + MutagenPP._set_metadata(file, meta, 'album', 'album') + MutagenPP._set_metadata(file, meta, 'albumartist', 'album_artist') + MutagenPP._set_metadata(file, meta, 'description', 'description') + MutagenPP._set_metadata(file, meta, 'comment', 'comment') + MutagenPP._set_metadata(file, meta, 'composer', 'composer') + MutagenPP._set_metadata(file, meta, 'tracknumber', 'track') - @_assemble_metadata.register(mp4.MP4) - @staticmethod - def _(file: mp4.MP4, meta: MetadataInfo) -> None: - MutagenPP._set_metadata(file, meta, '\251ART', 'artist') - MutagenPP._set_metadata(file, meta, '\251nam', 'title') - MutagenPP._set_metadata(file, meta, '\251gen', 'genre') - MutagenPP._set_metadata(file, meta, '\251day', 'date') - MutagenPP._set_metadata(file, meta, '\251alb', 'album') - MutagenPP._set_metadata(file, meta, 'aART', 'album_artist') - MutagenPP._set_metadata(file, meta, '\251cmt', 'description') - MutagenPP._set_metadata(file, meta, '\251wrt', 'composer') - MutagenPP._set_metadata(file, meta, 'disk', 'disc') - MutagenPP._set_metadata(file, meta, 'tvsh', 'show') - MutagenPP._set_metadata(file, meta, 'tvsn', 'season_number') - MutagenPP._set_metadata(file, meta, 'egid', 'episode_id') - MutagenPP._set_metadata(file, meta, 'tven', 'episode_sort') - - if meta['purl']: # https://getmusicbee.com/forum/index.php?topic=39759.0 - file['----:com.apple.iTunes:WWWAUDIOFILE'] = meta['purl'].encode() - file['purl'] = meta['purl'].encode() + MutagenPP._set_metadata(file, meta, 'WWWAUDIOFILE', 'purl') - if meta['track']: - file['trkn'] = [(meta['track'], 0)] + 
@_assemble_metadata.register(trueaudio.TrueAudio) + @_assemble_metadata.register(dsf.DSF) + @_assemble_metadata.register(dsdiff.DSDIFF) + @_assemble_metadata.register(aiff.AIFF) + @_assemble_metadata.register(mp3.MP3) + @_assemble_metadata.register(wave.WAVE) + @staticmethod + def _(file: wave.WAVE, meta: MetadataInfo) -> None: + + def _set_metadata(file_name: str, meta_name: str): + if meta[meta_name]: + id3_class = getattr(id3, file_name) + file[file_name] = id3_class(encoding=id3.Encoding.UTF8, text=meta[meta_name]) + + _set_metadata('TIT2', 'title') + _set_metadata('TPE1', 'artist') + _set_metadata('COMM', 'description') + _set_metadata('TCON', 'genre') + _set_metadata('WFED', 'purl') + _set_metadata('WOAF', 'purl') + _set_metadata('TDAT', 'date') + _set_metadata('TALB', 'album') + _set_metadata('TPE2', 'album_artist') + _set_metadata('TRCK', 'track') + _set_metadata('TCOM', 'composer') + _set_metadata('TPOS', 'disc') + + @_assemble_metadata.register(mp4.MP4) + @staticmethod + def _(file: mp4.MP4, meta: MetadataInfo) -> None: + MutagenPP._set_metadata(file, meta, '\251ART', 'artist') + MutagenPP._set_metadata(file, meta, '\251nam', 'title') + MutagenPP._set_metadata(file, meta, '\251gen', 'genre') + MutagenPP._set_metadata(file, meta, '\251day', 'date') + MutagenPP._set_metadata(file, meta, '\251alb', 'album') + MutagenPP._set_metadata(file, meta, 'aART', 'album_artist') + MutagenPP._set_metadata(file, meta, '\251cmt', 'description') + MutagenPP._set_metadata(file, meta, '\251wrt', 'composer') + MutagenPP._set_metadata(file, meta, 'disk', 'disc') + MutagenPP._set_metadata(file, meta, 'tvsh', 'show') + MutagenPP._set_metadata(file, meta, 'tvsn', 'season_number') + MutagenPP._set_metadata(file, meta, 'egid', 'episode_id') + MutagenPP._set_metadata(file, meta, 'tven', 'episode_sort') + + if meta['purl']: + # https://getmusicbee.com/forum/index.php?topic=39759.0 + file['----:com.apple.iTunes:WWWAUDIOFILE'] = meta['purl'].encode() + file['purl'] = 
meta['purl'].encode() + + if meta['track']: + file['trkn'] = [(meta['track'], 0)] def _get_cover_art_file(self, info) -> str | None: idx = next((-i for i, t in enumerate(info['thumbnails'][::-1], 1) if t.get('filepath')), None) From a8ba5954abdaaacafc44e7478403d7d55e749b4d Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sat, 14 Dec 2024 22:16:37 -0500 Subject: [PATCH 3/7] Remove cover art embedding artifact --- yt_dlp/postprocessor/mutagen.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/yt_dlp/postprocessor/mutagen.py b/yt_dlp/postprocessor/mutagen.py index 3b4b0ae8f..28b42b6d5 100644 --- a/yt_dlp/postprocessor/mutagen.py +++ b/yt_dlp/postprocessor/mutagen.py @@ -1,11 +1,9 @@ from __future__ import annotations import collections from functools import singledispatchmethod -import os import re from typing import TypedDict -from yt_dlp.compat import imghdr from yt_dlp.utils._utils import PostProcessingError, variadic from ..dependencies import mutagen @@ -142,16 +140,6 @@ def _(file: mp4.MP4, meta: MetadataInfo) -> None: if meta['track']: file['trkn'] = [(meta['track'], 0)] - def _get_cover_art_file(self, info) -> str | None: - idx = next((-i for i, t in enumerate(info['thumbnails'][::-1], 1) if t.get('filepath')), None) - if idx is None: - return None - thumbnail_filename = info['thumbnails'][idx]['filepath'] - if not os.path.exists(thumbnail_filename): - self.report_warning('Skipping embedding the cover art because the file is missing.') - return None - return thumbnail_filename - def _get_metadata_from_info(self, info) -> MetadataInfo: meta_prefix = 'meta' metadata: dict[str, self.MetadataInfo] = collections.defaultdict( @@ -192,21 +180,6 @@ def add(meta_list, info_list=None): if value is not None and mobj: metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '') - cover_art = self._get_cover_art_file(info) - if cover_art: - try: - with open(cover_art, 'rb') as cover_file: - cover_data = cover_file.read() - 
type_ = imghdr.what(h=cover_data) - if not type_: - raise ValueError('could not determine image type') - elif type_ not in ('jpeg', 'png'): - raise ValueError(f'incompatible image type: {type_}') - metadata['common']['cover_art_data'] = cover_data - metadata['common']['cover_art_type'] = type_ - except Exception as err: - self.report_warning(f'Skipping embedding cover art due to error; {err}') - return metadata['common'] @PostProcessor._restrict_to(video=False, images=False) From bac8ee69a741017a0f0b34d376ffc456b00133f1 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sun, 15 Dec 2024 17:03:43 -0500 Subject: [PATCH 4/7] Move mutagen PP to ffmpeg PP (WIP) --- yt_dlp/__init__.py | 5 +- yt_dlp/postprocessor/__init__.py | 1 - yt_dlp/postprocessor/ffmpeg.py | 156 ++++++++++++++++++++++- yt_dlp/postprocessor/mutagen.py | 204 ------------------------------- 4 files changed, 155 insertions(+), 211 deletions(-) delete mode 100644 yt_dlp/postprocessor/mutagen.py diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index eaf0e1179..34a1eda96 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -690,10 +690,7 @@ def get_postprocessors(opts): 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, - } - if opts.prefer_mutagen: - yield { - 'key': 'Mutagen', + 'prefer_mutagen': opts.prefer_mutagen, } # Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index dba3764eb..7b1620544 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -30,7 +30,6 @@ ) from .modify_chapters import ModifyChaptersPP from .movefilesafterdownload import MoveFilesAfterDownloadPP -from .mutagen import MutagenPP from .sponskrub import SponSkrubPP from .sponsorblock import SponsorBlockPP from .xattrpp import XAttrMetadataPP diff --git a/yt_dlp/postprocessor/ffmpeg.py 
b/yt_dlp/postprocessor/ffmpeg.py index 8965806ae..6cbaab44f 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -32,6 +32,26 @@ variadic, write_json_file, ) +from ..dependencies import mutagen + +if mutagen: + import mutagen + from mutagen import ( + FileType, + aiff, + dsdiff, + dsf, + flac, + id3, + mp3, + mp4, + oggopus, + oggspeex, + oggtheora, + oggvorbis, + trueaudio, + wave, + ) EXT_TO_OUT_FORMATS = { 'aac': 'adts', @@ -668,11 +688,56 @@ def run(self, info): class FFmpegMetadataPP(FFmpegPostProcessor): - def __init__(self, downloader, add_metadata=True, add_chapters=True, add_infojson='if_exists'): + _MUTAGEN_SUPPORTED_EXTS = ('alac', 'aiff', 'flac', 'mp3', 'm4a', 'ogg', 'opus', 'vorbis', 'wav') + _VORBIS_METADATA = { + 'title': 'title', + 'artist': 'artist', + 'genre': 'genre', + 'date': 'date', + 'album': 'album', + 'albumartist': 'album_artist', + 'description': 'description', + 'comment': 'comment', + 'composer': 'composer', + 'tracknumber': 'track', + 'WWWAUDIOFILE': 'purl', # https://getmusicbee.com/forum/index.php?topic=39759.0 + } + _ID3_METADATA = { + 'TIT2': 'title', + 'TPE1': 'artist', + 'COMM': 'description', + 'TCON': 'genre', + 'WFED': 'purl', + 'WOAF': 'purl', + 'TDAT': 'date', + 'TALB': 'album', + 'TPE2': 'album_artist', + 'TRCK': 'track', + 'TCOM': 'composer', + 'TPOS': 'disc', + } + _MP4_METADATA = { + '\251ART': 'artist', + '\251nam': 'title', + '\251gen': 'genre', + '\251day': 'date', + '\251alb': 'album', + 'aART': 'album_artist', + '\251cmt': 'description', + '\251wrt': 'composer', + 'disk': 'disc', + 'tvsh': 'show', + 'tvsn': 'season_number', + 'egid': 'episode_id', + 'tven': 'episode_sort', + } + + def __init__(self, downloader, add_metadata=True, add_chapters=True, add_infojson='if_exists', prefer_mutagen=False): FFmpegPostProcessor.__init__(self, downloader) self._add_metadata = add_metadata self._add_chapters = add_chapters self._add_infojson = add_infojson + self._prefer_mutagen = prefer_mutagen 
@staticmethod def _options(target_ext): @@ -681,8 +746,91 @@ def _options(target_ext): if audio_only: yield from ('-vn', '-acodec', 'copy') + def _use_mutagen(self, info): + if not self._prefer_mutagen: + return False + if info['ext'] not in self._MUTAGEN_SUPPORTED_EXTS: + return False + if self._add_chapters and info.get('chapters'): + # mutagen can't handle adding chapters to M4A + return False + if not mutagen: + self.report_warning('module mutagen was not found. Please install using `python3 -m pip install mutagen`') + return False + return True + + if mutagen: + @functools.singledispatchmethod + def _assemble_metadata(self, file: FileType, meta: dict) -> None: + raise FFmpegPostProcessorError(f'Filetype {file.__class__.__name__} is not currently supported') + + @_assemble_metadata.register(oggvorbis.OggVorbis) + @_assemble_metadata.register(oggtheora.OggTheora) + @_assemble_metadata.register(oggspeex.OggSpeex) + @_assemble_metadata.register(oggopus.OggOpus) + @_assemble_metadata.register(flac.FLAC) + def _(self, file: oggopus.OggOpus, meta: dict) -> None: + for file_key, meta_key in self._VORBIS_METADATA.items(): + if meta.get(meta_key): + file[file_key] = meta[meta_key] + + @_assemble_metadata.register(trueaudio.TrueAudio) + @_assemble_metadata.register(dsf.DSF) + @_assemble_metadata.register(dsdiff.DSDIFF) + @_assemble_metadata.register(aiff.AIFF) + @_assemble_metadata.register(mp3.MP3) + @_assemble_metadata.register(wave.WAVE) + def _(self, file: wave.WAVE, meta: dict) -> None: + for file_key, meta_key in self._ID3_METADATA.items(): + if meta.get(meta_key): + id3_class = getattr(id3, file_key) + if issubclass(id3_class, id3.UrlFrame): + file[file_key] = id3_class(url=meta[meta_key]) + else: + file[file_key] = id3_class(encoding=id3.Encoding.UTF8, text=meta[meta_key]) + + @_assemble_metadata.register(mp4.MP4) + def _(self, file: mp4.MP4, meta: dict) -> None: + for file_key, meta_key in self._MP4_METADATA.items(): + if meta.get(meta_key): + file[file_key] = 
meta[meta_key] + + if meta.get('purl'): + # https://getmusicbee.com/forum/index.php?topic=39759.0 + file['----:com.apple.iTunes:WWWAUDIOFILE'] = meta['purl'].encode() + file['purl'] = meta['purl'].encode() + + if meta.get('track'): + file['trkn'] = [(meta['track'], 0)] + + def _run_mutagen(self, info): + self.to_screen('Using mutagen to embed metadata') + filename = info['filepath'] + metadata = self._get_metadata_dict(info)['common'] + if not metadata: + self.to_screen('There isn\'t any metadata to add') + return [], info + + self.to_screen(f'Adding metadata to "{filename}"') + try: + f = mutagen.File(filename) + self._assemble_metadata(f, metadata) + f.save() + except Exception as err: + raise FFmpegPostProcessorError(f'Unable to embed metadata; {err}') + + return [], info + @PostProcessor._restrict_to(images=False) def run(self, info): + + if self._use_mutagen(info): + try: + self._run_mutagen(info) + return [], info + except Exception as err: + self.report_warning(f'Unable to embed metadata using mutagen; {err}') + self._fixup_chapters(info) filename, metadata_filename = info['filepath'], None files_to_delete, options = [], [] @@ -732,7 +880,7 @@ def ffmpeg_escape(text): f.write(metadata_file_content) yield ('-map_metadata', '1') - def _get_metadata_opts(self, info): + def _get_metadata_dict(self, info): meta_prefix = 'meta' metadata = collections.defaultdict(dict) @@ -774,6 +922,10 @@ def add(meta_list, info_list=None): mobj = re.fullmatch(meta_regex, key) if value is not None and mobj: metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '') + return metadata + + def _get_metadata_opts(self, info): + metadata = self._get_metadata_dict(info) # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags yield ('-write_id3v1', '1') diff --git a/yt_dlp/postprocessor/mutagen.py b/yt_dlp/postprocessor/mutagen.py deleted file mode 100644 index 28b42b6d5..000000000 --- a/yt_dlp/postprocessor/mutagen.py +++ /dev/null @@ -1,204 
+0,0 @@ -from __future__ import annotations -import collections -from functools import singledispatchmethod -import re -from typing import TypedDict - -from yt_dlp.utils._utils import PostProcessingError, variadic -from ..dependencies import mutagen - -if mutagen: - import mutagen - from mutagen import ( - FileType, - aiff, - dsdiff, - dsf, - flac, - id3, - mp3, - mp4, - oggopus, - oggspeex, - oggtheora, - oggvorbis, - trueaudio, - wave, - ) - -from yt_dlp.postprocessor.common import PostProcessor - - -class MutagenPPError(PostProcessingError): - pass - - -class MutagenPP(PostProcessor): - def __init__(self, downloader=None): - PostProcessor.__init__(self, downloader) - - class MetadataInfo(TypedDict): - title: str | None - date: str | None - description: str | None - synopsis: str | None - purl: str | None - comment: str | None - track: str | None - artist: str | None - composer: str | None - genre: str | None - album: str | None - album_artist: str | None - disc: str | None - show: str | None - season_number: str | None - episode_id: str | None - episode_sort: str | None - - if mutagen: - @singledispatchmethod - @staticmethod - def _assemble_metadata(file: FileType, meta: MetadataInfo) -> None: - raise MutagenPPError(f'Filetype {file.__class__.__name__} is not currently supported') - - @staticmethod - def _set_metadata(file: FileType, meta: MetadataInfo, file_name: str, meta_name: str): - if meta[meta_name]: - file[file_name] = meta[meta_name] - - @_assemble_metadata.register(oggvorbis.OggVorbis) - @_assemble_metadata.register(oggtheora.OggTheora) - @_assemble_metadata.register(oggspeex.OggSpeex) - @_assemble_metadata.register(oggopus.OggOpus) - @_assemble_metadata.register(flac.FLAC) - @staticmethod - def _(file: oggopus.OggOpus, meta: MetadataInfo) -> None: - MutagenPP._set_metadata(file, meta, 'artist', 'artist') - MutagenPP._set_metadata(file, meta, 'title', 'title') - MutagenPP._set_metadata(file, meta, 'genre', 'genre') - MutagenPP._set_metadata(file, meta, 
'date', 'date') - MutagenPP._set_metadata(file, meta, 'album', 'album') - MutagenPP._set_metadata(file, meta, 'albumartist', 'album_artist') - MutagenPP._set_metadata(file, meta, 'description', 'description') - MutagenPP._set_metadata(file, meta, 'comment', 'comment') - MutagenPP._set_metadata(file, meta, 'composer', 'composer') - MutagenPP._set_metadata(file, meta, 'tracknumber', 'track') - - # https://getmusicbee.com/forum/index.php?topic=39759.0 - MutagenPP._set_metadata(file, meta, 'WWWAUDIOFILE', 'purl') - - @_assemble_metadata.register(trueaudio.TrueAudio) - @_assemble_metadata.register(dsf.DSF) - @_assemble_metadata.register(dsdiff.DSDIFF) - @_assemble_metadata.register(aiff.AIFF) - @_assemble_metadata.register(mp3.MP3) - @_assemble_metadata.register(wave.WAVE) - @staticmethod - def _(file: wave.WAVE, meta: MetadataInfo) -> None: - - def _set_metadata(file_name: str, meta_name: str): - if meta[meta_name]: - id3_class = getattr(id3, file_name) - file[file_name] = id3_class(encoding=id3.Encoding.UTF8, text=meta[meta_name]) - - _set_metadata('TIT2', 'title') - _set_metadata('TPE1', 'artist') - _set_metadata('COMM', 'description') - _set_metadata('TCON', 'genre') - _set_metadata('WFED', 'purl') - _set_metadata('WOAF', 'purl') - _set_metadata('TDAT', 'date') - _set_metadata('TALB', 'album') - _set_metadata('TPE2', 'album_artist') - _set_metadata('TRCK', 'track') - _set_metadata('TCOM', 'composer') - _set_metadata('TPOS', 'disc') - - @_assemble_metadata.register(mp4.MP4) - @staticmethod - def _(file: mp4.MP4, meta: MetadataInfo) -> None: - MutagenPP._set_metadata(file, meta, '\251ART', 'artist') - MutagenPP._set_metadata(file, meta, '\251nam', 'title') - MutagenPP._set_metadata(file, meta, '\251gen', 'genre') - MutagenPP._set_metadata(file, meta, '\251day', 'date') - MutagenPP._set_metadata(file, meta, '\251alb', 'album') - MutagenPP._set_metadata(file, meta, 'aART', 'album_artist') - MutagenPP._set_metadata(file, meta, '\251cmt', 'description') - 
MutagenPP._set_metadata(file, meta, '\251wrt', 'composer') - MutagenPP._set_metadata(file, meta, 'disk', 'disc') - MutagenPP._set_metadata(file, meta, 'tvsh', 'show') - MutagenPP._set_metadata(file, meta, 'tvsn', 'season_number') - MutagenPP._set_metadata(file, meta, 'egid', 'episode_id') - MutagenPP._set_metadata(file, meta, 'tven', 'episode_sort') - - if meta['purl']: - # https://getmusicbee.com/forum/index.php?topic=39759.0 - file['----:com.apple.iTunes:WWWAUDIOFILE'] = meta['purl'].encode() - file['purl'] = meta['purl'].encode() - - if meta['track']: - file['trkn'] = [(meta['track'], 0)] - - def _get_metadata_from_info(self, info) -> MetadataInfo: - meta_prefix = 'meta' - metadata: dict[str, self.MetadataInfo] = collections.defaultdict( - lambda: collections.defaultdict(lambda: None), - ) - - def add(meta_list, info_list=None): - value = next(( - info[key] for key in [f'{meta_prefix}_', *variadic(info_list or meta_list)] - if info.get(key) is not None), None) - if value not in ('', None): - value = ', '.join(map(str, variadic(value))) - value = value.replace('\0', '') # nul character cannot be passed in command line - metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) - - add('title', ('track', 'title')) - add('date', 'upload_date') - add(('description', 'synopsis'), 'description') - add(('purl', 'comment'), 'webpage_url') - add('track', 'track_number') - add('artist', ('artist', 'artists', 'creator', 'creators', 'uploader', 'uploader_id')) - add('composer', ('composer', 'composers')) - add('genre', ('genre', 'genres')) - add('album') - add('album_artist', ('album_artist', 'album_artists')) - add('disc', 'disc_number') - add('show', 'series') - add('season_number') - add('episode_id', ('episode', 'episode_id')) - add('episode_sort', 'episode_number') - if 'embed-metadata' in self.get_param('compat_opts', []): - add('comment', 'description') - metadata['common'].pop('synopsis', None) - - meta_regex = 
rf'{re.escape(meta_prefix)}(?P<i>\d+)?_(?P<key>.+)' - for key, value in info.items(): - mobj = re.fullmatch(meta_regex, key) - if value is not None and mobj: - metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '') - - return metadata['common'] - - @PostProcessor._restrict_to(video=False, images=False) - def run(self, info): - if not mutagen: - raise MutagenPPError('module mutagen was not found. Please install using `python3 -m pip install mutagen`') - filename = info['filepath'] - metadata = self._get_metadata_from_info(info) - if not metadata: - self.to_screen('There isn\'t any metadata to add') - return [], info - - self.to_screen(f'Adding metadata to "{filename}"') - try: - f = mutagen.File(filename) - metadata = self._get_metadata_from_info(info) - self._assemble_metadata(f, metadata) - f.save() - except Exception as err: - raise MutagenPPError(f'Unable to embed metadata; {err}') - - return [], info From baea15d927555fc9edb0a05b5ff41620ff3c4770 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sun, 15 Dec 2024 18:14:19 -0500 Subject: [PATCH 5/7] Fixups --- yt_dlp/postprocessor/ffmpeg.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 6cbaab44f..40ebb1d05 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -8,6 +8,8 @@ import subprocess import time +from yt_dlp.utils._utils import date_from_str + from .common import PostProcessor from ..compat import imghdr from ..utils import ( @@ -693,11 +695,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): 'title': 'title', 'artist': 'artist', 'genre': 'genre', - 'date': 'date', 'album': 'album', 'albumartist': 'album_artist', 'description': 'description', - 'comment': 'comment', 'composer': 'composer', 'tracknumber': 'track', 'WWWAUDIOFILE': 'purl', # https://getmusicbee.com/forum/index.php?topic=39759.0 @@ -707,9 +707,7 @@ class
FFmpegMetadataPP(FFmpegPostProcessor): 'TPE1': 'artist', 'COMM': 'description', 'TCON': 'genre', - 'WFED': 'purl', 'WOAF': 'purl', - 'TDAT': 'date', 'TALB': 'album', 'TPE2': 'album_artist', 'TRCK': 'track', @@ -720,7 +718,6 @@ class FFmpegMetadataPP(FFmpegPostProcessor): '\251ART': 'artist', '\251nam': 'title', '\251gen': 'genre', - '\251day': 'date', '\251alb': 'album', 'aART': 'album_artist', '\251cmt': 'description', @@ -774,6 +771,11 @@ def _(self, file: oggopus.OggOpus, meta: dict) -> None: if meta.get(meta_key): file[file_key] = meta[meta_key] + if meta.get('date'): + # Vorbis uses ISO 8601 format YYYY-MM-DD + date = date_from_str(meta['date']) + file['date'] = date.strftime('%Y-%m-%d') + @_assemble_metadata.register(trueaudio.TrueAudio) @_assemble_metadata.register(dsf.DSF) @_assemble_metadata.register(dsdiff.DSDIFF) @@ -789,16 +791,26 @@ def _(self, file: wave.WAVE, meta: dict) -> None: else: file[file_key] = id3_class(encoding=id3.Encoding.UTF8, text=meta[meta_key]) + if meta.get('date'): + # ID3 uses ISO 8601 format YYYY-MM-DD + date = date_from_str(meta['date']) + file['TDRC'] = id3.TDRC(encoding=id3.Encoding.UTF8, text=date.strftime('%Y-%m-%d')) + @_assemble_metadata.register(mp4.MP4) def _(self, file: mp4.MP4, meta: dict) -> None: for file_key, meta_key in self._MP4_METADATA.items(): if meta.get(meta_key): file[file_key] = meta[meta_key] + if meta.get('date'): + # no standard but iTunes uses YYYY-MM-DD format + date = date_from_str(meta['date']) + file['\251day'] = date.strftime('%Y-%m-%d') + if meta.get('purl'): # https://getmusicbee.com/forum/index.php?topic=39759.0 file['----:com.apple.iTunes:WWWAUDIOFILE'] = meta['purl'].encode() - file['purl'] = meta['purl'].encode() + file['purl'] = meta['purl'] if meta.get('track'): file['trkn'] = [(meta['track'], 0)] From e399a564fa0ad60d84a3cb1f4b0d3114fe7d0233 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Sun, 15 Dec 2024 18:38:24 -0500 Subject: [PATCH 6/7] Store Vorbis description in 'comment' field --- 
yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 40ebb1d05..b29cd1d93 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -697,7 +697,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): 'genre': 'genre', 'album': 'album', 'albumartist': 'album_artist', - 'description': 'description', + 'comment': 'description', 'composer': 'composer', 'tracknumber': 'track', 'WWWAUDIOFILE': 'purl', # https://getmusicbee.com/forum/index.php?topic=39759.0 From 57f6ae434273db7b69bab631e6782a1498bcd846 Mon Sep 17 00:00:00 2001 From: 7x11x13 Date: Fri, 27 Dec 2024 13:47:29 -0500 Subject: [PATCH 7/7] Error handling --- yt_dlp/postprocessor/ffmpeg.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b29cd1d93..b029f1289 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -826,6 +826,8 @@ def _run_mutagen(self, info): self.to_screen(f'Adding metadata to "{filename}"') try: f = mutagen.File(filename) + if f is None: + raise TypeError(f'Mutagen unable to determine type of file: {info["ext"]}') self._assemble_metadata(f, metadata) f.save() except Exception as err: