1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00
This commit is contained in:
Rohit 2025-03-07 23:03:32 +01:00 committed by GitHub
commit 5380818509
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 144 additions and 42 deletions

View file

@ -934,8 +934,9 @@ ## Post-Processing Options:
post-processing (default)
--post-overwrites Overwrite post-processed files (default)
--no-post-overwrites Do not overwrite post-processed files
--embed-subs Embed subtitles in the video (only for mp4,
webm and mkv videos)
--embed-subs Embed subtitles in downloaded media.
Available for video (mp4, webm, mkv) and
"lrc" in audio (m4a, mp3, flac, opus)
--no-embed-subs Do not embed subtitles (default)
--embed-thumbnail Embed thumbnail in the video as cover art
--no-embed-thumbnail Do not embed thumbnail (default)

View file

@ -491,11 +491,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'no-attach-info-json', 'avoid-mutagen', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
}, 'aliases': {
'embed-thumbnail-atomicparsley': ['avoid-mutagen'], # compat
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
@ -1631,7 +1632,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
postproc.add_option(
'--embed-subs',
action='store_true', dest='embedsubtitles', default=False,
help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
help=(
'Embed subtitles in downloaded media. '
'Available for video (mp4, webm, mkv) and "lrc" in audio (m4a, mp3, ogg, flac)'))
postproc.add_option(
'--no-embed-subs',
action='store_false', dest='embedsubtitles',

View file

@ -86,14 +86,39 @@ def run(self, info):
mtime = os.stat(filename).st_mtime
avoid_mutagen = any(
opt in self.get_param('compat_opts', [])
for opt in ('avoid-mutagen', 'embed-thumbnail-atomicparsley'))
success = True
if info['ext'] == 'mp3':
options = [
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
# Method 1: Use mutagen
if avoid_mutagen:
success = False
elif not mutagen:
self.to_screen('mutagen not was found. Falling back to ffmpeg. Lyrics may be corrupted')
success = False
else:
try:
self._report_run('mutagen', filename)
audio = mutagen.id3.ID3(filename)
with open(thumbnail_filename, 'rb') as thumbfile:
audio['APIC'] = mutagen.id3.APIC(
encoding=mutagen.id3.Encoding.UTF8, mime=f'image/{thumbnail_ext}',
type=mutagen.id3.PictureType.COVER_FRONT, desc='Cover (front)', data=thumbfile.read())
audio.save()
temp_filename = filename
except Exception as err:
self.report_warning(f'unable to embed using mutagen; {err}')
success = False
self._report_run('ffmpeg', filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
# Method 2: Use ffmpeg
if not success:
options = [
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
self._report_run('ffmpeg', filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
elif info['ext'] in ['mkv', 'mka']:
options = list(self.stream_copy_opts())
@ -113,9 +138,8 @@ def run(self, info):
self.run_ffmpeg(filename, temp_filename, options)
elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
# Method 1: Use mutagen
if not mutagen or prefer_atomicparsley:
if avoid_mutagen or not mutagen:
success = False
else:
self._report_run('mutagen', filename)
@ -151,7 +175,7 @@ def run(self, info):
self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
success = False
else:
if not prefer_atomicparsley:
if not avoid_mutagen:
self.to_screen('mutagen was not found. Falling back to AtomicParsley')
cmd = [atomicparsley,
filename,

View file

@ -9,6 +9,7 @@
import time
from .common import PostProcessor
from ..dependencies import mutagen
from ..compat import imghdr
from ..utils import (
MEDIA_EXTENSIONS,
@ -32,6 +33,7 @@
variadic,
write_json_file,
)
from ..utils.subtitles import Subtitle, parse_lrc
EXT_TO_OUT_FORMATS = {
'aac': 'adts',
@ -586,7 +588,8 @@ def _options(target_ext):
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka')
SUPPORTS_LYRICS = ('mp3', 'm4a', 'flac', 'opus')
SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka', *SUPPORTS_LYRICS)
def __init__(self, downloader=None, already_have_subtitle=False):
super().__init__(downloader)
@ -594,9 +597,11 @@ def __init__(self, downloader=None, already_have_subtitle=False):
@PostProcessor._restrict_to(images=False)
def run(self, info):
if info['ext'] not in self.SUPPORTED_EXTS:
ext = info['ext']
if ext not in self.SUPPORTED_EXTS:
self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
return [], info
subtitles = info.get('requested_subtitles')
if not subtitles:
self.to_screen('There aren\'t any subtitles to embed')
@ -614,57 +619,87 @@ def run(self, info):
return [], info
'''
ext = info['ext']
sub_langs, sub_names, sub_filenames = [], [], []
webm_vtt_warn = False
mp4_ass_warn = False
warnings = set()
def warn_once(msg):
if msg not in warnings:
warnings.add(msg)
self.report_warning(msg)
subtitles_to_embed = {}
for lang, sub_info in subtitles.items():
if not os.path.exists(sub_info.get('filepath', '')):
self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
continue
sub_ext = sub_info['ext']
if sub_ext == 'json':
self.report_warning('JSON subtitles cannot be embedded')
elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
sub_langs.append(lang)
sub_names.append(sub_info.get('name'))
sub_filenames.append(sub_info['filepath'])
elif sub_info['ext'] == 'json':
warn_once('JSON subtitles cannot be embedded')
elif ext == 'webm' and sub_info['ext'] != 'vtt':
warn_once('Only WebVTT subtitles can be embedded in webm files')
elif ext in self.SUPPORTS_LYRICS and sub_info['ext'] != 'lrc':
warn_once(f'Only lrc subtitles can be embedded in {ext} files')
elif ext in self.SUPPORTS_LYRICS and not mutagen:
raise PostProcessingError(
f'[{self.PP_NAME}] module mutagen was not found. Please install using `python -m pip install mutagen`')
else:
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True
self.report_warning('Only WebVTT subtitles can be embedded in webm files')
if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
mp4_ass_warn = True
self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
if ext == 'mp4' and sub_info['ext'] == 'ass':
warn_once('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
subtitles_to_embed[lang] = sub_info
if not sub_langs:
if not subtitles_to_embed:
return [], info
input_files = [filename, *sub_filenames]
sub_files = [sub['filepath'] for sub in subtitles_to_embed.values()]
files_to_delete = [] if self._already_have_subtitle else sub_files
if ext in self.SUPPORTS_LYRICS:
self._embed_lyrics(subtitles_to_embed, info['filepath'], ext)
return files_to_delete, info
opts = [
*self.stream_copy_opts(ext=info['ext']),
*self.stream_copy_opts(ext=ext),
# Don't copy the existing subtitles, we may be running the
# postprocessor a second time
'-map', '-0:s',
]
for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
opts.extend(['-map', f'{i + 1}:0'])
lang_code = ISO639Utils.short2long(lang) or lang
opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}'])
if name:
for i, (lang, sub) in enumerate(subtitles_to_embed.items()):
lang = ISO639Utils.short2long(lang) or lang
opts.extend(['-map', f'{i + 1}:0', f'-metadata:s:s:{i}', f'language={lang}'])
if name := sub['name']:
opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}',
f'-metadata:s:s:{i}', f'title={name}'])
temp_filename = prepend_extension(filename, 'temp')
self.to_screen(f'Embedding subtitles in "{filename}"')
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
self.run_ffmpeg_multiple_files([filename, *sub_files], temp_filename, opts)
os.replace(temp_filename, filename)
files_to_delete = [] if self._already_have_subtitle else sub_filenames
return files_to_delete, info
def _embed_lyrics(self, subtitles, filename, ext):
    """Write the given lrc subtitles into the tags of an audio file via mutagen.

    @param subtitles  Mapping of language code -> subtitle info dict. Each dict
                      must have ext == 'lrc' and either a non-empty 'data'
                      string or a readable 'filepath'.
    @param filename   Path of the media file whose tags are modified in place.
    @param ext        Extension of the media file; must be in SUPPORTS_LYRICS.

    Subtitle dicts lacking 'data' are filled in from their 'filepath' as a
    side effect.
    """
    assert mutagen and ext in self.SUPPORTS_LYRICS and all(sub['ext'] == 'lrc' for sub in subtitles.values())
    self.to_screen(f'Embedding lyrics in "{filename}"')
    if len(subtitles) > 1:
        self.report_warning(
            f'Your media player may be unable to display multiple subtitles in {ext}', only_once=True)

    # Make sure every subtitle carries its text before any tag is touched
    for sub in subtitles.values():
        if sub.get('data'):
            continue
        with open(sub['filepath'], encoding='utf-8') as lrc_file:
            sub['data'] = lrc_file.read()

    if ext != 'mp3':
        # Non-mp3 containers store the raw lrc text, one list entry per language
        tags = mutagen.File(filename)
        tag_key = '©lyr' if ext == 'm4a' else 'lyrics'
        tags[tag_key] = [sub['data'] for sub in subtitles.values()]
    else:
        # mp3 gets one SYLT frame per language, built from the parsed lrc lines
        tags = mutagen.id3.ID3(filename)
        for lang, sub in subtitles.items():
            # Only timed lines are kept; start times become integer milliseconds
            synced_text = [
                (entry.text, int(entry.start * 1000))
                for entry in parse_lrc(sub['data'])
                if isinstance(entry, Subtitle)
            ]
            tags.add(mutagen.id3.SYLT(
                encoding=mutagen.id3.Encoding.UTF8, format=2, type=1,
                lang=ISO639Utils.short2long(lang) or 'und',
                text=synced_text))

    tags.save()
class FFmpegMetadataPP(FFmpegPostProcessor):

View file

@ -3806,6 +3806,8 @@ class ISO639Utils:
@classmethod
def short2long(cls, code):
    """Convert language code from ISO 639-1 to ISO 639-2/T

    A code that is already one of the values of the language map is
    returned unchanged; otherwise the first two characters of the code
    are looked up in the map.
    """
    lang_map = cls._lang_map
    return code if code in lang_map.values() else lang_map.get(code[:2])
@classmethod

37
yt_dlp/utils/subtitles.py Normal file
View file

@ -0,0 +1,37 @@
from __future__ import annotations
import re
from dataclasses import dataclass
# Timestamps are expressed as (possibly fractional) seconds
Seconds = float

# A leading ``[hh:mm:ss.xx]`` tag (hours, minutes and fraction are optional)
# followed by the remainder of the line. The decimal separator is a literal dot.
_LRC_TIMESTAMP_RE = re.compile(r'\[(?P<time>((\d+:)?\d+:)?\d+(\.\d+)?)\](?P<content>.*)')
# A whole-line ``[name:value]`` tag
_LRC_METADATA_RE = re.compile(r'\[(?P<name>[^\]:]+):(?P<value>[^\]]+)\]')


@dataclass
class Metadata:
    """A ``[name:value]`` tag line of an lrc file."""
    name: str
    value: str


@dataclass
class Subtitle:
    """A single timed line of text."""
    text: str
    start: Seconds
    end: Seconds | None = None


def parse_lrc(text):
    """Parse the contents of an lrc file line by line.

    @param text  The complete lrc document as a string.
    @yields      For each line, in order:
                 - one Subtitle per leading timestamp tag (a line carrying
                   several timestamps yields the same text once per timestamp);
                 - a Metadata for a line that is exactly a ``[name:value]`` tag;
                 - a ValueError instance (yielded, NOT raised) wrapping any
                   other non-blank line, so callers can filter by type.
                 Blank lines yield nothing.
    """
    # splitlines() also handles "\r\n", which would otherwise leave a stray
    # "\r" that prevents metadata tags from matching
    for line in text.splitlines():
        times = []
        # Peel timestamp tags off the front of the line one at a time
        while mobj := _LRC_TIMESTAMP_RE.fullmatch(line):
            # Fields are base-60 digits, least significant (seconds) last
            times.append(sum(
                float(t) * 60**i for i, t in enumerate(reversed(mobj.group('time').split(':')))))
            line = mobj.group('content')

        for t in times:
            yield Subtitle(start=t, text=line.strip())

        if not times:
            if mobj := _LRC_METADATA_RE.fullmatch(line):
                yield Metadata(mobj.group('name'), mobj.group('value').strip())
            elif line.strip():
                yield ValueError(line)