mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
Merge a36d6df3f6
into 05c8023a27
This commit is contained in:
commit
5380818509
6 changed files with 144 additions and 42 deletions
|
@ -934,8 +934,9 @@ ## Post-Processing Options:
|
|||
post-processing (default)
|
||||
--post-overwrites Overwrite post-processed files (default)
|
||||
--no-post-overwrites Do not overwrite post-processed files
|
||||
--embed-subs Embed subtitles in the video (only for mp4,
|
||||
webm and mkv videos)
|
||||
--embed-subs Embed subtitles in downloaded media.
|
||||
Available for video (mp4, webm, mkv) and
|
||||
"lrc" in audio (m4a, mp3, ogg, flac)
|
||||
--no-embed-subs Do not embed subtitles (default)
|
||||
--embed-thumbnail Embed thumbnail in the video as cover art
|
||||
--no-embed-thumbnail Do not embed thumbnail (default)
|
||||
|
|
|
@ -491,11 +491,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
'allowed_values': {
|
||||
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
|
||||
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
|
||||
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
|
||||
'no-attach-info-json', 'avoid-mutagen', 'no-external-downloader-progress',
|
||||
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
||||
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
|
||||
}, 'aliases': {
|
||||
'embed-thumbnail-atomicparsley': ['avoid-mutagen'], # compat
|
||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
|
||||
|
@ -1631,7 +1632,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
postproc.add_option(
|
||||
'--embed-subs',
|
||||
action='store_true', dest='embedsubtitles', default=False,
|
||||
help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
|
||||
help=(
|
||||
'Embed subtitles in downloaded media. '
|
||||
'Available for video (mp4, webm, mkv) and "lrc" in audio (m4a, mp3, ogg, flac)'))
|
||||
postproc.add_option(
|
||||
'--no-embed-subs',
|
||||
action='store_false', dest='embedsubtitles',
|
||||
|
|
|
@ -86,14 +86,39 @@ def run(self, info):
|
|||
|
||||
mtime = os.stat(filename).st_mtime
|
||||
|
||||
avoid_mutagen = any(
|
||||
opt in self.get_param('compat_opts', [])
|
||||
for opt in ('avoid-mutagen', 'embed-thumbnail-atomicparsley'))
|
||||
success = True
|
||||
if info['ext'] == 'mp3':
|
||||
options = [
|
||||
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
|
||||
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
|
||||
# Method 1: Use mutagen
|
||||
if avoid_mutagen:
|
||||
success = False
|
||||
elif not mutagen:
|
||||
self.to_screen('mutagen was not found. Falling back to ffmpeg. Lyrics may be corrupted')
|
||||
success = False
|
||||
else:
|
||||
try:
|
||||
self._report_run('mutagen', filename)
|
||||
audio = mutagen.id3.ID3(filename)
|
||||
with open(thumbnail_filename, 'rb') as thumbfile:
|
||||
audio['APIC'] = mutagen.id3.APIC(
|
||||
encoding=mutagen.id3.Encoding.UTF8, mime=f'image/{thumbnail_ext}',
|
||||
type=mutagen.id3.PictureType.COVER_FRONT, desc='Cover (front)', data=thumbfile.read())
|
||||
audio.save()
|
||||
temp_filename = filename
|
||||
except Exception as err:
|
||||
self.report_warning(f'unable to embed using mutagen; {err}')
|
||||
success = False
|
||||
|
||||
self._report_run('ffmpeg', filename)
|
||||
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
|
||||
# Method 2: Use ffmpeg
|
||||
if not success:
|
||||
options = [
|
||||
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
|
||||
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
|
||||
|
||||
self._report_run('ffmpeg', filename)
|
||||
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
|
||||
|
||||
elif info['ext'] in ['mkv', 'mka']:
|
||||
options = list(self.stream_copy_opts())
|
||||
|
@ -113,9 +138,8 @@ def run(self, info):
|
|||
self.run_ffmpeg(filename, temp_filename, options)
|
||||
|
||||
elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
|
||||
prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
|
||||
# Method 1: Use mutagen
|
||||
if not mutagen or prefer_atomicparsley:
|
||||
if avoid_mutagen or not mutagen:
|
||||
success = False
|
||||
else:
|
||||
self._report_run('mutagen', filename)
|
||||
|
@ -151,7 +175,7 @@ def run(self, info):
|
|||
self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
|
||||
success = False
|
||||
else:
|
||||
if not prefer_atomicparsley:
|
||||
if not avoid_mutagen:
|
||||
self.to_screen('mutagen was not found. Falling back to AtomicParsley')
|
||||
cmd = [atomicparsley,
|
||||
filename,
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
import time
|
||||
|
||||
from .common import PostProcessor
|
||||
from ..dependencies import mutagen
|
||||
from ..compat import imghdr
|
||||
from ..utils import (
|
||||
MEDIA_EXTENSIONS,
|
||||
|
@ -32,6 +33,7 @@
|
|||
variadic,
|
||||
write_json_file,
|
||||
)
|
||||
from ..utils.subtitles import Subtitle, parse_lrc
|
||||
|
||||
EXT_TO_OUT_FORMATS = {
|
||||
'aac': 'adts',
|
||||
|
@ -586,7 +588,8 @@ def _options(target_ext):
|
|||
|
||||
|
||||
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||
SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka')
|
||||
SUPPORTS_LYRICS = ('mp3', 'm4a', 'flac', 'opus')
|
||||
SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka', *SUPPORTS_LYRICS)
|
||||
|
||||
def __init__(self, downloader=None, already_have_subtitle=False):
|
||||
super().__init__(downloader)
|
||||
|
@ -594,9 +597,11 @@ def __init__(self, downloader=None, already_have_subtitle=False):
|
|||
|
||||
@PostProcessor._restrict_to(images=False)
|
||||
def run(self, info):
|
||||
if info['ext'] not in self.SUPPORTED_EXTS:
|
||||
ext = info['ext']
|
||||
if ext not in self.SUPPORTED_EXTS:
|
||||
self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
|
||||
return [], info
|
||||
|
||||
subtitles = info.get('requested_subtitles')
|
||||
if not subtitles:
|
||||
self.to_screen('There aren\'t any subtitles to embed')
|
||||
|
@ -614,57 +619,87 @@ def run(self, info):
|
|||
return [], info
|
||||
'''
|
||||
|
||||
ext = info['ext']
|
||||
sub_langs, sub_names, sub_filenames = [], [], []
|
||||
webm_vtt_warn = False
|
||||
mp4_ass_warn = False
|
||||
warnings = set()
|
||||
|
||||
def warn_once(msg):
|
||||
if msg not in warnings:
|
||||
warnings.add(msg)
|
||||
self.report_warning(msg)
|
||||
|
||||
subtitles_to_embed = {}
|
||||
for lang, sub_info in subtitles.items():
|
||||
if not os.path.exists(sub_info.get('filepath', '')):
|
||||
self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
|
||||
continue
|
||||
sub_ext = sub_info['ext']
|
||||
if sub_ext == 'json':
|
||||
self.report_warning('JSON subtitles cannot be embedded')
|
||||
elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
|
||||
sub_langs.append(lang)
|
||||
sub_names.append(sub_info.get('name'))
|
||||
sub_filenames.append(sub_info['filepath'])
|
||||
elif sub_info['ext'] == 'json':
|
||||
warn_once('JSON subtitles cannot be embedded')
|
||||
elif ext == 'webm' and sub_info['ext'] != 'vtt':
|
||||
warn_once('Only WebVTT subtitles can be embedded in webm files')
|
||||
elif ext in self.SUPPORTS_LYRICS and sub_info['ext'] != 'lrc':
|
||||
warn_once(f'Only lrc subtitles can be embedded in {ext} files')
|
||||
elif ext in self.SUPPORTS_LYRICS and not mutagen:
|
||||
raise PostProcessingError(
|
||||
f'[{self.PP_NAME}] module mutagen was not found. Please install using `python -m pip install mutagen`')
|
||||
else:
|
||||
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
|
||||
webm_vtt_warn = True
|
||||
self.report_warning('Only WebVTT subtitles can be embedded in webm files')
|
||||
if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
|
||||
mp4_ass_warn = True
|
||||
self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
|
||||
if ext == 'mp4' and sub_info['ext'] == 'ass':
|
||||
warn_once('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
|
||||
subtitles_to_embed[lang] = sub_info
|
||||
|
||||
if not sub_langs:
|
||||
if not subtitles_to_embed:
|
||||
return [], info
|
||||
|
||||
input_files = [filename, *sub_filenames]
|
||||
sub_files = [sub['filepath'] for sub in subtitles_to_embed.values()]
|
||||
files_to_delete = [] if self._already_have_subtitle else sub_files
|
||||
|
||||
if ext in self.SUPPORTS_LYRICS:
|
||||
self._embed_lyrics(subtitles_to_embed, info['filepath'], ext)
|
||||
return files_to_delete, info
|
||||
|
||||
opts = [
|
||||
*self.stream_copy_opts(ext=info['ext']),
|
||||
*self.stream_copy_opts(ext=ext),
|
||||
# Don't copy the existing subtitles, we may be running the
|
||||
# postprocessor a second time
|
||||
'-map', '-0:s',
|
||||
]
|
||||
for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
|
||||
opts.extend(['-map', f'{i + 1}:0'])
|
||||
lang_code = ISO639Utils.short2long(lang) or lang
|
||||
opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}'])
|
||||
if name:
|
||||
for i, (lang, sub) in enumerate(subtitles_to_embed.items()):
|
||||
lang = ISO639Utils.short2long(lang) or lang
|
||||
opts.extend(['-map', f'{i + 1}:0', f'-metadata:s:s:{i}', f'language={lang}'])
|
||||
if name := sub['name']:
|
||||
opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}',
|
||||
f'-metadata:s:s:{i}', f'title={name}'])
|
||||
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
self.to_screen(f'Embedding subtitles in "{filename}"')
|
||||
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
|
||||
self.run_ffmpeg_multiple_files([filename, *sub_files], temp_filename, opts)
|
||||
os.replace(temp_filename, filename)
|
||||
|
||||
files_to_delete = [] if self._already_have_subtitle else sub_filenames
|
||||
return files_to_delete, info
|
||||
|
||||
def _embed_lyrics(self, subtitles, filename, ext):
    """Embed lrc subtitles into the tags of an audio file using mutagen.

    subtitles is a mapping of language code to subtitle info dict. Every
    entry must have ext "lrc" and carry either "data" or a readable
    "filepath"; "data" is filled in from the file when it is missing.
    filename is the media file whose tags are rewritten in place and ext is
    its extension, which must be listed in SUPPORTS_LYRICS.

    Raises PostProcessingError if mutagen cannot identify the media file.
    """
    # Internal invariants: run() only calls this after checking all of them
    assert mutagen and ext in self.SUPPORTS_LYRICS and all(sub['ext'] == 'lrc' for sub in subtitles.values())
    self.to_screen(f'Embedding lyrics in "{filename}"')
    if len(subtitles) > 1:
        self.report_warning(
            f'Your media player may be unable to display multiple subtitles in {ext}', only_once=True)

    # Load the lyrics text of every subtitle that is not already in memory
    for sub in subtitles.values():
        if not sub.get('data'):
            with open(sub['filepath'], encoding='utf-8') as f:
                sub['data'] = f.read()

    if ext == 'mp3':
        metadata = mutagen.id3.ID3(filename)
        for lang, sub in subtitles.items():
            # One SYLT frame per language. Timestamps are converted from
            # seconds to whole milliseconds; round() rather than int() so
            # that float error (e.g. 0.57 * 1000) cannot land 1 ms early.
            # NOTE(review): format=2/type=1 presumably select millisecond
            # timestamps and "lyrics" content - confirm against mutagen docs
            metadata.add(mutagen.id3.SYLT(
                encoding=mutagen.id3.Encoding.UTF8, format=2, type=1,
                lang=ISO639Utils.short2long(lang) or 'und',
                text=[(line.text, round(line.start * 1000))
                      for line in parse_lrc(sub['data'])
                      if isinstance(line, Subtitle)]))
    else:
        metadata = mutagen.File(filename)
        if metadata is None:
            # mutagen.File() gives None when it cannot detect the file type;
            # fail with a clear message instead of a TypeError on assignment
            raise PostProcessingError(
                f'[{self.PP_NAME}] mutagen could not identify "{filename}"; unable to embed lyrics')
        # The raw lrc text of every subtitle is stored under a single tag
        metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()]
    metadata.save()
|
||||
|
||||
|
||||
class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||
|
||||
|
|
|
@ -3806,6 +3806,8 @@ class ISO639Utils:
|
|||
@classmethod
def short2long(cls, code):
    """Return the ISO 639-2/T language code corresponding to the given code

    A code that already is one of the known ISO 639-2/T values is returned
    unchanged. Otherwise its first two characters are looked up as an
    ISO 639-1 code, and None is returned when no mapping exists.
    """
    lang_map = cls._lang_map
    return code if code in lang_map.values() else lang_map.get(code[:2])
|
||||
|
||||
@classmethod
|
||||
|
|
37
yt_dlp/utils/subtitles.py
Normal file
37
yt_dlp/utils/subtitles.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
Seconds = float

# One leading timestamp tag, e.g. "[12.5]", "[01:23.45]" or "[1:02:03]", followed by the rest of the line.
# The decimal separator is an escaped dot so that only a literal "." is accepted.
_LRC_TIMED_LINE = re.compile(r'\[(?P<time>((\d+:)?\d+:)?\d+(\.\d+)?)\](?P<content>.*)')
# A tag of the form "[name:value]" occupying the whole line
_LRC_TAG_LINE = re.compile(r'\[(?P<name>[^\]:]+):(?P<value>[^\]]+)\]')


@dataclass
class Metadata:
    """A "[name:value]" tag line of an LRC document"""

    name: str
    value: str


@dataclass
class Subtitle:
    """A piece of text together with the time at which it starts"""

    text: str
    start: Seconds
    # Optional end time; parse_lrc() never sets it
    end: Seconds | None = None


def parse_lrc(text):
    """Parse the text of an LRC lyrics document line by line

    Yields, in document order:
      * one Subtitle per timestamp tag at the start of a line (a line may
        carry several tags; each yields the same stripped text)
      * a Metadata for a line consisting solely of a "[name:value]" tag
      * a ValueError instance (yielded, NOT raised) holding any other
        non-blank line, so that callers can skip or report it

    Blank lines are ignored. Both LF and CRLF line endings are accepted.
    """
    for raw_line in text.split('\n'):
        # Drop the "\r" of CRLF line endings; otherwise tag lines fail to match
        line = raw_line.rstrip('\r')

        times = []
        # Peel off the leading timestamp tags one at a time
        while mobj := _LRC_TIMED_LINE.fullmatch(line):
            # "h:m:s" -> seconds: the rightmost field is seconds, each field to the left is worth 60x more
            times.append(sum(
                float(t) * 60**i for i, t in enumerate(reversed(mobj.group('time').split(':')))))
            line = mobj.group('content')

        for t in times:
            yield Subtitle(start=t, text=line.strip())

        if not times:
            if mobj := _LRC_TAG_LINE.fullmatch(line):
                yield Metadata(mobj.group('name'), mobj.group('value').strip())
            elif line.strip():
                yield ValueError(line)
|
Loading…
Reference in a new issue