mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
Merge a36d6df3f6
into 05c8023a27
This commit is contained in:
commit
5380818509
6 changed files with 144 additions and 42 deletions
|
@ -934,8 +934,9 @@ ## Post-Processing Options:
|
||||||
post-processing (default)
|
post-processing (default)
|
||||||
--post-overwrites Overwrite post-processed files (default)
|
--post-overwrites Overwrite post-processed files (default)
|
||||||
--no-post-overwrites Do not overwrite post-processed files
|
--no-post-overwrites Do not overwrite post-processed files
|
||||||
--embed-subs Embed subtitles in the video (only for mp4,
|
--embed-subs Embed subtitles in downloaded media.
|
||||||
webm and mkv videos)
|
Available for video (mp4, webm, mkv) and
|
||||||
|
"lrc" in audio (m4a, mp3, flac, opus)
|
||||||
--no-embed-subs Do not embed subtitles (default)
|
--no-embed-subs Do not embed subtitles (default)
|
||||||
--embed-thumbnail Embed thumbnail in the video as cover art
|
--embed-thumbnail Embed thumbnail in the video as cover art
|
||||||
--no-embed-thumbnail Do not embed thumbnail (default)
|
--no-embed-thumbnail Do not embed thumbnail (default)
|
||||||
|
|
|
@ -491,11 +491,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
||||||
'allowed_values': {
|
'allowed_values': {
|
||||||
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
|
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
|
||||||
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
|
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
|
||||||
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
|
'no-attach-info-json', 'avoid-mutagen', 'no-external-downloader-progress',
|
||||||
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
||||||
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
|
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
|
||||||
}, 'aliases': {
|
}, 'aliases': {
|
||||||
|
'embed-thumbnail-atomicparsley': ['avoid-mutagen'], # compat
|
||||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||||
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
|
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
|
||||||
|
@ -1631,7 +1632,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--embed-subs',
|
'--embed-subs',
|
||||||
action='store_true', dest='embedsubtitles', default=False,
|
action='store_true', dest='embedsubtitles', default=False,
|
||||||
help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
|
help=(
|
||||||
|
'Embed subtitles in downloaded media. '
|
||||||
|
'Available for video (mp4, webm, mkv) and "lrc" in audio (m4a, mp3, ogg, flac)'))
|
||||||
postproc.add_option(
|
postproc.add_option(
|
||||||
'--no-embed-subs',
|
'--no-embed-subs',
|
||||||
action='store_false', dest='embedsubtitles',
|
action='store_false', dest='embedsubtitles',
|
||||||
|
|
|
@ -86,14 +86,39 @@ def run(self, info):
|
||||||
|
|
||||||
mtime = os.stat(filename).st_mtime
|
mtime = os.stat(filename).st_mtime
|
||||||
|
|
||||||
|
avoid_mutagen = any(
|
||||||
|
opt in self.get_param('compat_opts', [])
|
||||||
|
for opt in ('avoid-mutagen', 'embed-thumbnail-atomicparsley'))
|
||||||
success = True
|
success = True
|
||||||
if info['ext'] == 'mp3':
|
if info['ext'] == 'mp3':
|
||||||
options = [
|
# Method 1: Use mutagen
|
||||||
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
|
if avoid_mutagen:
|
||||||
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
|
success = False
|
||||||
|
elif not mutagen:
|
||||||
|
self.to_screen('mutagen not was found. Falling back to ffmpeg. Lyrics may be corrupted')
|
||||||
|
success = False
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
self._report_run('mutagen', filename)
|
||||||
|
audio = mutagen.id3.ID3(filename)
|
||||||
|
with open(thumbnail_filename, 'rb') as thumbfile:
|
||||||
|
audio['APIC'] = mutagen.id3.APIC(
|
||||||
|
encoding=mutagen.id3.Encoding.UTF8, mime=f'image/{thumbnail_ext}',
|
||||||
|
type=mutagen.id3.PictureType.COVER_FRONT, desc='Cover (front)', data=thumbfile.read())
|
||||||
|
audio.save()
|
||||||
|
temp_filename = filename
|
||||||
|
except Exception as err:
|
||||||
|
self.report_warning(f'unable to embed using mutagen; {err}')
|
||||||
|
success = False
|
||||||
|
|
||||||
self._report_run('ffmpeg', filename)
|
# Method 2: Use ffmpeg
|
||||||
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
|
if not success:
|
||||||
|
options = [
|
||||||
|
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
|
||||||
|
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
|
||||||
|
|
||||||
|
self._report_run('ffmpeg', filename)
|
||||||
|
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
|
||||||
|
|
||||||
elif info['ext'] in ['mkv', 'mka']:
|
elif info['ext'] in ['mkv', 'mka']:
|
||||||
options = list(self.stream_copy_opts())
|
options = list(self.stream_copy_opts())
|
||||||
|
@ -113,9 +138,8 @@ def run(self, info):
|
||||||
self.run_ffmpeg(filename, temp_filename, options)
|
self.run_ffmpeg(filename, temp_filename, options)
|
||||||
|
|
||||||
elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
|
elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
|
||||||
prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
|
|
||||||
# Method 1: Use mutagen
|
# Method 1: Use mutagen
|
||||||
if not mutagen or prefer_atomicparsley:
|
if avoid_mutagen or not mutagen:
|
||||||
success = False
|
success = False
|
||||||
else:
|
else:
|
||||||
self._report_run('mutagen', filename)
|
self._report_run('mutagen', filename)
|
||||||
|
@ -151,7 +175,7 @@ def run(self, info):
|
||||||
self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
|
self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
|
||||||
success = False
|
success = False
|
||||||
else:
|
else:
|
||||||
if not prefer_atomicparsley:
|
if not avoid_mutagen:
|
||||||
self.to_screen('mutagen was not found. Falling back to AtomicParsley')
|
self.to_screen('mutagen was not found. Falling back to AtomicParsley')
|
||||||
cmd = [atomicparsley,
|
cmd = [atomicparsley,
|
||||||
filename,
|
filename,
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import PostProcessor
|
from .common import PostProcessor
|
||||||
|
from ..dependencies import mutagen
|
||||||
from ..compat import imghdr
|
from ..compat import imghdr
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
MEDIA_EXTENSIONS,
|
MEDIA_EXTENSIONS,
|
||||||
|
@ -32,6 +33,7 @@
|
||||||
variadic,
|
variadic,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
)
|
)
|
||||||
|
from ..utils.subtitles import Subtitle, parse_lrc
|
||||||
|
|
||||||
EXT_TO_OUT_FORMATS = {
|
EXT_TO_OUT_FORMATS = {
|
||||||
'aac': 'adts',
|
'aac': 'adts',
|
||||||
|
@ -586,7 +588,8 @@ def _options(target_ext):
|
||||||
|
|
||||||
|
|
||||||
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
|
||||||
SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka')
|
SUPPORTS_LYRICS = ('mp3', 'm4a', 'flac', 'opus')
|
||||||
|
SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka', *SUPPORTS_LYRICS)
|
||||||
|
|
||||||
def __init__(self, downloader=None, already_have_subtitle=False):
|
def __init__(self, downloader=None, already_have_subtitle=False):
|
||||||
super().__init__(downloader)
|
super().__init__(downloader)
|
||||||
|
@ -594,9 +597,11 @@ def __init__(self, downloader=None, already_have_subtitle=False):
|
||||||
|
|
||||||
@PostProcessor._restrict_to(images=False)
|
@PostProcessor._restrict_to(images=False)
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
if info['ext'] not in self.SUPPORTED_EXTS:
|
ext = info['ext']
|
||||||
|
if ext not in self.SUPPORTED_EXTS:
|
||||||
self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
|
self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files')
|
||||||
return [], info
|
return [], info
|
||||||
|
|
||||||
subtitles = info.get('requested_subtitles')
|
subtitles = info.get('requested_subtitles')
|
||||||
if not subtitles:
|
if not subtitles:
|
||||||
self.to_screen('There aren\'t any subtitles to embed')
|
self.to_screen('There aren\'t any subtitles to embed')
|
||||||
|
@ -614,57 +619,87 @@ def run(self, info):
|
||||||
return [], info
|
return [], info
|
||||||
'''
|
'''
|
||||||
|
|
||||||
ext = info['ext']
|
warnings = set()
|
||||||
sub_langs, sub_names, sub_filenames = [], [], []
|
|
||||||
webm_vtt_warn = False
|
|
||||||
mp4_ass_warn = False
|
|
||||||
|
|
||||||
|
def warn_once(msg):
|
||||||
|
if msg not in warnings:
|
||||||
|
warnings.add(msg)
|
||||||
|
self.report_warning(msg)
|
||||||
|
|
||||||
|
subtitles_to_embed = {}
|
||||||
for lang, sub_info in subtitles.items():
|
for lang, sub_info in subtitles.items():
|
||||||
if not os.path.exists(sub_info.get('filepath', '')):
|
if not os.path.exists(sub_info.get('filepath', '')):
|
||||||
self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
|
self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
|
||||||
continue
|
elif sub_info['ext'] == 'json':
|
||||||
sub_ext = sub_info['ext']
|
warn_once('JSON subtitles cannot be embedded')
|
||||||
if sub_ext == 'json':
|
elif ext == 'webm' and sub_info['ext'] != 'vtt':
|
||||||
self.report_warning('JSON subtitles cannot be embedded')
|
warn_once('Only WebVTT subtitles can be embedded in webm files')
|
||||||
elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
|
elif ext in self.SUPPORTS_LYRICS and sub_info['ext'] != 'lrc':
|
||||||
sub_langs.append(lang)
|
warn_once(f'Only lrc subtitles can be embedded in {ext} files')
|
||||||
sub_names.append(sub_info.get('name'))
|
elif ext in self.SUPPORTS_LYRICS and not mutagen:
|
||||||
sub_filenames.append(sub_info['filepath'])
|
raise PostProcessingError(
|
||||||
|
f'[{self.PP_NAME}] module mutagen was not found. Please install using `python -m pip install mutagen`')
|
||||||
else:
|
else:
|
||||||
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
|
if ext == 'mp4' and sub_info['ext'] == 'ass':
|
||||||
webm_vtt_warn = True
|
warn_once('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
|
||||||
self.report_warning('Only WebVTT subtitles can be embedded in webm files')
|
subtitles_to_embed[lang] = sub_info
|
||||||
if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
|
|
||||||
mp4_ass_warn = True
|
|
||||||
self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
|
|
||||||
|
|
||||||
if not sub_langs:
|
if not subtitles_to_embed:
|
||||||
return [], info
|
return [], info
|
||||||
|
|
||||||
input_files = [filename, *sub_filenames]
|
sub_files = [sub['filepath'] for sub in subtitles_to_embed.values()]
|
||||||
|
files_to_delete = [] if self._already_have_subtitle else sub_files
|
||||||
|
|
||||||
|
if ext in self.SUPPORTS_LYRICS:
|
||||||
|
self._embed_lyrics(subtitles_to_embed, info['filepath'], ext)
|
||||||
|
return files_to_delete, info
|
||||||
|
|
||||||
opts = [
|
opts = [
|
||||||
*self.stream_copy_opts(ext=info['ext']),
|
*self.stream_copy_opts(ext=ext),
|
||||||
# Don't copy the existing subtitles, we may be running the
|
# Don't copy the existing subtitles, we may be running the
|
||||||
# postprocessor a second time
|
# postprocessor a second time
|
||||||
'-map', '-0:s',
|
'-map', '-0:s',
|
||||||
]
|
]
|
||||||
for i, (lang, name) in enumerate(zip(sub_langs, sub_names)):
|
for i, (lang, sub) in enumerate(subtitles_to_embed.items()):
|
||||||
opts.extend(['-map', f'{i + 1}:0'])
|
lang = ISO639Utils.short2long(lang) or lang
|
||||||
lang_code = ISO639Utils.short2long(lang) or lang
|
opts.extend(['-map', f'{i + 1}:0', f'-metadata:s:s:{i}', f'language={lang}'])
|
||||||
opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}'])
|
if name := sub['name']:
|
||||||
if name:
|
|
||||||
opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}',
|
opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}',
|
||||||
f'-metadata:s:s:{i}', f'title={name}'])
|
f'-metadata:s:s:{i}', f'title={name}'])
|
||||||
|
|
||||||
temp_filename = prepend_extension(filename, 'temp')
|
temp_filename = prepend_extension(filename, 'temp')
|
||||||
self.to_screen(f'Embedding subtitles in "{filename}"')
|
self.to_screen(f'Embedding subtitles in "{filename}"')
|
||||||
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
|
self.run_ffmpeg_multiple_files([filename, *sub_files], temp_filename, opts)
|
||||||
os.replace(temp_filename, filename)
|
os.replace(temp_filename, filename)
|
||||||
|
|
||||||
files_to_delete = [] if self._already_have_subtitle else sub_filenames
|
|
||||||
return files_to_delete, info
|
return files_to_delete, info
|
||||||
|
|
||||||
|
def _embed_lyrics(self, subtitles, filename, ext):
    """Write "lrc" subtitles into the tags of an audio file using mutagen

    The file is modified in-place. For mp3, every subtitle becomes one
    synchronised-lyrics (SYLT) ID3 frame; for the other supported formats
    the raw lrc text of all subtitles is stored in a single lyrics tag.

    @param subtitles  Mapping of language code to subtitle info dict. Each dict
                      must have 'ext' == 'lrc' and either a non-empty 'data' or a
                      readable 'filepath'. 'data' is filled in from 'filepath'
                      when missing
    @param filename   Path of the audio file to tag
    @param ext        Extension of the audio file; must be in SUPPORTS_LYRICS
    @raises PostProcessingError  if mutagen cannot identify a non-mp3 file
    """
    assert mutagen and ext in self.SUPPORTS_LYRICS and all(sub['ext'] == 'lrc' for sub in subtitles.values())
    self.to_screen(f'Embedding lyrics in "{filename}"')
    if len(subtitles) > 1:
        self.report_warning(
            f'Your media player may be unable to display multiple subtitles in {ext}', only_once=True)

    # Load the lrc text for any subtitle that only exists on disk
    for sub in subtitles.values():
        if not sub.get('data'):
            with open(sub['filepath'], encoding='utf-8') as f:
                sub['data'] = f.read()

    if ext == 'mp3':
        try:
            metadata = mutagen.id3.ID3(filename)
        except mutagen.id3.ID3NoHeaderError:
            # The file has no ID3 tag yet; start from an empty one
            # instead of failing the whole post-processing step
            metadata = mutagen.id3.ID3()
        for lang, sub in subtitles.items():
            metadata.add(mutagen.id3.SYLT(
                # format=2: timestamps are in milliseconds; type=1: lyrics
                encoding=mutagen.id3.Encoding.UTF8, format=2, type=1,
                lang=ISO639Utils.short2long(lang) or 'und',
                # Only timed lines are embedded; ID tags and unparsable
                # lines yielded by parse_lrc are dropped
                text=[(line.text, int(line.start * 1000))
                      for line in parse_lrc(sub['data'])
                      if isinstance(line, Subtitle)]))
        # A freshly created tag is not bound to a file, so name it explicitly
        metadata.save(filename)
    else:
        metadata = mutagen.File(filename)
        if metadata is None:
            # mutagen.File returns None when it cannot determine the file type
            raise PostProcessingError(
                f'Unable to embed lyrics; mutagen could not identify the type of "{filename}"')
        metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()]
        metadata.save()
|
||||||
|
|
||||||
|
|
||||||
class FFmpegMetadataPP(FFmpegPostProcessor):
|
class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||||
|
|
||||||
|
|
|
@ -3806,6 +3806,8 @@ class ISO639Utils:
|
||||||
@classmethod
|
@classmethod
|
||||||
def short2long(cls, code):
    """Return the ISO 639-2/T code for a language code

    A code that is already one of the known ISO 639-2/T values is returned
    unchanged. Otherwise the first two characters are looked up as an
    ISO 639-1 code; None is returned when they are unknown.
    """
    lang_map = cls._lang_map
    return code if code in lang_map.values() else lang_map.get(code[:2])
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
37
yt_dlp/utils/subtitles.py
Normal file
37
yt_dlp/utils/subtitles.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
Seconds = float


@dataclass
class Metadata:
    """An LRC ID tag line such as ``[ar:Artist]``"""
    # Text between "[" and the first ":"
    name: str
    # Text between the first ":" and "]", stripped of surrounding whitespace
    value: str


@dataclass
class Subtitle:
    """A single line of text with its start time"""
    text: str
    # Offset at which the line starts, in seconds
    start: Seconds
    # parse_lrc leaves this unset, since lrc lines carry no end time
    end: Seconds | None = None


# A leading "[hh:mm:ss.xx]" time tag (hours and minutes optional) followed by
# the remainder of the line. The decimal point is escaped so that only a
# literal "." is accepted as the fraction separator.
_LRC_TIME_RE = re.compile(r'\[(?P<time>((\d+:)?\d+:)?\d+(\.\d+)?)\](?P<content>.*)')
# An ID tag line of the form "[name:value]"
_LRC_TAG_RE = re.compile(r'\[(?P<name>[^\]:]+):(?P<value>[^\]]+)\]')


def parse_lrc(text):
    """Parse the contents of an lrc (lyrics) file line by line

    For every line, in order, this generator yields:
      * one Subtitle per leading time tag; a line with several time tags
        ("[00:01][00:05]text") yields the same text once for each time
      * one Metadata for an ID tag line ("[ar:Artist]")
      * a ValueError *instance* (yielded, not raised) for any other non-blank
        line, so that callers can decide whether to ignore or report it

    Blank lines are skipped. Both "\\n" and "\\r\\n" line endings are accepted.

    @param text  The full lrc document as a str
    """
    for raw_line in text.split('\n'):
        # Drop the "\r" of CRLF files; otherwise ID tag lines never match
        line = raw_line.rstrip('\r')

        # Peel off every leading time tag, keeping what follows as the text
        times = []
        while mobj := _LRC_TIME_RE.fullmatch(line):
            # "hh:mm:ss" -> seconds; the rightmost field has weight 60**0
            times.append(sum(
                float(t) * 60**i for i, t in enumerate(reversed(mobj.group('time').split(':')))))
            line = mobj.group('content')

        if times:
            content = line.strip()
            for t in times:
                yield Subtitle(start=t, text=content)
        elif mobj := _LRC_TAG_RE.fullmatch(line):
            yield Metadata(mobj.group('name'), mobj.group('value').strip())
        elif line.strip():
            yield ValueError(line)
|
Loading…
Reference in a new issue