mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
Abstract out lrc parsing
This commit is contained in:
parent
bd3676f6e5
commit
3f30a6b78e
2 changed files with 41 additions and 16 deletions
|
@ -33,6 +33,7 @@
|
||||||
variadic,
|
variadic,
|
||||||
write_json_file,
|
write_json_file,
|
||||||
)
|
)
|
||||||
|
from ..utils.subtitles import Subtitle, parse_lrc
|
||||||
|
|
||||||
EXT_TO_OUT_FORMATS = {
|
EXT_TO_OUT_FORMATS = {
|
||||||
'aac': 'adts',
|
'aac': 'adts',
|
||||||
|
@ -683,27 +684,15 @@ def _embed_lyrics(self, subtitles, filename, ext):
|
||||||
with open(sub['filepath'], encoding='utf-8') as f:
|
with open(sub['filepath'], encoding='utf-8') as f:
|
||||||
sub['data'] = f.read()
|
sub['data'] = f.read()
|
||||||
|
|
||||||
def totime(time):
    """Convert an LRC timestamp string to whole milliseconds.

    Accepted forms are ``ss[.fff]``, ``mm:ss[.fff]`` and ``hh:mm:ss[.fff]``.
    Only the last component may carry a fractional part.

    Raises:
        ValueError: if any component is not numeric.
    """
    *whole_parts, seconds = time.strip().split(':')
    total_seconds = float(seconds)
    # Walk the leading components right-to-left: minutes (60**1), hours (60**2)
    for power, part in enumerate(reversed(whole_parts), start=1):
        total_seconds += int(part) * 60 ** power
    # round() rather than int(): 1.005 * 1000 evaluates to 1004.9999999999999,
    # which plain truncation would turn into 1004 ms
    return round(total_seconds * 1000)
|
|
||||||
def convert_lrc_to_sylt(lrc):
    """Turn LRC lyrics text into a list of ``(text, milliseconds)`` tuples.

    Each line may start with one or more ``[time]`` tags; one tuple is emitted
    per tag, in file order.  Lines without a leading time tag (for example
    ``[ar:Artist]`` metadata) and lines whose text is empty are skipped.
    """
    entries = []
    for raw_line in lrc.splitlines():
        remainder = raw_line.strip()
        timestamps = []
        # Peel off leading "[...]" tags one at a time so that a line such as
        # "[00:01.00][00:30.00]chorus" yields an entry for every timestamp
        while remainder.startswith('['):
            tag, closing, tail = remainder[1:].partition(']')
            if not closing:
                break
            try:
                timestamps.append(totime(tag))
            except (ValueError, IndexError):
                # Not a time tag (metadata or bracketed lyric text); stop here
                break
            remainder = tail
        lyric = remainder.strip()
        if lyric:
            entries.extend((lyric, ms) for ms in timestamps)
    return entries
|
|
||||||
if ext == 'mp3':
|
if ext == 'mp3':
|
||||||
metadata = mutagen.id3.ID3(filename)
|
metadata = mutagen.id3.ID3(filename)
|
||||||
for lang, sub in subtitles.items():
|
for lang, sub in subtitles.items():
|
||||||
metadata.add(mutagen.id3.SYLT(
|
metadata.add(mutagen.id3.SYLT(
|
||||||
encoding=mutagen.id3.Encoding.UTF8,
|
encoding=mutagen.id3.Encoding.UTF8, format=2, type=1,
|
||||||
lang=ISO639Utils.short2long(lang) or 'und',
|
lang=ISO639Utils.short2long(lang) or 'und',
|
||||||
format=2,
|
text=[(line.text, int(line.start * 1000))
|
||||||
type=1,
|
for line in parse_lrc(sub['data'])
|
||||||
text=convert_lrc_to_sylt(sub['data'])))
|
if isinstance(line, Subtitle)]))
|
||||||
else:
|
else:
|
||||||
metadata = mutagen.File(filename)
|
metadata = mutagen.File(filename)
|
||||||
metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()]
|
metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()]
|
||||||
|
|
36
yt_dlp/utils/subtitles.py
Normal file
36
yt_dlp/utils/subtitles.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import TypeAlias
|
||||||
|
|
||||||
|
# A point in time or a duration expressed in seconds, stored as a float
Seconds: TypeAlias = float
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Metadata:
    """A ``name``/``value`` pair, such as a ``[name:value]`` tag line in LRC text."""

    name: str
    value: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Subtitle:
    """One timed line of text."""

    text: str
    # Time at which the text starts being shown
    start: Seconds
    # None when no end time is known; the annotation is optional to match the default
    end: Seconds | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# One leading "[time]" tag followed by the rest of the line.  The decimal
# point is escaped so that e.g. "[00:12x34]" is not mistaken for a timestamp.
_LRC_TIME_TAG_RE = re.compile(r'\[(?P<time>((\d+:)?\d+:)?\d+(\.\d+)?)\](?P<content>.*)')
# A whole-line "[name:value]" metadata tag.
_LRC_METADATA_RE = re.compile(r'\[(?P<name>[^\]:]+):(?P<value>[^\]]+)\]')


def parse_lrc(text):
    """Lazily parse LRC lyrics text.

    For every input line this generator yields:

    * one ``Subtitle`` per leading ``[time]`` tag (``start`` in seconds,
      ``text`` being the remainder of the line), or
    * a ``Metadata`` for a ``[name:value]`` line, or
    * a ``ValueError`` *instance* wrapping the line when it is non-blank and
      matches neither form.  The error is yielded, not raised, so callers can
      filter by type and keep going.

    Blank lines produce nothing.
    """
    # splitlines() + strip() so CRLF input and padded lines parse like clean ones
    for raw_line in text.splitlines():
        line = raw_line.strip()

        times = []
        while mobj := _LRC_TIME_TAG_RE.fullmatch(line):
            # Components are read right-to-left: seconds, minutes, hours
            times.append(sum(
                float(part) * 60 ** power
                for power, part in enumerate(reversed(mobj.group('time').split(':')))))
            line = mobj.group('content').strip()

        for start in times:
            yield Subtitle(start=start, text=line)

        if times:
            continue
        if mobj := _LRC_METADATA_RE.fullmatch(line):
            yield Metadata(mobj.group('name'), mobj.group('value').strip())
        elif line:
            yield ValueError(line)
|
Loading…
Reference in a new issue