mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-09 12:50:23 -05:00
Abstract out lrc parsing
This commit is contained in:
parent
bd3676f6e5
commit
3f30a6b78e
2 changed files with 41 additions and 16 deletions
|
@ -33,6 +33,7 @@
|
|||
variadic,
|
||||
write_json_file,
|
||||
)
|
||||
from ..utils.subtitles import Subtitle, parse_lrc
|
||||
|
||||
EXT_TO_OUT_FORMATS = {
|
||||
'aac': 'adts',
|
||||
|
@ -683,27 +684,15 @@ def _embed_lyrics(self, subtitles, filename, ext):
|
|||
with open(sub['filepath'], encoding='utf-8') as f:
|
||||
sub['data'] = f.read()
|
||||
|
||||
def totime(time):
    """Convert an LRC timestamp string to integer milliseconds.

    The string is a ``:``-separated list of numeric fields, least
    significant last (``ss``, ``mm:ss.xx`` or ``hh:mm:ss.xx``); each field
    to the left is worth 60 times the one to its right.

    Raises ValueError if any field is not a valid number.
    """
    seconds = 0.0
    for field in time.split(":"):
        seconds = seconds * 60 + float(field)
    # round() instead of int(): truncation turns e.g. 1.001 s
    # (1000.9999999999999 ms as a float) into 1000 ms rather than 1001 ms.
    return round(seconds * 1000)
|
||||
def convert_lrc_to_sylt(lrc):
    """Turn LRC lyrics text into a list of ``(text, milliseconds)`` tuples.

    Every line may start with one or more ``[timestamp]`` tags; one tuple is
    produced per timestamp, all sharing the text that follows the tags.
    Lines that carry no usable timestamp (blank lines, plain text, metadata
    tags such as ``[ti:Title]``) and timestamped lines with no text are
    skipped instead of raising.
    """
    entries = []
    for raw_line in lrc.split("\n"):
        remainder = raw_line.strip()
        stamps = []
        # Peel off leading "[...]" tags for as long as they parse as times.
        # Anything else (e.g. "[Chorus]") is left in place as lyric text.
        while remainder.startswith("["):
            tag, closing, rest = remainder[1:].partition("]")
            if not closing:
                break
            try:
                stamps.append(totime(tag))
            except (ValueError, IndexError):
                break
            remainder = rest
        if not stamps or not remainder:
            continue
        entries.extend((remainder, stamp) for stamp in stamps)
    return entries
|
||||
if ext == 'mp3':
|
||||
metadata = mutagen.id3.ID3(filename)
|
||||
for lang, sub in subtitles.items():
|
||||
metadata.add(mutagen.id3.SYLT(
|
||||
encoding=mutagen.id3.Encoding.UTF8,
|
||||
encoding=mutagen.id3.Encoding.UTF8, format=2, type=1,
|
||||
lang=ISO639Utils.short2long(lang) or 'und',
|
||||
format=2,
|
||||
type=1,
|
||||
text=convert_lrc_to_sylt(sub['data'])))
|
||||
text=[(line.text, int(line.start * 1000))
|
||||
for line in parse_lrc(sub['data'])
|
||||
if isinstance(line, Subtitle)]))
|
||||
else:
|
||||
metadata = mutagen.File(filename)
|
||||
metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()]
|
||||
|
|
36
yt_dlp/utils/subtitles.py
Normal file
36
yt_dlp/utils/subtitles.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import TypeAlias
|
||||
|
||||
Seconds: TypeAlias = float
|
||||
|
||||
|
||||
@dataclass
class Metadata:
    """A ``name``/``value`` pair of strings describing one metadata entry."""

    # Identifier of the entry
    name: str
    # Payload associated with ``name``
    value: str
|
||||
|
||||
|
||||
@dataclass
class Subtitle:
    """One subtitle cue: a piece of text with a start and optional end time."""

    # The text of the cue
    text: str
    # Start time of the cue
    start: Seconds
    # End time of the cue; None when no end time is set
    end: Seconds | None = None
|
||||
|
||||
|
||||
# One leading "[hh:mm:ss.xx]" / "[mm:ss.xx]" / "[ss.xx]" tag plus the rest of the line.
# The decimal point is escaped so that e.g. "[00:12a34]" is not taken for a timestamp.
_LRC_TIME_TAG_RE = re.compile(r'\[(?P<time>((\d+:)?\d+:)?\d+(\.\d+)?)\](?P<content>.*)')
# A whole-line "[name:value]" tag
_LRC_METADATA_RE = re.compile(r'\[(?P<name>[^\]:]+):(?P<value>[^\]]+)\]')


def parse_lrc(text):
    """Lazily parse LRC-formatted text, one item (or more) per input line.

    Yields, in input order:
      * a ``Subtitle`` for every leading time tag of a line, each with
        ``start`` in seconds and the stripped remainder of the line as text;
      * a ``Metadata`` for a line consisting solely of a ``[name:value]`` tag;
      * a ``ValueError`` *instance* (yielded, not raised) wrapping any other
        non-blank line, so the caller decides whether to ignore or raise it.

    Blank lines produce nothing.
    """
    for raw_line in text.split('\n'):
        # Strip first so CRLF line endings and padding do not defeat fullmatch()
        line = raw_line.strip()

        times = []
        while mobj := _LRC_TIME_TAG_RE.fullmatch(line):
            # Fields are base-60 digits, least significant (seconds) last
            times.append(sum(
                float(t) * 60**i for i, t in enumerate(reversed(mobj.group('time').split(':')))))
            line = mobj.group('content')

        if times:
            content = line.strip()
            for t in times:
                yield Subtitle(start=t, text=content)
        elif mobj := _LRC_METADATA_RE.fullmatch(line):
            yield Metadata(mobj.group('name'), mobj.group('value').strip())
        elif line:
            yield ValueError(raw_line)
|
Loading…
Reference in a new issue