1
0
Fork 0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-03-09 12:50:23 -05:00

Abstract out lrc parsing

This commit is contained in:
pukkandan 2024-02-27 08:42:44 +05:30
parent bd3676f6e5
commit 3f30a6b78e
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
2 changed files with 41 additions and 16 deletions

View file

@ -33,6 +33,7 @@
variadic,
write_json_file,
)
from ..utils.subtitles import Subtitle, parse_lrc
EXT_TO_OUT_FORMATS = {
'aac': 'adts',
@ -683,27 +684,15 @@ def _embed_lyrics(self, subtitles, filename, ext):
with open(sub['filepath'], encoding='utf-8') as f:
sub['data'] = f.read()
def totime(time):
    """Convert an LRC timestamp string to whole milliseconds.

    The string is a run of colon-separated fields with the seconds last,
    e.g. ``mm:ss.xx`` or ``hh:mm:ss.xx``. Every field before the last must
    be an integer; the last one may carry a fractional part.

    Returns an ``int``. Raises ``ValueError`` if a field is not numeric.
    """
    *leading, seconds = time.split(":")
    # Fold hours/minutes (or just minutes) into a count of whole minutes
    whole_minutes = 0
    for field in leading:
        whole_minutes = whole_minutes * 60 + int(field)
    # round() instead of int(): binary floats make e.g. 1.005 * 1000 come out
    # as 1004.9999999999999, which truncation would turn into 1004
    return round((whole_minutes * 60 + float(seconds)) * 1000)
def convert_lrc_to_sylt(lrc):
    """Convert LRC lyrics text into a list of ``(text, milliseconds)`` pairs.

    Each line may start with one or more ``[timestamp]`` tags followed by
    the lyric text; one pair is produced per timestamp. Lines without a
    leading timestamp (ID tags such as ``[ar:Artist]``, plain text, blank
    lines) and timestamps that carry no text are skipped.

    The pairs are returned ordered by time. Timestamps are converted with
    ``totime``, which raises ``ValueError`` for malformed values.
    """
    # A tag counts as a timestamp only if it is made of digits, ':' and '.'
    # and contains at least one ':'; anything else is left in the text
    stamp_chars = set("0123456789:.")
    entries = []
    for raw_line in lrc.split("\n"):
        text = raw_line.strip()
        stamps = []
        # Peel off every leading timestamp tag; the same lyric line may be
        # listed once with several timestamps (e.g. a repeated chorus)
        while text.startswith("["):
            tag, closed, remainder = text[1:].partition("]")
            if not closed or ":" not in tag or not stamp_chars.issuperset(tag):
                break
            stamps.append(tag)
            text = remainder
        if not stamps or not text:
            continue
        entries.extend((text, totime(stamp)) for stamp in stamps)
    # Multi-timestamp lines produce out-of-order entries; synchronised lyrics
    # are expected in chronological order. The sort is stable, so entries
    # sharing a timestamp keep their input order
    entries.sort(key=lambda entry: entry[1])
    return entries
if ext == 'mp3':
metadata = mutagen.id3.ID3(filename)
for lang, sub in subtitles.items():
metadata.add(mutagen.id3.SYLT(
encoding=mutagen.id3.Encoding.UTF8,
encoding=mutagen.id3.Encoding.UTF8, format=2, type=1,
lang=ISO639Utils.short2long(lang) or 'und',
format=2,
type=1,
text=convert_lrc_to_sylt(sub['data'])))
text=[(line.text, int(line.start * 1000))
for line in parse_lrc(sub['data'])
if isinstance(line, Subtitle)]))
else:
metadata = mutagen.File(filename)
metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()]

36
yt_dlp/utils/subtitles.py Normal file
View file

@ -0,0 +1,36 @@
import re
from dataclasses import dataclass
from typing import TypeAlias
# Alias used to annotate time values measured in seconds (a plain float)
Seconds: TypeAlias = float
@dataclass
class Metadata:
    """A ``name``/``value`` pair taken from a ``[name:value]`` tag."""

    # Part of the tag before the first colon
    name: str
    # Part of the tag after the first colon
    value: str
@dataclass
class Subtitle:
    """A single piece of timed text."""

    # The text of this entry
    text: str
    # Time at which the entry starts, in seconds
    start: Seconds
    # Time at which the entry ends, in seconds; None when no end is set
    end: Seconds | None = None
def parse_lrc(text):
    """Parse LRC lyrics, lazily yielding one item per recognised element.

    Every line of ``text`` is handled as one of:

    * a timed line: one or more leading ``[time]`` tags followed by the
      lyric text. A ``Subtitle`` is yielded for each tag, all sharing the
      same (stripped) text. ``time`` has the form
      ``[[hours:]minutes:]seconds[.fraction]`` and is converted to seconds.
    * an ID tag of the form ``[name:value]``, yielded as ``Metadata``.
    * any other non-blank line: a ``ValueError`` instance holding the line
      is yielded, not raised, so iteration continues with the next line.

    Blank lines produce nothing.
    """
    # The dot must be escaped: a bare "." would let tags such as "[00:12x34]"
    # through and make float() raise in the middle of iteration
    time_tag = re.compile(r'\[(?P<time>((\d+:)?\d+:)?\d+(\.\d+)?)\](?P<content>.*)')
    id_tag = re.compile(r'\[(?P<name>[^\]:]+):(?P<value>[^\]]+)\]')

    for line in text.split('\n'):
        # Strip first so that CRLF line endings or stray indentation do not
        # stop the anchored (fullmatch) patterns from matching
        line = line.strip()
        times = []
        # A line may carry several timestamps for the same text
        while mobj := time_tag.fullmatch(line):
            # Fields are weighted 1, 60, 3600 from the right
            times.append(sum(
                float(t) * 60**i for i, t in enumerate(reversed(mobj.group('time').split(':')))))
            line = mobj.group('content').strip()

        for t in times:
            yield Subtitle(start=t, text=line)

        if not times:
            if mobj := id_tag.fullmatch(line):
                yield Metadata(mobj.group('name'), mobj.group('value').strip())
            elif line:
                yield ValueError(line)