From 3f30a6b78ed331f843c8a6b6420371476a36b479 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 27 Feb 2024 08:42:44 +0530 Subject: [PATCH] Abstract out lrc parsing --- yt_dlp/postprocessor/ffmpeg.py | 21 +++++--------------- yt_dlp/utils/subtitles.py | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 16 deletions(-) create mode 100644 yt_dlp/utils/subtitles.py diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index d97930655..59ca879cf 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -33,6 +33,7 @@ variadic, write_json_file, ) +from ..utils.subtitles import Subtitle, parse_lrc EXT_TO_OUT_FORMATS = { 'aac': 'adts', @@ -683,27 +684,15 @@ def _embed_lyrics(self, subtitles, filename, ext): with open(sub['filepath'], encoding='utf-8') as f: sub['data'] = f.read() - def totime(time): - time = time.split(":") - return int((int(time[0])*60 + float(time[1]))*1000) - def convert_lrc_to_sylt(lrc): - lrc = lrc.split("\n") - lrc = [i.strip() for i in lrc] - lrc = [i for i in lrc if i] - lrc = [i for i in lrc if i[-1] != "]"] - lrc = [i.split("]") for i in lrc] - lrc = [[i[0][1:], i[1]] for i in lrc] - lrc = [(i[1], totime(i[0])) for i in lrc] - return lrc if ext == 'mp3': metadata = mutagen.id3.ID3(filename) for lang, sub in subtitles.items(): metadata.add(mutagen.id3.SYLT( - encoding=mutagen.id3.Encoding.UTF8, + encoding=mutagen.id3.Encoding.UTF8, format=2, type=1, lang=ISO639Utils.short2long(lang) or 'und', - format=2, - type=1, - text=convert_lrc_to_sylt(sub['data']))) + text=[(line.text, int(line.start * 1000)) + for line in parse_lrc(sub['data']) + if isinstance(line, Subtitle)])) else: metadata = mutagen.File(filename) metadata['©lyr' if ext == 'm4a' else 'lyrics'] = [sub['data'] for sub in subtitles.values()] diff --git a/yt_dlp/utils/subtitles.py b/yt_dlp/utils/subtitles.py new file mode 100644 index 000000000..784acecde --- /dev/null +++ b/yt_dlp/utils/subtitles.py @@ -0,0 +1,36 @@ +import re +from dataclasses import dataclass +from typing import TypeAlias + +Seconds: TypeAlias = float + + +@dataclass +class Metadata: + name: str + value: str + + +@dataclass +class Subtitle: + text: str + start: Seconds + end: Seconds = None + + +def parse_lrc(text): + for line in text.split('\n'): + times = [] + while mobj := re.fullmatch(r'\[(?P