diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index 603f85c65..1ca90cd4c 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -558,6 +558,73 @@ def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(sel
                 '[SponsorBlock]: Sponsor', 'c',
             ]), [])
 
+    def test_round_remove_chapter_Common(self):
+        keyframes = [1, 3, 5, 7]
+        chapters = self._pp._round_remove_chapters(keyframes, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 2, 'end_time': 6, 'remove': True},
+            {'start_time': 6, 'end_time': 10, 'remove': False},
+        ])
+        self.assertEqual(chapters, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 3, 'end_time': 5, 'remove': True},
+            {'start_time': 6, 'end_time': 10, 'remove': False},
+        ])
+
+    def test_round_remove_chapter_AlreadyKeyframe(self):
+        keyframes = [1, 3, 5, 7]
+        chapters = self._pp._round_remove_chapters(keyframes, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 3, 'end_time': 7, 'remove': True},
+            {'start_time': 6, 'end_time': 10, 'remove': False},
+        ])
+        self.assertEqual(chapters, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 3, 'end_time': 7, 'remove': True},
+            {'start_time': 6, 'end_time': 10, 'remove': False},
+        ])
+
+    def test_round_remove_chapter_RemoveEnd(self):
+        keyframes = [1, 3, 5, 7]
+        chapters = self._pp._round_remove_chapters(keyframes, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 3, 'end_time': 8, 'remove': True},
+        ])
+        self.assertEqual(chapters, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 3, 'end_time': 8, 'remove': True},
+        ])
+
+    def test_round_remove_chapter_RemoveAfterLast(self):
+        keyframes = [1, 3, 5, 7]
+        chapters = self._pp._round_remove_chapters(keyframes, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 8, 'end_time': 9, 'remove': True},
+        ])
+        self.assertEqual(chapters, [
+            {'start_time': 0, 'end_time': 2},
+        ])
+
+    def test_round_remove_chapter_RemoveBeforeFirst(self):
+        keyframes = [1, 3, 5, 7]
+        chapters = self._pp._round_remove_chapters(keyframes, [
+            {'start_time': 0, 'end_time': 0.5, 'remove': True},
+            {'start_time': 0.5, 'end_time': 10},
+        ])
+        self.assertEqual(chapters, [
+            {'start_time': 0.5, 'end_time': 10},
+        ])
+
+    def test_round_remove_chapter_NoKeyframeInside(self):
+        keyframes = [1, 3, 5, 7]
+        chapters = self._pp._round_remove_chapters(keyframes, [
+            {'start_time': 0, 'end_time': 2},
+            {'start_time': 3.5, 'end_time': 4.5, 'remove': True},
+        ])
+        self.assertEqual(chapters, [
+            {'start_time': 0, 'end_time': 2},
+        ])
+
     def test_make_concat_opts_CommonCase(self):
         sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')]
         expected = '''ffconcat version 1.0
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 20111175b..dc97e4d40 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -677,6 +677,7 @@ def get_postprocessors(opts):
             'remove_ranges': opts.remove_ranges,
             'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
             'force_keyframes': opts.force_keyframes_at_cuts,
+            'round_to_keyframes': opts.round_cuts_to_keyframes,
         }
     # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
     # FFmpegExtractAudioPP as containers before conversion may not support
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 06b65e0ea..08f5c6af0 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1778,6 +1778,17 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
         '--no-force-keyframes-at-cuts',
         action='store_false', dest='force_keyframes_at_cuts',
         help='Do not force keyframes around the chapters when cutting/splitting (default)')
+    postproc.add_option(
+        '--round-cuts-to-keyframes',
+        action='store_true', dest='round_cuts_to_keyframes', default=False,
+        help=(
+            'Round cuts to the nearest keyframe when removing sections. '
+            'This may result in some more content being included than specified, but makes problems around cuts '
+            'less likely'))
+    postproc.add_option(
+        '--no-round-cuts-to-keyframes',
+        action='store_false', dest='round_cuts_to_keyframes',
+        help='Do not round cuts to the nearest keyframe when removing sections (default)')
     _postprocessor_opts_parser = lambda key, val='': (
         *(item.split('=', 1) for item in (val.split(';') if val else [])),
         ('key', remove_end(key, 'PP')))
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 8965806ae..12a6560b4 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -393,6 +393,46 @@ def _quote_for_ffmpeg(string):
         string = string[1:] if string[0] == "'" else "'" + string
         return string[:-1] if string[-1] == "'" else string + "'"
 
+    def get_keyframe_timestamps(self, path, opts=()):
+        """
+        Get the timestamps of the keyframes in the first video stream of a file
+
+        @param path  Path of the media file to probe
+        @param opts  Additional command-line arguments for ffprobe
+        @returns     Sorted list of keyframe timestamps in seconds
+        @raises      PostProcessingError if ffprobe is unavailable or its output cannot be parsed
+        """
+        if self.probe_basename != 'ffprobe':
+            if self.probe_available:
+                self.report_warning('Only ffprobe is supported for keyframe timestamp extraction')
+            raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location')
+
+        self.check_version()
+
+        cmd = [
+            self.probe_executable,
+            encodeArgument('-select_streams'),
+            encodeArgument('v:0'),
+            encodeArgument('-show_entries'),
+            encodeArgument('packet=pts_time,flags'),
+            encodeArgument('-print_format'),
+            encodeArgument('json'),
+        ]
+
+        cmd += opts
+        cmd.append(self._ffmpeg_filename_argument(path))
+        self.write_debug(f'ffprobe command line: {shell_quote(cmd)}')
+        stdout, _, _ = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+        try:
+            packets = json.loads(stdout).get('packets') or []
+        except ValueError as err:
+            raise PostProcessingError(f'Unable to parse the keyframe list from ffprobe: {err}') from err
+
+        # The result gets bisected by callers, so skip packets without a timestamp and sort the rest
+        return sorted(
+            float(packet['pts_time']) for packet in packets
+            if 'K' in packet.get('flags', '') and 'pts_time' in packet)
+
     def force_keyframes(self, filename, timestamps):
         timestamps = orderedSet(timestamps)
         if timestamps[0] == 0:
diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py
index d82685ed8..7eec34474 100644
--- a/yt_dlp/postprocessor/modify_chapters.py
+++ b/yt_dlp/postprocessor/modify_chapters.py
@@ -1,3 +1,4 @@
+import bisect
 import copy
 import heapq
 import os
@@ -13,13 +14,16 @@
 
 class ModifyChaptersPP(FFmpegPostProcessor):
     def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None,
-                 *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
+                 *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False,
+                 round_to_keyframes=False):
         FFmpegPostProcessor.__init__(self, downloader)
         self._remove_chapters_patterns = set(remove_chapters_patterns or [])
-        self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
+        self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(
+            SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys())
         self._ranges_to_remove = set(remove_ranges or [])
         self._sponsorblock_chapter_title = sponsorblock_chapter_title
         self._force_keyframes = force_keyframes
+        self._round_to_keyframes = round_to_keyframes
 
     @PostProcessor._restrict_to(images=False)
     def run(self, info):
@@ -35,7 +39,12 @@ def run(self, info):
         if not chapters:
             chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]
 
-        info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
+        chapters = chapters + sponsor_chapters
+        if self._round_to_keyframes:
+            keyframes = self.get_keyframe_timestamps(info['filepath'])
+            chapters = self._round_remove_chapters(keyframes, chapters)
+
+        info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters)
         if not cuts:
             return [], info
         elif not info['chapters']:
@@ -54,7 +63,8 @@ def run(self, info):
             self.write_debug('Expected and actual durations mismatch')
 
         concat_opts = self._make_concat_opts(cuts, real_duration)
-        self.write_debug('Concat spec = {}'.format(', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts)))
+        self.write_debug('Concat spec = {}'.format(
+            ', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts)))
 
         def remove_chapters(file, is_sub):
             return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub)
@@ -117,7 +127,8 @@ def _get_supported_subs(self, info):
                 continue
             ext = sub['ext']
             if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
-                self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
+                self.report_warning(
+                    f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
                 continue
             # TODO: create __real_download for subs?
             yield sub_file
@@ -314,13 +325,52 @@ def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=
         in_file = filename
         out_file = prepend_extension(in_file, 'temp')
         if force_keyframes:
-            in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time'])))
+            in_file = self.force_keyframes(in_file,
+                                           (t for c in ranges_to_cut for t in (c['start_time'], c['end_time'])))
         self.to_screen(f'Removing chapters from {filename}')
         self.concat_files([in_file] * len(concat_opts), out_file, concat_opts)
         if in_file != filename:
             self._delete_downloaded_files(in_file, msg=None)
         return out_file
 
+    @staticmethod
+    def _round_remove_chapters(keyframes, chapters):
+        """
+        Shrink the chapters marked for removal so that they start and end on keyframes
+
+        The start of a cut is moved forward to the next keyframe and its end back to the
+        previous one, unless the end lies at or after the last keyframe.
+        Cuts that become empty this way, or that start after the last keyframe, are dropped.
+
+        @param keyframes  Sorted list of keyframe timestamps in seconds
+        @param chapters   List of chapter dicts; only those with a truthy "remove" are rounded
+        @returns          New list with the remaining chapters; the input is not modified
+        """
+        if not keyframes:
+            return list(chapters)
+
+        result = []
+        for c in chapters:
+            if not c.get('remove', False):
+                result.append(c)
+                continue
+
+            start_frame = bisect.bisect_left(keyframes, c['start_time'])
+            if start_frame >= len(keyframes):
+                continue
+
+            start_time, end_time = keyframes[start_frame], c['end_time']
+            if end_time < keyframes[-1]:
+                # bisect_right() - 1 is -1 for a cut ending before the first keyframe;
+                # clamp it so that the index does not wrap around to the last keyframe
+                end_time = keyframes[max(bisect.bisect_right(keyframes, end_time) - 1, 0)]
+            if start_time >= end_time:
+                continue
+
+            result.append({**c, 'start_time': start_time, 'end_time': end_time})
+
+        return result
+
     @staticmethod
     def _make_concat_opts(chapters_to_remove, duration):
         opts = [{}]