From 517ddf3c3f12560ab93e3d36244dc82db9f97818 Mon Sep 17 00:00:00 2001 From: sepro Date: Sat, 8 Feb 2025 17:00:38 +0100 Subject: [PATCH 01/25] [misc] Improve Issue/PR templates (#11499) Authored by: seproDev --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 24 ++++-------- .../ISSUE_TEMPLATE/2_site_support_request.yml | 24 ++++-------- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 26 ++++--------- .github/ISSUE_TEMPLATE/4_bug_report.yml | 28 ++++---------- .github/ISSUE_TEMPLATE/5_feature_request.yml | 26 ++++--------- .github/ISSUE_TEMPLATE/6_question.yml | 26 ++++--------- .github/ISSUE_TEMPLATE/config.yml | 7 +--- .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 4 +- .../2_site_support_request.yml | 4 +- .../3_site_feature_request.yml | 6 +-- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 8 +--- .../ISSUE_TEMPLATE_tmpl/5_feature_request.yml | 6 +-- .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 6 +-- .github/PULL_REQUEST_TEMPLATE.md | 37 ++++++++++--------- README.md | 1 - devscripts/make_issue_template.py | 30 ++++++--------- 16 files changed, 87 insertions(+), 176 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 20e5e944f..fd7591ba2 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -2,13 +2,11 @@ name: Broken site support description: Report issue with yt-dlp on a supported site labels: [triage, site-bug] body: - - type: checkboxes + - type: markdown attributes: - label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE - description: Fill all fields even if you think it is irrelevant for the issue - options: - - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field - required: true + value: | + > [!IMPORTANT] + > Not providing the required (*) information will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: @@ -24,9 +22,7 @@ body: required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766), [the FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ), and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%3Aissue%20-label%3Aspam%20%20) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input @@ -47,6 +43,8 @@ body: id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem + description: | + This is mandatory unless absolutely impossible to provide. If you are unable to provide the output, please explain why. options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true @@ -78,11 +76,3 @@ body: render: shell validations: required: true - - type: markdown - attributes: - value: | - > [!CAUTION] - > ### GitHub is experiencing a high volume of malicious spam comments. - > ### If you receive any replies asking you download a file, do NOT follow the download links! - > - > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 4aeff7dc6..111f423d0 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -2,13 +2,11 @@ name: Site support request description: Request support for a new site labels: [triage, site-request] body: - - type: checkboxes + - type: markdown attributes: - label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE - description: Fill all fields even if you think it is irrelevant for the issue - options: - - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field - required: true + value: | + > [!IMPORTANT] + > Not providing the required (*) information will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: @@ -24,9 +22,7 @@ body: required: true - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%3Aissue%20-label%3Aspam%20%20) for similar requests **including closed ones**. DO NOT post duplicates required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required - type: input @@ -59,6 +55,8 @@ body: id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem + description: | + This is mandatory unless absolutely impossible to provide. If you are unable to provide the output, please explain why. options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true @@ -90,11 +88,3 @@ body: render: shell validations: required: true - - type: markdown - attributes: - value: | - > [!CAUTION] - > ### GitHub is experiencing a high volume of malicious spam comments. - > ### If you receive any replies asking you download a file, do NOT follow the download links! - > - > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 2f516ebb7..fd74be331 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -1,14 +1,12 @@ name: Site feature request -description: Request a new functionality for a supported site +description: Request new functionality for a site supported by yt-dlp labels: [triage, site-enhancement] body: - - type: checkboxes + - type: markdown attributes: - label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE - description: Fill all fields even if you think it is irrelevant for the issue - options: - - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field - required: true + value: | + > [!IMPORTANT] + > Not providing the required (*) information will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: @@ -22,9 +20,7 @@ body: required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%3Aissue%20-label%3Aspam%20%20) for similar requests **including closed ones**. DO NOT post duplicates required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input @@ -55,6 +51,8 @@ body: id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem + description: | + This is mandatory unless absolutely impossible to provide. If you are unable to provide the output, please explain why. options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true @@ -86,11 +84,3 @@ body: render: shell validations: required: true - - type: markdown - attributes: - value: | - > [!CAUTION] - > ### GitHub is experiencing a high volume of malicious spam comments. - > ### If you receive any replies asking you download a file, do NOT follow the download links! - > - > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 201586e9d..e9ec45e14 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -2,13 +2,11 @@ name: Core bug report description: Report a bug unrelated to any particular site or extractor labels: [triage, bug] body: - - type: checkboxes + - type: markdown attributes: - label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE - description: Fill all fields even if you think it is irrelevant for the issue - options: - - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field - required: true + value: | + > [!IMPORTANT] + > Not providing the required (*) information will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: @@ -20,13 +18,7 @@ body: required: true - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) required: true - - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - required: true - - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) - required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766), [the FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ), and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%3Aissue%20-label%3Aspam%20%20) for similar issues **including closed ones**. DO NOT post duplicates required: true - type: textarea id: description @@ -40,6 +32,8 @@ body: id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem + description: | + This is mandatory unless absolutely impossible to provide. If you are unable to provide the output, please explain why. options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true @@ -71,11 +65,3 @@ body: render: shell validations: required: true - - type: markdown - attributes: - value: | - > [!CAUTION] - > ### GitHub is experiencing a high volume of malicious spam comments. - > ### If you receive any replies asking you download a file, do NOT follow the download links! - > - > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 765de86a2..9c7d66d81 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -1,14 +1,12 @@ name: Feature request -description: Request a new functionality unrelated to any particular site or extractor +description: Request a new feature unrelated to any particular site or extractor labels: [triage, enhancement] body: - - type: checkboxes + - type: markdown attributes: - label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE - description: Fill all fields even if you think it is irrelevant for the issue - options: - - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field - required: true + value: | + > [!IMPORTANT] + > Not providing the required (*) information will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: @@ -22,9 +20,7 @@ body: required: true - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%3Aissue%20-label%3Aspam%20%20) for similar requests **including closed ones**. DO NOT post duplicates required: true - type: textarea id: description @@ -38,6 +34,8 @@ body: id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem + description: | + This is mandatory unless absolutely impossible to provide. If you are unable to provide the output, please explain why. options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" @@ -65,11 +63,3 @@ body: [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell - - type: markdown - attributes: - value: | - > [!CAUTION] - > ### GitHub is experiencing a high volume of malicious spam comments. - > ### If you receive any replies asking you download a file, do NOT follow the download links! - > - > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 198e21bec..186f73a4a 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -1,14 +1,12 @@ name: Ask question -description: Ask yt-dlp related question +description: Ask a question about using yt-dlp labels: [question] body: - - type: checkboxes + - type: markdown attributes: - label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE - description: Fill all fields even if you think it is irrelevant for the issue - options: - - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field - required: true + value: | + > [!IMPORTANT] + > Not providing the required (*) information will result in your issue being closed and ignored. - type: markdown attributes: value: | @@ -28,9 +26,7 @@ body: required: true - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766), [the FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ), and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%3Aissue%20-label%3Aspam%20%20) for similar questions **including closed ones**. DO NOT post duplicates required: true - type: textarea id: question @@ -44,6 +40,8 @@ body: id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem + description: | + This is mandatory unless absolutely impossible to provide. If you are unable to provide the output, please explain why. options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" @@ -71,11 +69,3 @@ body: [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell - - type: markdown - attributes: - value: | - > [!CAUTION] - > ### GitHub is experiencing a high volume of malicious spam comments. - > ### If you receive any replies asking you download a file, do NOT follow the download links! - > - > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 9cdffa4b1..0131631bb 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,8 +1,5 @@ blank_issues_enabled: false contact_links: - - name: Get help from the community on Discord + - name: Get help on Discord url: https://discord.gg/H5MNcFW63r - about: Join the yt-dlp Discord for community-powered support! - - name: Matrix Bridge to the Discord server - url: https://matrix.to/#/#yt-dlp:matrix.org - about: For those who do not want to use Discord + about: Join the yt-dlp Discord server for support and discussion diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml index bff28ae4e..f1a2d3090 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -18,9 +18,7 @@ body: required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766), [the FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ), and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%%3Aissue%%20-label%%3Aspam%%20%%20) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml index 2bffe738d..31b89b683 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -18,9 +18,7 @@ body: required: true - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%%3Aissue%%20-label%%3Aspam%%20%%20) for similar requests **including closed ones**. DO NOT post duplicates required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required - type: input diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml index 6c3127983..421766a75 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -1,5 +1,5 @@ name: Site feature request -description: Request a new functionality for a supported site +description: Request new functionality for a site supported by yt-dlp labels: [triage, site-enhancement] body: %(no_skip)s @@ -16,9 +16,7 @@ body: required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%%3Aissue%%20-label%%3Aspam%%20%%20) for similar requests **including closed ones**. DO NOT post duplicates required: true - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required - type: input diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml index 5f357d96e..31a19b292 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -14,13 +14,7 @@ body: required: true - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) required: true - - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - required: true - - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) - required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766), [the FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ), and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%%3Aissue%%20-label%%3Aspam%%20%%20) for similar issues **including closed ones**. DO NOT post duplicates required: true - type: textarea id: description diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml index 99107ff58..b8ab6610b 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml @@ -1,5 +1,5 @@ name: Feature request -description: Request a new functionality unrelated to any particular site or extractor +description: Request a new feature unrelated to any particular site or extractor labels: [triage, enhancement] body: %(no_skip)s @@ -16,9 +16,7 @@ body: required: true - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%%3Aissue%%20-label%%3Aspam%%20%%20) for similar requests **including closed ones**. DO NOT post duplicates required: true - type: textarea id: description diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml index bd742109a..062e96321 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml @@ -1,5 +1,5 @@ name: Ask question -description: Ask yt-dlp related question +description: Ask a question about using yt-dlp labels: [question] body: %(no_skip)s @@ -22,9 +22,7 @@ body: required: true - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels)) required: true - - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates - required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766), [the FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ), and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=is%%3Aissue%%20-label%%3Aspam%%20%%20) for similar questions **including closed ones**. DO NOT post duplicates required: true - type: textarea id: question diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4deee572f..4dcfcc48c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,14 +1,17 @@ -**IMPORTANT**: PRs without the template will be CLOSED + ### Description of your *pull request* and other information - - -ADD DESCRIPTION HERE +ADD DETAILED DESCRIPTION HERE Fixes # @@ -16,24 +19,22 @@ ### Description of your *pull request* and other information
Template ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests -### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check all of the following options that apply: -- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) -- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) +### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check those that apply and remove the others: +- [ ] I am the original author of the code in this PR, and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am not the original author of the code in this PR, but it is in the public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) -### What is the purpose of your *pull request*? +### What is the purpose of your *pull request*? Check those that apply and remove the others: - [ ] Fix or improvement to an extractor (Make sure to add/update tests) - [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy)) - [ ] Core bug fix/improvement diff --git a/README.md b/README.md index 45c56434a..0ac27c462 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ [![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation") [![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPI") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") -[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") [![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 2a418ddbf..681ebc51c 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -11,11 +11,13 @@ from devscripts.utils import get_filename_args, read_file, write_file -VERBOSE_TMPL = ''' +VERBOSE = ''' - type: checkboxes id: verbose attributes: label: Provide verbose output that clearly demonstrates the problem + description: | + This is mandatory unless absolutely impossible to provide. If you are unable to provide the output, please explain why. options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true @@ -47,31 +49,23 @@ render: shell validations: required: true - - type: markdown - attributes: - value: | - > [!CAUTION] - > ### GitHub is experiencing a high volume of malicious spam comments. - > ### If you receive any replies asking you download a file, do NOT follow the download links! - > - > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. '''.strip() NO_SKIP = ''' - - type: checkboxes + - type: markdown attributes: - label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE - description: Fill all fields even if you think it is irrelevant for the issue - options: - - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\\* field - required: true + value: | + > [!IMPORTANT] + > Not providing the required (*) information will result in your issue being closed and ignored. '''.strip() def main(): - fields = {'no_skip': NO_SKIP} - fields['verbose'] = VERBOSE_TMPL % fields - fields['verbose_optional'] = re.sub(r'(\n\s+validations:)?\n\s+required: true', '', fields['verbose']) + fields = { + 'no_skip': NO_SKIP, + 'verbose': VERBOSE, + 'verbose_optional': re.sub(r'(\n\s+validations:)?\n\s+required: true', '', VERBOSE), + } infile, outfile = get_filename_args(has_infile=True) write_file(outfile, read_file(infile) % fields) From 0d9f061d38c3a4da61972e2adad317079f2f1c84 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sat, 8 Feb 2025 18:48:36 +0100 Subject: [PATCH 02/25] [jsinterp] Add `js_number_to_string` (#12110) Authored by: Grub4K --- test/test_jsinterp.py | 23 +++++++++++++++++- yt_dlp/jsinterp.py | 55 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 06840ed85..c1464f2cd 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -9,7 +9,7 @@ import math -from yt_dlp.jsinterp import JS_Undefined, JSInterpreter +from yt_dlp.jsinterp import JS_Undefined, JSInterpreter, js_number_to_string class NaN: @@ -431,6 +431,27 @@ def test_slice(self): self._test('function f(){return "012345678".slice(-1, 1)}', '') self._test('function f(){return "012345678".slice(-3, -1)}', '67') + def test_js_number_to_string(self): + for test, radix, expected in [ + (0, None, '0'), + (-0, None, '0'), + (0.0, None, '0'), + (-0.0, None, '0'), + (math.nan, None, 'NaN'), + (-math.nan, None, 'NaN'), + (math.inf, None, 'Infinity'), + (-math.inf, None, '-Infinity'), + (10 ** 21.5, 8, '526665530627250154000000'), + (6, 2, '110'), + (254, 16, 'fe'), + (-10, 2, '-1010'), + (-0xff, 2, '-11111111'), + (0.1 + 0.2, 16, '0.4cccccccccccd'), + (1234.1234, 10, '1234.1234'), + # (1000000000000000128, 10, '1000000000000000100') + ]: + assert js_number_to_string(test, radix) == expected + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index ba059babb..bd6a47004 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -95,6 +95,61 @@ def _js_ternary(cndn, if_true=True, if_false=False): return if_true +# Ref: https://es5.github.io/#x9.8.1 +def js_number_to_string(val: float, radix: int = 10): + if radix in (JS_Undefined, None): + radix = 10 + assert radix in range(2, 37), 'radix must be an integer at least 2 and no greater than 36' + + if math.isnan(val): + return 'NaN' + if val == 0: + return '0' + if math.isinf(val): + return '-Infinity' if val < 0 else 'Infinity' + if radix == 10: + # TODO: implement special cases + ... + + ALPHABET = b'0123456789abcdefghijklmnopqrstuvwxyz.-' + + result = collections.deque() + sign = val < 0 + val = abs(val) + fraction, integer = math.modf(val) + delta = max(math.nextafter(.0, math.inf), math.ulp(val) / 2) + + if fraction >= delta: + result.append(-2) # `.` + while fraction >= delta: + delta *= radix + fraction, digit = math.modf(fraction * radix) + result.append(int(digit)) + # if we need to round, propagate potential carry through fractional part + needs_rounding = fraction > 0.5 or (fraction == 0.5 and int(digit) & 1) + if needs_rounding and fraction + delta > 1: + for index in reversed(range(1, len(result))): + if result[index] + 1 < radix: + result[index] += 1 + break + result.pop() + + else: + integer += 1 + break + + integer, digit = divmod(int(integer), radix) + result.appendleft(digit) + while integer > 0: + integer, digit = divmod(integer, radix) + result.appendleft(digit) + + if sign: + result.appendleft(-1) # `-` + + return bytes(ALPHABET[digit] for digit in result).decode('ascii') + + # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence _OPERATORS = { # None => Defined in JSInterpreter._operator '?': None, From 10b7ff68e98f17655e31952f6e17120b2d7dda96 Mon Sep 17 00:00:00 2001 From: entourage8 <193440922+entourage8@users.noreply.github.com> Date: Sat, 8 Feb 2025 22:43:12 +0100 Subject: [PATCH 03/25] [fd/hls] Fix `BYTERANGE` logic (#11972) Closes #3578, Closes #3810, Closes #9400 Authored by: entourage8 --- yt_dlp/downloader/hls.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index da2574da7..7bd116d96 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -177,6 +177,7 @@ def is_ad_fragment_end(s): if external_aes_iv: external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) byte_range = {} + byte_range_offset = 0 discontinuity_count = 0 frag_index = 0 ad_frag_next = False @@ -204,6 +205,11 @@ def is_ad_fragment_end(s): }) media_sequence += 1 + # If the byte_range is truthy, reset it after appending a fragment that uses it + if byte_range: + byte_range_offset = byte_range['end'] + byte_range = {} + elif line.startswith('#EXT-X-MAP'): if format_index and discontinuity_count != format_index: continue @@ -217,10 +223,12 @@ def is_ad_fragment_end(s): if extra_segment_query: frag_url = update_url_query(frag_url, extra_segment_query) + map_byte_range = {} + if map_info.get('BYTERANGE'): splitted_byte_range = map_info.get('BYTERANGE').split('@') - sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] - byte_range = { + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else 0 + map_byte_range = { 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } @@ -229,7 +237,7 @@ def is_ad_fragment_end(s): 'frag_index': frag_index, 'url': frag_url, 'decrypt_info': decrypt_info, - 'byte_range': byte_range, + 'byte_range': map_byte_range, 'media_sequence': media_sequence, }) media_sequence += 1 @@ -257,7 +265,7 @@ def is_ad_fragment_end(s): media_sequence = int(line[22:]) elif line.startswith('#EXT-X-BYTERANGE'): splitted_byte_range = line[17:].split('@') - sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range_offset byte_range = { 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), From 19edaa44fcd375f54e63d6227b092f5252d3e889 Mon Sep 17 00:00:00 2001 From: Julien Valentin Date: Sun, 9 Feb 2025 00:28:54 +0100 Subject: [PATCH 04/25] [ie/generic] Extract `live_status` for DASH manifest URLs (#12256) * Also removes the content-type check for dash+xml/mpd. This was added in cf1f13b817d88eb7d4b449f20cbad3215030e35f, but is a no-op since the regex pattern was never changed accordingly. And it looks like it was unwanted anyways per 28ad7df65ddb78c16ac008886d14ae2914aea6be Closes #12255 Authored by: mp3butcher --- yt_dlp/extractor/generic.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 320a47772..67c224e50 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -293,6 +293,19 @@ class GenericIE(InfoExtractor): 'timestamp': 1378272859.0, }, }, + # Live DASH MPD + { + 'url': 'https://livesim2.dashif.org/livesim2/ato_10/testpic_2s/Manifest.mpd', + 'info_dict': { + 'id': 'Manifest', + 'ext': 'mp4', + 'title': r're:Manifest \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'livestream', + }, + }, # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8 { 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8', @@ -2436,10 +2449,9 @@ def _real_extract(self, url): subtitles = {} if format_id.endswith('mpegurl') or ext == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) - elif format_id.endswith(('mpd', 'dash+xml')) or ext == 'mpd': - formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) elif format_id == 'f4m' or ext == 'f4m': formats = self._extract_f4m_formats(url, video_id, headers=headers) + # Don't check for DASH/mpd here, do it later w/ first_bytes. Same number of requests either way else: formats = [{ 'format_id': format_id, @@ -2521,6 +2533,7 @@ def _real_extract(self, url): doc, mpd_base_url=full_response.url.rpartition('/')[0], mpd_url=url) + info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None self._extra_manifest_info(info_dict, url) self.report_detected('DASH manifest') return info_dict From 1295bbedd45fa8d9bc3f7a194864ae280297848e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 8 Feb 2025 20:21:48 -0600 Subject: [PATCH 05/25] [ie/francetv:site] Fix livestream extraction (#12316) Closes #12310 Authored by: bashonly --- yt_dlp/extractor/francetv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 01b18d8da..c6036b306 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -358,7 +358,8 @@ def _real_extract(self, url): # For livestreams we need the id of the stream instead of the currently airing episode id video_id = traverse_obj(nextjs_data, ( ..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ..., - 'children', ..., ..., 'children', ..., ..., 'children', ..., 'options', 'id', {str}, any)) + 'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)), + 'options', 'id', {str}, any)) else: video_id = traverse_obj(nextjs_data, ( ..., ..., ..., 'children', From 241ace4f104d50fdf7638f9203927aefcf57a1f7 Mon Sep 17 00:00:00 2001 From: Stefan Lobbenmeier Date: Sun, 9 Feb 2025 20:19:28 +0100 Subject: [PATCH 06/25] [ie/zdf] Extract more metadata (#9565) Closes #9564 Authored by: StefanLobbenmeier --- yt_dlp/extractor/zdf.py | 94 +++++++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 703766cd7..b64a88f6c 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -187,12 +187,20 @@ class ZDFIE(ZDFBaseIE): 'info_dict': { 'id': '151025_magie_farben2_tex', 'ext': 'mp4', + 'duration': 2615.0, 'title': 'Die Magie der Farben (2/2)', 'description': 'md5:a89da10c928c6235401066b60a6d5c1a', - 'duration': 2615, 'timestamp': 1465021200, - 'upload_date': '20160604', 'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806', + 'upload_date': '20160604', + 'episode': 'Die Magie der Farben (2/2)', + 'episode_id': 'POS_954f4170-36a5-4a41-a6cf-78f1f3b1f127', + 'season': 'Staffel 1', + 'series': 'Die Magie der Farben', + 'season_number': 1, + 'series_id': 'a39900dd-cdbd-4a6a-a413-44e8c6ae18bc', + 'season_id': '5a92e619-8a0f-4410-a3d5-19c76fbebb37', + 'episode_number': 2, }, }, { 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html', @@ -200,12 +208,13 @@ class ZDFIE(ZDFBaseIE): 'info_dict': { 'ext': 'mp4', 'id': 'video_funk_1770473', - 'duration': 1278, - 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.', + 'duration': 1278.0, 'title': 'Alles ist verzaubert', + 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.', 'timestamp': 1635520560, - 'upload_date': '20211029', 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~1920x1080?cb=1663848412907', + 'upload_date': '20211029', + 'episode': 'Alles ist verzaubert', }, }, { # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche @@ -248,22 +257,52 @@ class ZDFIE(ZDFBaseIE): 'title': 'Das Geld anderer Leute', 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d', 'duration': 2581.0, - 'timestamp': 1675160100, - 'upload_date': '20230131', + 'timestamp': 1728983700, + 'upload_date': '20241015', 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350', + 'series': 'SOKO Stuttgart', + 'series_id': 'f862ce9a-6dd1-4388-a698-22b36ac4c9e9', + 'season': 'Staffel 11', + 'season_number': 11, + 'season_id': 'ae1b4990-6d87-4970-a571-caccf1ba2879', + 'episode': 'Das Geld anderer Leute', + 'episode_number': 10, + 'episode_id': 'POS_7f367934-f2f0-45cb-9081-736781ff2d23', }, }, { 'url': 'https://www.zdf.de/dokumentation/terra-x/unser-gruener-planet-wuesten-doku-100.html', 'info_dict': { - 'id': '220605_dk_gruener_planet_wuesten_tex', + 'id': '220525_green_planet_makingof_1_tropen_tex', 'ext': 'mp4', - 'title': 'Unser grüner Planet - Wüsten', - 'description': 'md5:4fc647b6f9c3796eea66f4a0baea2862', - 'duration': 2613.0, - 'timestamp': 1654450200, - 'upload_date': '20220605', - 'format_note': 'uhd, main', - 'thumbnail': 'https://www.zdf.de/assets/saguaro-kakteen-102~3840x2160?cb=1655910690796', + 'title': 'Making-of Unser grüner Planet - Tropen', + 'description': 'md5:d7c6949dc7c75c73c4ad51c785fb0b79', + 'duration': 435.0, + 'timestamp': 1653811200, + 'upload_date': '20220529', + 'format_note': 'hd, main', + 'thumbnail': 'https://www.zdf.de/assets/unser-gruener-planet-making-of-1-tropen-100~3840x2160?cb=1653493335577', + 'episode': 'Making-of Unser grüner Planet - Tropen', + }, + 'skip': 'No longer available: "Leider kein Video verfügbar"', + }, { + 'url': 'https://www.zdf.de/serien/northern-lights/begegnung-auf-der-bruecke-100.html', + 'info_dict': { + 'id': '240319_2310_sendung_not', + 'ext': 'mp4', + 'title': 'Begegnung auf der Brücke', + 'description': 'md5:e53a555da87447f7f1207f10353f8e45', + 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/c5ff1d1f-f5c8-4468-86ac-1b2f1dbecc76?layout=2400x1350', + 'upload_date': '20250203', + 'duration': 3083.0, + 'timestamp': 1738546500, + 'series_id': '1d7a1879-01ee-4468-8237-c6b4ecd633c7', + 'series': 'Northern Lights', + 'season': 'Staffel 1', + 'season_number': 1, + 'season_id': '22ac26a2-4ea2-4055-ac0b-98b755cdf718', + 'episode': 'Begegnung auf der Brücke', + 'episode_number': 1, + 'episode_id': 'POS_71049438-024b-471f-b472-4fe2e490d1fb', }, }] @@ -316,12 +355,31 @@ def _extract_entry(self, url, player, content, video_id): 'timestamp': unified_timestamp(content.get('editorialDate')), 'thumbnails': thumbnails, 'chapters': chapters or None, + 'episode': title, + **traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', { + 'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}), + 'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}), + 'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}), + 'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}), + 'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}), + 'episode_number': ('episodeNumber', {int_or_none}), + 'episode_id': ('contentId', {str}), + })), }) def _extract_regular(self, url, player, video_id): - content = self._call_api( - player['content'], video_id, 'content', player['apiToken'], url) - return self._extract_entry(player['content'], player, content, video_id) + player_url = player['content'] + + try: + content = self._call_api( + update_url_query(player_url, {'profile': 'player-3'}), + video_id, 'content', player['apiToken'], url) + except ExtractorError as e: + self.report_warning(f'{video_id}: {e.orig_msg}; retrying with v2 profile') + content = self._call_api( + player_url, video_id, 'content', player['apiToken'], url) + + return self._extract_entry(player_url, player, content, video_id) def _extract_mobile(self, video_id): video = self._download_v2_doc(video_id) From 4ca8c44a073d5aa3a3e3112c35b2b23d6ce25ac6 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 9 Feb 2025 22:37:23 +0100 Subject: [PATCH 07/25] [jsinterp] Improve zeroise (#12313) Authored by: seproDev --- test/test_jsinterp.py | 10 ++++++++++ test/test_youtube_signature.py | 4 ++++ yt_dlp/jsinterp.py | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index c1464f2cd..ed3ca61c4 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -93,6 +93,16 @@ def test_operators(self): self._test('function f(){return 0 ?? 42;}', 0) self._test('function f(){return "life, the universe and everything" < 42;}', False) self._test('function f(){return 0 - 7 * - 6;}', 42) + self._test('function f(){return true << "5";}', 32) + self._test('function f(){return true << true;}', 2) + self._test('function f(){return "19" & "21.9";}', 17) + self._test('function f(){return "19" & false;}', 0) + self._test('function f(){return "11.0" >> "2.1";}', 2) + self._test('function f(){return 5 ^ 9;}', 12) + self._test('function f(){return 0.0 << NaN}', 0) + self._test('function f(){return null << undefined}', 0) + # TODO: Does not work due to number too large + # self._test('function f(){return 21 << 4294967297}', 42) def test_array_access(self): self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7]) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 13436f088..7ae627f2c 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -201,6 +201,10 @@ 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', 'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP', ), + ( + 'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js', + 'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index bd6a47004..ac0629715 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -25,7 +25,7 @@ def zeroise(x): with contextlib.suppress(TypeError): if math.isnan(x): # NB: NaN cannot be checked by membership return 0 - return x + return int(float(x)) def wrapped(a, b): return op(zeroise(a), zeroise(b)) & 0xffffffff From 14cd7f3443c6da4d49edaefcc12da9dee86e243e Mon Sep 17 00:00:00 2001 From: Patrick Robertson Date: Mon, 10 Feb 2025 20:00:00 +0100 Subject: [PATCH 08/25] [ie/twitter] Fix syndication token generation (#12107) Authored by: pjrobertson, Grub4K Co-authored-by: Simon Sawicki --- yt_dlp/extractor/twitter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index c05b5bf9c..d32ae3f18 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,11 +1,12 @@ import functools import json -import random +import math import re import urllib.parse from .common import InfoExtractor from .periscope import PeriscopeBaseIE, PeriscopeIE +from ..jsinterp import js_number_to_string from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -1330,6 +1331,11 @@ def _build_graphql_query(self, media_id): }, } + def _generate_syndication_token(self, twid): + # ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '') + translation = str.maketrans(dict.fromkeys('0.')) + return js_number_to_string((int(twid) / 1e15) * math.PI, 36).translate(translation) + def _call_syndication_api(self, twid): self.report_warning( 'Not all metadata or media is available via syndication endpoint', twid, only_once=True) @@ -1337,8 +1343,7 @@ def _call_syndication_api(self, twid): 'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON', headers={'User-Agent': 'Googlebot'}, query={ 'id': twid, - # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '') - 'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)), + 'token': self._generate_syndication_token(twid), }) if not status: raise ExtractorError('Syndication endpoint returned empty JSON response') From c987be0acb6872c6561f28aa28171e803393d851 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 10 Feb 2025 17:08:10 -0600 Subject: [PATCH 09/25] [fd/hls] Support `hls_media_playlist_data` format field (#12322) Authored by: bashonly --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/downloader/hls.py | 12 ++++++++---- yt_dlp/extractor/common.py | 5 +++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index b7b19cf6e..3293a9076 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -598,7 +598,7 @@ class YoutubeDL: # NB: Keep in sync with the docstring of extractor/common.py 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', - 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', + 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'hls_media_playlist_data', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url', diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 7bd116d96..7a47f8f83 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,11 +72,15 @@ def check_results(): def real_download(self, filename, info_dict): man_url = info_dict['url'] - self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') - urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) - man_url = urlh.url - s = urlh.read().decode('utf-8', 'ignore') + s = info_dict.get('hls_media_playlist_data') + if s: + self.to_screen(f'[{self.FD_NAME}] Using m3u8 manifest from extracted info') + else: + self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.url + s = urlh.read().decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 92ddad2b7..3e7734ce1 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -201,6 +201,11 @@ class InfoExtractor: fragment_base_url * "duration" (optional, int or float) * "filesize" (optional, int) + * hls_media_playlist_data + The M3U8 media playlist data as a string. + Only use if the data must be modified during extraction and + the native HLS downloader should bypass requesting the URL. + Does not apply if ffmpeg is used as external downloader * is_from_start Is a live format that can be downloaded from the start. Boolean * preference Order number of this format. If this field is From 208163447408c78673b08c172beafe5c310fb167 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Mon, 10 Feb 2025 23:22:21 +0000 Subject: [PATCH 10/25] [test:download] Validate and sort info dict fields (#12299) Authored by: pzhlkj6612, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- test/helper.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/test/helper.py b/test/helper.py index c776e70b7..193019019 100644 --- a/test/helper.py +++ b/test/helper.py @@ -237,6 +237,20 @@ def sanitize(key, value): def expect_info_dict(self, got_dict, expected_dict): + ALLOWED_KEYS_SORT_ORDER = ( + # NB: Keep in sync with the docstring of extractor/common.py + 'id', 'ext', 'direct', 'display_id', 'title', 'alt_title', 'description', 'media_type', + 'uploader', 'uploader_id', 'uploader_url', 'channel', 'channel_id', 'channel_url', 'channel_is_verified', + 'channel_follower_count', 'comment_count', 'view_count', 'concurrent_view_count', + 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'age_limit', 'duration', 'thumbnail', 'heatmap', + 'chapters', 'chapter', 'chapter_number', 'chapter_id', 'start_time', 'end_time', 'section_start', 'section_end', + 'categories', 'tags', 'cast', 'composers', 'artists', 'album_artists', 'creators', 'genres', + 'track', 'track_number', 'track_id', 'album', 'album_type', 'disc_number', + 'series', 'series_id', 'season', 'season_number', 'season_id', 'episode', 'episode_number', 'episode_id', + 'timestamp', 'upload_date', 'release_timestamp', 'release_date', 'release_year', 'modified_timestamp', 'modified_date', + 'playable_in_embed', 'availability', 'live_status', 'location', 'license', '_old_archive_ids', + ) + expect_dict(self, got_dict, expected_dict) # Check for the presence of mandatory fields if got_dict.get('_type') not in ('playlist', 'multi_video'): @@ -252,7 +266,13 @@ def expect_info_dict(self, got_dict, expected_dict): test_info_dict = sanitize_got_info_dict(got_dict) - missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) + # Check for invalid/misspelled field names being returned by the extractor + invalid_keys = sorted(test_info_dict.keys() - ALLOWED_KEYS_SORT_ORDER) + self.assertFalse(invalid_keys, f'Invalid fields returned by the extractor: {", ".join(invalid_keys)}') + + missing_keys = sorted( + test_info_dict.keys() - expected_dict.keys(), + key=lambda x: ALLOWED_KEYS_SORT_ORDER.index(x)) if missing_keys: def _repr(v): if isinstance(v, str): From 4ecb833472c90e078567b561fb7c089f1aa9587b Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 10 Feb 2025 18:40:21 -0600 Subject: [PATCH 11/25] [misc] Clarify that the issue template cannot be removed (#12332) Fix 517ddf3c3f12560ab93e3d36244dc82db9f97818 Authored by: bashonly --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 2 +- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 2 +- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 2 +- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/6_question.yml | 2 +- devscripts/make_issue_template.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index fd7591ba2..c8d3de06b 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -6,7 +6,7 @@ body: attributes: value: | > [!IMPORTANT] - > Not providing the required (*) information will result in your issue being closed and ignored. + > Not providing the required (*) information or removing the template will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 111f423d0..a9564c0c2 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -6,7 +6,7 @@ body: attributes: value: | > [!IMPORTANT] - > Not providing the required (*) information will result in your issue being closed and ignored. + > Not providing the required (*) information or removing the template will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index fd74be331..6e2380fae 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -6,7 +6,7 @@ body: attributes: value: | > [!IMPORTANT] - > Not providing the required (*) information will result in your issue being closed and ignored. + > Not providing the required (*) information or removing the template will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index e9ec45e14..6fc523be0 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -6,7 +6,7 @@ body: attributes: value: | > [!IMPORTANT] - > Not providing the required (*) information will result in your issue being closed and ignored. + > Not providing the required (*) information or removing the template will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 9c7d66d81..57a33bb71 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -6,7 +6,7 @@ body: attributes: value: | > [!IMPORTANT] - > Not providing the required (*) information will result in your issue being closed and ignored. + > Not providing the required (*) information or removing the template will result in your issue being closed and ignored. - type: checkboxes id: checklist attributes: diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 186f73a4a..28ec7cbe0 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -6,7 +6,7 @@ body: attributes: value: | > [!IMPORTANT] - > Not providing the required (*) information will result in your issue being closed and ignored. + > Not providing the required (*) information or removing the template will result in your issue being closed and ignored. - type: markdown attributes: value: | diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 681ebc51c..110fcc245 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -56,7 +56,7 @@ attributes: value: | > [!IMPORTANT] - > Not providing the required (*) information will result in your issue being closed and ignored. + > Not providing the required (*) information or removing the template will result in your issue being closed and ignored. '''.strip() From f53553087d3fde9dcd61d6e9f98caf09db1d8ef2 Mon Sep 17 00:00:00 2001 From: Laurent FAVOLE Date: Tue, 11 Feb 2025 21:04:20 +0100 Subject: [PATCH 12/25] [ie/Digiview] Add extractor (#9902) Authored by: lfavole --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/digiview.py | 130 ++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 yt_dlp/extractor/digiview.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c331bab78..c937dfe13 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -508,6 +508,7 @@ from .dhm import DHMIE from .digitalconcerthall import DigitalConcertHallIE from .digiteka import DigitekaIE +from .digiview import DigiviewIE from .discogs import DiscogsReleasePlaylistIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE diff --git a/yt_dlp/extractor/digiview.py b/yt_dlp/extractor/digiview.py new file mode 100644 index 000000000..f7f23864d --- /dev/null +++ b/yt_dlp/extractor/digiview.py @@ -0,0 +1,130 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import clean_html, int_or_none, traverse_obj, url_or_none, urlencode_postdata + + +class DigiviewIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ladigitale\.dev/digiview/#/v/(?P[0-9a-f]+)' + _TESTS = [{ + # normal video + 'url': 'https://ladigitale.dev/digiview/#/v/67a8e50aee2ec', + 'info_dict': { + 'id': '67a8e50aee2ec', + 'ext': 'mp4', + 'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film', + 'thumbnail': 'https://i.ytimg.com/vi/aqz-KE-bpKQ/hqdefault.jpg', + 'upload_date': '20141110', + 'playable_in_embed': True, + 'duration': 635, + 'view_count': int, + 'comment_count': int, + 'channel': 'Blender', + 'license': 'Creative Commons Attribution license (reuse allowed)', + 'like_count': int, + 'tags': 'count:8', + 'live_status': 'not_live', + 'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg', + 'channel_follower_count': int, + 'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg', + 'uploader_id': '@BlenderOfficial', + 'description': 'md5:8f3ed18a53a1bb36cbb3b70a15782fd0', + 'categories': ['Film & Animation'], + 'channel_is_verified': True, + 'heatmap': 'count:100', + 'section_end': 635, + 'uploader': 'Blender', + 'timestamp': 1415628355, + 'uploader_url': 'https://www.youtube.com/@BlenderOfficial', + 'age_limit': 0, + 'section_start': 0, + 'availability': 'public', + }, + }, { + # cut video + 'url': 'https://ladigitale.dev/digiview/#/v/67a8e51d0dd58', + 'info_dict': { + 'id': '67a8e51d0dd58', + 'ext': 'mp4', + 'title': 'Big Buck Bunny 60fps 4K - Official Blender Foundation Short Film', + 'thumbnail': 'https://i.ytimg.com/vi/aqz-KE-bpKQ/hqdefault.jpg', + 'upload_date': '20141110', + 'playable_in_embed': True, + 'duration': 5, + 'view_count': int, + 'comment_count': int, + 'channel': 'Blender', + 'license': 'Creative Commons Attribution license (reuse allowed)', + 'like_count': int, + 'tags': 'count:8', + 'live_status': 'not_live', + 'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg', + 'channel_follower_count': int, + 'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg', + 'uploader_id': '@BlenderOfficial', + 'description': 'md5:8f3ed18a53a1bb36cbb3b70a15782fd0', + 'categories': ['Film & Animation'], + 'channel_is_verified': True, + 'heatmap': 'count:100', + 'section_end': 10, + 'uploader': 'Blender', + 'timestamp': 1415628355, + 'uploader_url': 'https://www.youtube.com/@BlenderOfficial', + 'age_limit': 0, + 'section_start': 5, + 'availability': 'public', + }, + }, { + # changed title + 'url': 'https://ladigitale.dev/digiview/#/v/67a8ea5644d7a', + 'info_dict': { + 'id': '67a8ea5644d7a', + 'ext': 'mp4', + 'title': 'Big Buck Bunny (with title changed)', + 'thumbnail': 'https://i.ytimg.com/vi/aqz-KE-bpKQ/hqdefault.jpg', + 'upload_date': '20141110', + 'playable_in_embed': True, + 'duration': 5, + 'view_count': int, + 'comment_count': int, + 'channel': 'Blender', + 'license': 'Creative Commons Attribution license (reuse allowed)', + 'like_count': int, + 'tags': 'count:8', + 'live_status': 'not_live', + 'channel_id': 'UCSMOQeBJ2RAnuFungnQOxLg', + 'channel_follower_count': int, + 'channel_url': 'https://www.youtube.com/channel/UCSMOQeBJ2RAnuFungnQOxLg', + 'uploader_id': '@BlenderOfficial', + 'description': 'md5:8f3ed18a53a1bb36cbb3b70a15782fd0', + 'categories': ['Film & Animation'], + 'channel_is_verified': True, + 'heatmap': 'count:100', + 'section_end': 15, + 'uploader': 'Blender', + 'timestamp': 1415628355, + 'uploader_url': 'https://www.youtube.com/@BlenderOfficial', + 'age_limit': 0, + 'section_start': 10, + 'availability': 'public', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + 'https://ladigitale.dev/digiview/inc/recuperer_video.php', video_id, + data=urlencode_postdata({'id': video_id})) + + clip_id = video_data['videoId'] + return self.url_result( + f'https://www.youtube.com/watch?v={clip_id}', + YoutubeIE, video_id, url_transparent=True, + **traverse_obj(video_data, { + 'section_start': ('debut', {int_or_none}), + 'section_end': ('fin', {int_or_none}), + 'description': ('description', {clean_html}, filter), + 'title': ('titre', {str}), + 'thumbnail': ('vignette', {url_or_none}), + 'view_count': ('vues', {int_or_none}), + }), + ) From 6ca23ffaa4663cb552f937f0b1e9769b66db11bd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 11 Feb 2025 15:32:25 -0600 Subject: [PATCH 13/25] [ie/reddit] Bypass gated subreddit warning (#12335) Closes #12331 Authored by: bashonly --- yt_dlp/extractor/reddit.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 7325e547b..d5150fd8f 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -198,6 +198,25 @@ class RedditIE(InfoExtractor): 'skip_download': True, 'writesubtitles': True, }, + }, { + # "gated" subreddit post + 'url': 'https://old.reddit.com/r/ketamine/comments/degtjo/when_the_k_hits/', + 'info_dict': { + 'id': 'gqsbxts133r31', + 'ext': 'mp4', + 'display_id': 'degtjo', + 'title': 'When the K hits', + 'uploader': '[deleted]', + 'channel_id': 'ketamine', + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'age_limit': 18, + 'duration': 34, + 'thumbnail': r're:https?://.+/.+\.(?:jpg|png)', + 'timestamp': 1570438713.0, + 'upload_date': '20191007', + }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, @@ -245,6 +264,15 @@ def _perform_login(self, username, password): elif not traverse_obj(login, ('json', 'data', 'cookie', {str})): raise ExtractorError('Unable to login, no cookie was returned') + def _real_initialize(self): + # Set cookie to opt-in to age-restricted subreddits + self._set_cookie('reddit.com', 'over18', '1') + # Set cookie to opt-in to "gated" subreddits + options = traverse_obj(self._get_cookies('https://www.reddit.com/'), ( + '_options', 'value', {urllib.parse.unquote}, {json.loads}, {dict})) or {} + options['pref_gated_sr_optin'] = True + self._set_cookie('reddit.com', '_options', urllib.parse.quote(json.dumps(options))) + def _get_subtitles(self, video_id): # Fallback if there were no subtitles provided by DASH or HLS manifests caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt' From e7882b682b959e476d8454911655b3e9b14c79b2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 18 Feb 2025 18:19:02 -0600 Subject: [PATCH 14/25] [ie/3sat] Fix extractor (#12403) Fix 241ace4f104d50fdf7638f9203927aefcf57a1f7 Closes #12391 Authored by: bashonly --- yt_dlp/extractor/dreisat.py | 31 ++++- yt_dlp/extractor/zdf.py | 230 ++++++++++++++++++------------------ 2 files changed, 142 insertions(+), 119 deletions(-) diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py index 4b0a269b9..376ff672d 100644 --- a/yt_dlp/extractor/dreisat.py +++ b/yt_dlp/extractor/dreisat.py @@ -1,10 +1,24 @@ -from .zdf import ZDFIE +from .zdf import ZDFBaseIE -class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE +class DreiSatIE(ZDFBaseIE): IE_NAME = '3sat' _VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P[^/?#&]+)\.html' _TESTS = [{ + 'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html', + 'info_dict': { + 'id': '231124_traumziele_philippinen_und_vietnam_dokreise', + 'ext': 'mp4', + 'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam', + 'description': 'md5:26329ce5197775b596773b939354079d', + 'duration': 2625.0, + 'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148', + 'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam', + 'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95', + 'timestamp': 1738593000, + 'upload_date': '20250203', + }, + }, { # Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html 'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html', 'md5': '0aff3e7bc72c8813f5e0fae333316a1d', @@ -17,6 +31,7 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE 'timestamp': 1608604200, 'upload_date': '20201222', }, + 'skip': '410 Gone', }, { 'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html', 'info_dict': { @@ -30,6 +45,7 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE 'params': { 'skip_download': True, }, + 'skip': '404 Not Found', }, { # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html', @@ -39,3 +55,14 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE 'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html', 'only_matching': True, }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id, fatal=False) + if webpage: + player = self._extract_player(webpage, url, fatal=False) + if player: + return self._extract_regular(url, player, video_id) + + return self._extract_mobile(video_id) diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index b64a88f6c..4bef1017c 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -137,6 +137,116 @@ def _extract_player(self, webpage, video_id, fatal=True): group='json'), video_id) + def _extract_entry(self, url, player, content, video_id): + title = content.get('title') or content['teaserHeadline'] + + t = content['mainVideoContent']['http://zdf.de/rels/target'] + ptmd_path = traverse_obj(t, ( + (('streams', 'default'), None), + ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'), + ), get_all=False) + if not ptmd_path: + raise ExtractorError('Could not extract ptmd_path') + + info = self._extract_ptmd( + urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url) + + thumbnails = [] + layouts = try_get( + content, lambda x: x['teaserImageRef']['layouts'], dict) + if layouts: + for layout_key, layout_url in layouts.items(): + layout_url = url_or_none(layout_url) + if not layout_url: + continue + thumbnail = { + 'url': layout_url, + 'format_id': layout_key, + } + mobj = re.search(r'(?P\d+)x(?P\d+)', layout_key) + if mobj: + thumbnail.update({ + 'width': int(mobj.group('width')), + 'height': int(mobj.group('height')), + }) + thumbnails.append(thumbnail) + + chapter_marks = t.get('streamAnchorTag') or [] + chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))}) + chapters = [{ + 'start_time': chap.get('anchorOffset'), + 'end_time': next_chap.get('anchorOffset'), + 'title': chap.get('anchorLabel'), + } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] + + return merge_dicts(info, { + 'title': title, + 'description': content.get('leadParagraph') or content.get('teasertext'), + 'duration': int_or_none(t.get('duration')), + 'timestamp': unified_timestamp(content.get('editorialDate')), + 'thumbnails': thumbnails, + 'chapters': chapters or None, + 'episode': title, + **traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', { + 'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}), + 'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}), + 'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}), + 'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}), + 'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}), + 'episode_number': ('episodeNumber', {int_or_none}), + 'episode_id': ('contentId', {str}), + })), + }) + + def _extract_regular(self, url, player, video_id, query=None): + player_url = player['content'] + + content = self._call_api( + update_url_query(player_url, query), + video_id, 'content', player['apiToken'], url) + + return self._extract_entry(player_url, player, content, video_id) + + def _extract_mobile(self, video_id): + video = self._download_v2_doc(video_id) + + formats = [] + formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) + document = formitaeten and video['document'] + if formitaeten: + title = document['titel'] + content_id = document['basename'] + + format_urls = set() + for f in formitaeten or []: + self._extract_format(content_id, formats, format_urls, f) + + thumbnails = [] + teaser_bild = document.get('teaserBild') + if isinstance(teaser_bild, dict): + for thumbnail_key, thumbnail in teaser_bild.items(): + thumbnail_url = try_get( + thumbnail, lambda x: x['url'], str) + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + 'id': thumbnail_key, + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + return { + 'id': content_id, + 'title': title, + 'description': document.get('beschreibung'), + 'duration': int_or_none(document.get('length')), + 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp( + try_get(video, lambda x: x['meta']['editorialDate'], str)), + 'thumbnails': thumbnails, + 'subtitles': self._extract_subtitles(document), + 'formats': formats, + } + class ZDFIE(ZDFBaseIE): _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P[^/?#&]+)\.html' @@ -306,121 +416,6 @@ class ZDFIE(ZDFBaseIE): }, }] - def _extract_entry(self, url, player, content, video_id): - title = content.get('title') or content['teaserHeadline'] - - t = content['mainVideoContent']['http://zdf.de/rels/target'] - ptmd_path = traverse_obj(t, ( - (('streams', 'default'), None), - ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'), - ), get_all=False) - if not ptmd_path: - raise ExtractorError('Could not extract ptmd_path') - - info = self._extract_ptmd( - urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url) - - thumbnails = [] - layouts = try_get( - content, lambda x: x['teaserImageRef']['layouts'], dict) - if layouts: - for layout_key, layout_url in layouts.items(): - layout_url = url_or_none(layout_url) - if not layout_url: - continue - thumbnail = { - 'url': layout_url, - 'format_id': layout_key, - } - mobj = re.search(r'(?P\d+)x(?P\d+)', layout_key) - if mobj: - thumbnail.update({ - 'width': int(mobj.group('width')), - 'height': int(mobj.group('height')), - }) - thumbnails.append(thumbnail) - - chapter_marks = t.get('streamAnchorTag') or [] - chapter_marks.append({'anchorOffset': int_or_none(t.get('duration'))}) - chapters = [{ - 'start_time': chap.get('anchorOffset'), - 'end_time': next_chap.get('anchorOffset'), - 'title': chap.get('anchorLabel'), - } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] - - return merge_dicts(info, { - 'title': title, - 'description': content.get('leadParagraph') or content.get('teasertext'), - 'duration': int_or_none(t.get('duration')), - 'timestamp': unified_timestamp(content.get('editorialDate')), - 'thumbnails': thumbnails, - 'chapters': chapters or None, - 'episode': title, - **traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', { - 'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}), - 'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}), - 'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}), - 'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}), - 'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}), - 'episode_number': ('episodeNumber', {int_or_none}), - 'episode_id': ('contentId', {str}), - })), - }) - - def _extract_regular(self, url, player, video_id): - player_url = player['content'] - - try: - content = self._call_api( - update_url_query(player_url, {'profile': 'player-3'}), - video_id, 'content', player['apiToken'], url) - except ExtractorError as e: - self.report_warning(f'{video_id}: {e.orig_msg}; retrying with v2 profile') - content = self._call_api( - player_url, video_id, 'content', player['apiToken'], url) - - return self._extract_entry(player_url, player, content, video_id) - - def _extract_mobile(self, video_id): - video = self._download_v2_doc(video_id) - - formats = [] - formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list) - document = formitaeten and video['document'] - if formitaeten: - title = document['titel'] - content_id = document['basename'] - - format_urls = set() - for f in formitaeten or []: - self._extract_format(content_id, formats, format_urls, f) - - thumbnails = [] - teaser_bild = document.get('teaserBild') - if isinstance(teaser_bild, dict): - for thumbnail_key, thumbnail in teaser_bild.items(): - thumbnail_url = try_get( - thumbnail, lambda x: x['url'], str) - if thumbnail_url: - thumbnails.append({ - 'url': thumbnail_url, - 'id': thumbnail_key, - 'width': int_or_none(thumbnail.get('width')), - 'height': int_or_none(thumbnail.get('height')), - }) - - return { - 'id': content_id, - 'title': title, - 'description': document.get('beschreibung'), - 'duration': int_or_none(document.get('length')), - 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp( - try_get(video, lambda x: x['meta']['editorialDate'], str)), - 'thumbnails': thumbnails, - 'subtitles': self._extract_subtitles(document), - 'formats': formats, - } - def _real_extract(self, url): video_id = self._match_id(url) @@ -428,7 +423,7 @@ def _real_extract(self, url): if webpage: player = self._extract_player(webpage, url, fatal=False) if player: - return self._extract_regular(url, player, video_id) + return self._extract_regular(url, player, video_id, query={'profile': 'player-3'}) return self._extract_mobile(video_id) @@ -474,7 +469,8 @@ def _extract_entry(self, entry): 'title': ('titel', {str}), 'description': ('beschreibung', {str}), 'duration': ('length', {float_or_none}), - # TODO: seasonNumber and episodeNumber can be extracted but need to also be in ZDFIE + 'season_number': ('seasonNumber', {int_or_none}), + 'episode_number': ('episodeNumber', {int_or_none}), })) def _entries(self, data, document_id): From ec17fb16e8d69d4e3e10fb73bf3221be8570dfee Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 18 Feb 2025 18:24:12 -0600 Subject: [PATCH 15/25] [ie/youtube] nsig workaround for `tce` player JS (#12401) Closes #12398 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 738ade1b0..cf8f6ef39 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3135,6 +3135,11 @@ def _extract_player_url(self, *ytcfgs, webpage=None): get_all=False, expected_type=str) if not player_url: return + # TODO: Add proper support for the 'tce' variant players + # See https://github.com/yt-dlp/yt-dlp/issues/12398 + if '/player_ias_tce.vflset/' in player_url: + self.write_debug(f'Modifying tce player URL: {player_url}') + player_url = player_url.replace('/player_ias_tce.vflset/', '/player_ias.vflset/') return urljoin('https://www.youtube.com', player_url) def _download_player_url(self, video_id, fatal=False): @@ -3369,7 +3374,7 @@ def _fixup_n_function_code(self, argnames, code): def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09') + func_code = self.cache.load('youtube-nsig', player_id, min_ver='2025.02.19') jscode = func_code or self._load_player(video_id, player_url) jsi = JSInterpreter(jscode) From 5c4c2ddfaa47988b4d50c1ad4988badc0b4f30c2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 18 Feb 2025 18:28:34 -0600 Subject: [PATCH 16/25] [ie/francetvinfo.fr] Fix extractor (#12402) Closes #12366 Authored by: bashonly --- yt_dlp/extractor/francetv.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index c6036b306..5c9f8e36d 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -9,6 +9,7 @@ ExtractorError, clean_html, determine_ext, + extract_attributes, filter_dict, format_field, int_or_none, @@ -18,7 +19,7 @@ unsmuggle_url, url_or_none, ) -from ..utils.traversal import traverse_obj +from ..utils.traversal import find_element, traverse_obj class FranceTVBaseInfoExtractor(InfoExtractor): @@ -460,11 +461,16 @@ def _real_extract(self, url): self.url_result(dailymotion_url, DailymotionIE.ie_key()) for dailymotion_url in dailymotion_urls]) - video_id = self._search_regex( - (r'player\.load[^;]+src:\s*["\']([^"\']+)', - r'id-video=([^@]+@[^"]+)', - r']+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"', - r'(?:data-id|]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"', + r'(?:data-id| Date: Wed, 19 Feb 2025 13:39:51 +1300 Subject: [PATCH 17/25] [ie/youtube] Retry on more critical requests (#12339) Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index cf8f6ef39..4dc80fe28 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -857,6 +857,18 @@ def generate_api_headers( } return filter_dict(headers) + def _download_webpage_with_retries(self, *args, retry_fatal=False, retry_on_status=None, **kwargs): + for retry in self.RetryManager(fatal=retry_fatal): + try: + return self._download_webpage(*args, **kwargs) + except ExtractorError as e: + if isinstance(e.cause, network_exceptions): + if not isinstance(e.cause, HTTPError) or e.cause.status not in (retry_on_status or (403, 429)): + retry.error = e + continue + self._error_or_warning(e, fatal=retry_fatal) + break + def _download_ytcfg(self, client, video_id): url = { 'web': 'https://www.youtube.com', @@ -866,8 +878,8 @@ def _download_ytcfg(self, client, video_id): }.get(client) if not url: return {} - webpage = self._download_webpage( - url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', + webpage = self._download_webpage_with_retries( + url, video_id, note=f'Downloading {client.replace("_", " ").strip()} client config', headers=traverse_obj(self._get_default_ytcfg(client), { 'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}), })) @@ -3143,12 +3155,14 @@ def _extract_player_url(self, *ytcfgs, webpage=None): return urljoin('https://www.youtube.com', player_url) def _download_player_url(self, video_id, fatal=False): - res = self._download_webpage( + iframe_webpage = self._download_webpage_with_retries( 'https://www.youtube.com/iframe_api', - note='Downloading iframe API JS', video_id=video_id, fatal=fatal) - if res: + note='Downloading iframe API JS', + video_id=video_id, retry_fatal=fatal) + + if iframe_webpage: player_version = self._search_regex( - r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal) + r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_webpage, 'player version', fatal=fatal) if player_version: return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js' @@ -4561,8 +4575,7 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url): pp = self._configuration_arg('player_params', [None], casesense=True)[0] if pp: query['pp'] = pp - webpage = self._download_webpage( - webpage_url, video_id, fatal=False, query=query) + webpage = self._download_webpage_with_retries(webpage_url, video_id, query=query) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() From 5271ef48c6f61c145e03e18e960995d2e651d205 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 18 Feb 2025 20:20:50 -0600 Subject: [PATCH 18/25] [ie/gem.cbc.ca] Fix extractors (#12404) Does not fix broken login support Closes #11848 Authored by: bashonly, dirkf Co-authored-by: dirkf --- yt_dlp/extractor/cbc.py | 180 +++++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 85 deletions(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index c0cf3da3d..e63c608fc 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -14,16 +14,18 @@ js_to_json, mimetype2ext, orderedSet, + parse_age_limit, parse_iso8601, replace_extension, smuggle_url, strip_or_none, - traverse_obj, try_get, + unified_timestamp, update_url, url_basename, url_or_none, ) +from ..utils.traversal import require, traverse_obj, trim_str class CBCIE(InfoExtractor): @@ -516,9 +518,43 @@ def entries(): return self.playlist_result(entries(), playlist_id) -class CBCGemIE(InfoExtractor): +class CBCGemBaseIE(InfoExtractor): + _NETRC_MACHINE = 'cbcgem' + _GEO_COUNTRIES = ['CA'] + + def _call_show_api(self, item_id, display_id=None): + return self._download_json( + f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}', + display_id or item_id, query={'device': 'web'}) + + def _extract_item_info(self, item_info): + episode_number = None + title = traverse_obj(item_info, ('title', {str})) + if title and (mobj := re.match(r'(?P\d+)\. (?P.+)', title)): + episode_number = int_or_none(mobj.group('episode')) + title = mobj.group('title') + + return { + 'episode_number': episode_number, + **traverse_obj(item_info, { + 'id': ('url', {str}), + 'episode_id': ('url', {str}), + 'description': ('description', {str}), + 'thumbnail': ('images', 'card', 'url', {url_or_none}, {update_url(query=None)}), + 'episode_number': ('episodeNumber', {int_or_none}), + 'duration': ('metadata', 'duration', {int_or_none}), + 'release_timestamp': ('metadata', 'airDate', {unified_timestamp}), + 'timestamp': ('metadata', 'availabilityDate', {unified_timestamp}), + 'age_limit': ('metadata', 'rating', {trim_str(start='C')}, {parse_age_limit}), + }), + 'episode': title, + 'title': title, + } + + +class CBCGemIE(CBCGemBaseIE): IE_NAME = 'gem.cbc.ca' - _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' + _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)' _TESTS = [{ # This is a normal, public, TV show video 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', @@ -529,7 +565,7 @@ class CBCGemIE(InfoExtractor): 'description': 'md5:929868d20021c924020641769eb3e7f1', 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg', 'duration': 1324, - 'categories': ['comedy'], + 'genres': ['Comédie et humour'], 'series': 'Schitt\'s Creek', 'season': 'Season 6', 'season_number': 6, @@ -537,9 +573,10 @@ class CBCGemIE(InfoExtractor): 'episode_number': 1, 'episode_id': 'schitts-creek/s06e01', 'upload_date': '20210618', - 'timestamp': 1623988800, + 'timestamp': 1623974400, 'release_date': '20200107', - 'release_timestamp': 1578427200, + 'release_timestamp': 1578355200, + 'age_limit': 14, }, 'params': {'format': 'bv'}, }, { @@ -557,12 +594,13 @@ class CBCGemIE(InfoExtractor): 'episode_number': 1, 'episode': 'The Cup Runneth Over', 'episode_id': 'schitts-creek/s01e01', - 'duration': 1309, - 'categories': ['comedy'], + 'duration': 1308, + 'genres': ['Comédie et humour'], 'upload_date': '20210617', - 'timestamp': 1623902400, - 'release_date': '20151124', - 'release_timestamp': 1448323200, + 'timestamp': 1623888000, + 'release_date': '20151123', + 'release_timestamp': 1448236800, + 'age_limit': 14, }, 'params': {'format': 'bv'}, }, { @@ -570,9 +608,7 @@ class CBCGemIE(InfoExtractor): 'only_matching': True, }] - _GEO_COUNTRIES = ['CA'] _TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37' - _NETRC_MACHINE = 'cbcgem' _claims_token = None def _new_claims_token(self, email, password): @@ -634,10 +670,12 @@ def _real_initialize(self): self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') def _real_extract(self, url): - video_id = self._match_id(url) - video_info = self._download_json( - f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}', - video_id, expected_status=426) + video_id, season_number = self._match_valid_url(url).group('id', 'season') + video_info = self._call_show_api(video_id) + item_info = traverse_obj(video_info, ( + 'content', ..., 'lineups', ..., 'items', + lambda _, v: v['url'] == video_id, any, {require('item info')})) + media_id = item_info['idMedia'] email, password = self._get_login_info() if email and password: @@ -645,7 +683,20 @@ def _real_extract(self, url): headers = {'x-claims-token': claims_token} else: headers = {} - m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) + + m3u8_info = self._download_json( + 'https://services.radio-canada.ca/media/validation/v2/', + video_id, headers=headers, query={ + 'appCode': 'gem', + 'connectionType': 'hd', + 'deviceType': 'ipad', + 'multibitrate': 'true', + 'output': 'json', + 'tech': 'hls', + 'manifestVersion': '2', + 'manifestType': 'desktop', + 'idMedia': media_id, + }) if m3u8_info.get('errorCode') == 1: self.raise_geo_restricted(countries=['CA']) @@ -671,26 +722,20 @@ def _real_extract(self, url): fmt['preference'] = -2 return { + 'season_number': int_or_none(season_number), + **traverse_obj(video_info, { + 'series': ('title', {str}), + 'season_number': ('structuredMetadata', 'partofSeason', 'seasonNumber', {int_or_none}), + 'genres': ('structuredMetadata', 'genre', ..., {str}), + }), + **self._extract_item_info(item_info), 'id': video_id, 'episode_id': video_id, 'formats': formats, - **traverse_obj(video_info, { - 'title': ('title', {str}), - 'episode': ('title', {str}), - 'description': ('description', {str}), - 'thumbnail': ('image', {url_or_none}), - 'series': ('series', {str}), - 'season_number': ('season', {int_or_none}), - 'episode_number': ('episode', {int_or_none}), - 'duration': ('duration', {int_or_none}), - 'categories': ('category', {str}, all), - 'release_timestamp': ('airDate', {int_or_none(scale=1000)}), - 'timestamp': ('availableDate', {int_or_none(scale=1000)}), - }), } -class CBCGemPlaylistIE(InfoExtractor): +class CBCGemPlaylistIE(CBCGemBaseIE): IE_NAME = 'gem.cbc.ca:playlist' _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)' _TESTS = [{ @@ -700,70 +745,35 @@ class CBCGemPlaylistIE(InfoExtractor): 'info_dict': { 'id': 'schitts-creek/s06', 'title': 'Season 6', - 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2', 'series': 'Schitt\'s Creek', 'season_number': 6, 'season': 'Season 6', - 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75', }, }, { 'url': 'https://gem.cbc.ca/schitts-creek/s06', 'only_matching': True, }] - _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/' + + def _entries(self, season_info): + for episode in traverse_obj(season_info, ('items', lambda _, v: v['url'])): + yield self.url_result( + f'https://gem.cbc.ca/media/{episode["url"]}', CBCGemIE, + **self._extract_item_info(episode)) def _real_extract(self, url): - match = self._match_valid_url(url) - season_id = match.group('id') - show = match.group('show') - show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426) - season = int(match.group('season')) + season_id, show, season = self._match_valid_url(url).group('id', 'show', 'season') + show_info = self._call_show_api(show, display_id=season_id) + season_info = traverse_obj(show_info, ( + 'content', ..., 'lineups', + lambda _, v: v['seasonNumber'] == int(season), any, {require('season info')})) - season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None) - - if season_info is None: - raise ExtractorError(f'Couldn\'t find season {season} of {show}') - - episodes = [] - for episode in season_info['assets']: - episodes.append({ - '_type': 'url_transparent', - 'ie_key': 'CBCGem', - 'url': 'https://gem.cbc.ca/media/' + episode['id'], - 'id': episode['id'], - 'title': episode.get('title'), - 'description': episode.get('description'), - 'thumbnail': episode.get('image'), - 'series': episode.get('series'), - 'season_number': episode.get('season'), - 'season': season_info['title'], - 'season_id': season_info.get('id'), - 'episode_number': episode.get('episode'), - 'episode': episode.get('title'), - 'episode_id': episode['id'], - 'duration': episode.get('duration'), - 'categories': [episode.get('category')], - }) - - thumbnail = None - tn_uri = season_info.get('image') - # the-national was observed to use a "data:image/png;base64" - # URI for their 'image' value. The image was 1x1, and is - # probably just a placeholder, so it is ignored. - if tn_uri is not None and not tn_uri.startswith('data:'): - thumbnail = tn_uri - - return { - '_type': 'playlist', - 'entries': episodes, - 'id': season_id, - 'title': season_info['title'], - 'description': season_info.get('description'), - 'thumbnail': thumbnail, - 'series': show_info.get('title'), - 'season_number': season_info.get('season'), - 'season': season_info['title'], - } + return self.playlist_result( + self._entries(season_info), season_id, + **traverse_obj(season_info, { + 'title': ('title', {str}), + 'season': ('title', {str}), + 'season_number': ('seasonNumber', {int_or_none}), + }), series=traverse_obj(show_info, ('title', {str}))) class CBCGemLiveIE(InfoExtractor): From be69468752ff598cacee57bb80533deab2367a5d Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 18 Feb 2025 20:23:42 -0600 Subject: [PATCH 19/25] [fd/hls] Support `--write-pages` for m3u8 media playlists (#12333) Authored by: bashonly --- yt_dlp/downloader/hls.py | 11 ++++++++++- yt_dlp/extractor/common.py | 28 +++++++--------------------- yt_dlp/utils/_utils.py | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 7a47f8f83..1f36a07f5 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -16,6 +16,7 @@ update_url_query, urljoin, ) +from ..utils._utils import _request_dump_filename class HlsFD(FragmentFD): @@ -80,7 +81,15 @@ def real_download(self, filename, info_dict): self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url - s = urlh.read().decode('utf-8', 'ignore') + s_bytes = urlh.read() + if self.params.get('write_pages'): + dump_filename = _request_dump_filename( + man_url, info_dict['id'], None, + trim_length=self.params.get('trim_file_name')) + self.to_screen(f'[{self.FD_NAME}] Saving request to {dump_filename}') + with open(dump_filename, 'wb') as outf: + outf.write(s_bytes) + s = s_bytes.decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3e7734ce1..8d199b353 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2,7 +2,6 @@ import collections import functools import getpass -import hashlib import http.client import http.cookiejar import http.cookies @@ -78,7 +77,6 @@ parse_iso8601, parse_m3u8_attributes, parse_resolution, - sanitize_filename, sanitize_url, smuggle_url, str_or_none, @@ -100,6 +98,7 @@ xpath_text, xpath_with_ns, ) +from ..utils._utils import _request_dump_filename class InfoExtractor: @@ -1022,23 +1021,6 @@ def __check_blocked(self, content): 'Visit http://blocklist.rkn.gov.ru/ for a block reason.', expected=True) - def _request_dump_filename(self, url, video_id, data=None): - if data is not None: - data = hashlib.md5(data).hexdigest() - basen = join_nonempty(video_id, data, url, delim='_') - trim_length = self.get_param('trim_file_name') or 240 - if len(basen) > trim_length: - h = '___' + hashlib.md5(basen.encode()).hexdigest() - basen = basen[:trim_length - len(h)] + h - filename = sanitize_filename(f'{basen}.dump', restricted=True) - # Working around MAX_PATH limitation on Windows (see - # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) - if os.name == 'nt': - absfilepath = os.path.abspath(filename) - if len(absfilepath) > 259: - filename = fR'\\?\{absfilepath}' - return filename - def __decode_webpage(self, webpage_bytes, encoding, headers): if not encoding: encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes) @@ -1067,7 +1049,9 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno if self.get_param('write_pages'): if isinstance(url_or_request, Request): data = self._create_request(url_or_request, data).data - filename = self._request_dump_filename(urlh.url, video_id, data) + filename = _request_dump_filename( + urlh.url, video_id, data, + trim_length=self.get_param('trim_file_name')) self.to_screen(f'Saving request to {filename}') with open(filename, 'wb') as outf: outf.write(webpage_bytes) @@ -1128,7 +1112,9 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote, impersonate=None, require_impersonation=False): if self.get_param('load_pages'): url_or_request = self._create_request(url_or_request, data, headers, query) - filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data) + filename = _request_dump_filename( + url_or_request.url, video_id, url_or_request.data, + trim_length=self.get_param('trim_file_name')) self.to_screen(f'Loading request from {filename}') try: with open(filename, 'rb') as dumpf: diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index a71a381e5..c6a90a0dd 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -5631,6 +5631,24 @@ def filesize_from_tbr(tbr, duration): return int(duration * tbr * (1000 / 8)) +def _request_dump_filename(url, video_id, data=None, trim_length=None): + if data is not None: + data = hashlib.md5(data).hexdigest() + basen = join_nonempty(video_id, data, url, delim='_') + trim_length = trim_length or 240 + if len(basen) > trim_length: + h = '___' + hashlib.md5(basen.encode()).hexdigest() + basen = basen[:trim_length - len(h)] + h + filename = sanitize_filename(f'{basen}.dump', restricted=True) + # Working around MAX_PATH limitation on Windows (see + # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) + if os.name == 'nt': + absfilepath = os.path.abspath(filename) + if len(absfilepath) > 259: + filename = fR'\\?\{absfilepath}' + return filename + + # XXX: Temporary class _YDLLogger: def __init__(self, ydl=None): From 01a63629a21781458dcbd38779898e117678f5ff Mon Sep 17 00:00:00 2001 From: sepro <sepro@sepr0.com> Date: Wed, 19 Feb 2025 03:27:49 +0100 Subject: [PATCH 20/25] [docs] Add note to `supportedsites.md` (#12382) Authored by: seproDev --- devscripts/make_supportedsites.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 01548ef97..145f6d47f 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -10,10 +10,21 @@ from devscripts.utils import get_filename_args, write_file from yt_dlp.extractor import list_extractor_classes +TEMPLATE = '''\ +# Supported sites + +Below is a list of all extractors that are currently included with yt-dlp. +If a site is not listed here, it might still be supported by yt-dlp's embed extraction or generic extractor. +Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them. +The only reliable way to check if a site is supported is to try it. + +{ie_list} +''' + def main(): out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False) - write_file(get_filename_args(), f'# Supported sites\n{out}\n') + write_file(get_filename_args(), TEMPLATE.format(ie_list=out)) if __name__ == '__main__': From 4985a4041770eaa0016271809a1fd950dc809a55 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 18 Feb 2025 20:29:29 -0600 Subject: [PATCH 21/25] [cleanup] Misc (#12238) Authored by: StefanLobbenmeier, dirkf, Grub4K Co-authored-by: Stefan Lobbenmeier <Stefan.Lobbenmeier@gmail.com> Co-authored-by: dirkf <fieldhouse@gmx.net> Co-authored-by: Simon Sawicki <contact@grub4k.xyz> --- .github/workflows/codeql.yml | 6 +++--- README.md | 2 +- devscripts/run_tests.py | 3 ++- yt_dlp/__init__.py | 10 ++++++---- yt_dlp/extractor/pbs.py | 2 +- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 170a6ac19..910c409ef 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -33,7 +33,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -47,7 +47,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -60,6 +60,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" diff --git a/README.md b/README.md index 0ac27c462..e8ef1980a 100644 --- a/README.md +++ b/README.md @@ -1525,7 +1525,7 @@ ## Sorting Formats - `hasvid`: Gives priority to formats that have a video stream - `hasaud`: Gives priority to formats that have an audio stream - `ie_pref`: The format preference - - `lang`: The language preference + - `lang`: The language preference as determined by the extractor (e.g. original language preferred over audio description) - `quality`: The quality of the format - `source`: The preference of the source - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments`> `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`) diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py index eb614fe59..ebb3500b6 100755 --- a/devscripts/run_tests.py +++ b/devscripts/run_tests.py @@ -25,7 +25,8 @@ def parse_args(): def run_tests(*tests, pattern=None, ci=False): - run_core = 'core' in tests or (not pattern and not tests) + # XXX: hatch uses `tests` if no arguments are passed + run_core = 'core' in tests or 'tests' in tests or (not pattern and not tests) run_download = 'download' in tests pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index c76fe2748..3819656e2 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -295,18 +295,20 @@ def parse_sleep_func(expr): raise ValueError(f'invalid {key} retry sleep expression {expr!r}') # Bytes - def validate_bytes(name, value): + def validate_bytes(name, value, strict_positive=False): if value is None: return None numeric_limit = parse_bytes(value) - validate(numeric_limit is not None, 'rate limit', value) + validate(numeric_limit is not None, name, value) + if strict_positive: + validate_positive(name, numeric_limit, True) return numeric_limit - opts.ratelimit = validate_bytes('rate limit', opts.ratelimit) + opts.ratelimit = validate_bytes('rate limit', opts.ratelimit, True) opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit) opts.min_filesize = validate_bytes('min filesize', opts.min_filesize) opts.max_filesize = validate_bytes('max filesize', opts.max_filesize) - opts.buffersize = validate_bytes('buffer size', opts.buffersize) + opts.buffersize = validate_bytes('buffer size', opts.buffersize, True) opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size) # Output templates diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index 2f839a2e9..53b199415 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -501,7 +501,7 @@ def _extract_webpage(self, url): r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'", # frontline video embed r'class="coveplayerid">([^<]+)<', # coveplayer r'<section[^>]+data-coveid="(\d+)"', # coveplayer from http://www.pbs.org/wgbh/frontline/film/real-csi/ - r'\bclass="passportcoveplayer"[^>]+\bdata-media="(\d+)', # https://www.thirteen.org/programs/the-woodwrights-shop/who-wrote-the-book-of-sloyd-fggvvq/ + r'\sclass="passportcoveplayer"[^>]*\sdata-media="(\d+)', # https://www.thirteen.org/programs/the-woodwrights-shop/who-wrote-the-book-of-sloyd-fggvvq/ r'<input type="hidden" id="pbs_video_id_[0-9]+" value="([0-9]+)"/>', # jwplayer r"(?s)window\.PBS\.playerConfig\s*=\s*{.*?id\s*:\s*'([0-9]+)',", r'<div[^>]+\bdata-cove-id=["\'](\d+)"', # http://www.pbs.org/wgbh/roadshow/watch/episode/2105-indianapolis-hour-2/ From 9c3e8b16967cb6582712b6b7e4c107631d3873d0 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 19 Feb 2025 02:42:18 +0000 Subject: [PATCH 22/25] Release 2025.02.19 Created by: bashonly :ci skip all --- CONTRIBUTORS | 6 ++++++ Changelog.md | 43 +++++++++++++++++++++++++++++++++++++++++++ supportedsites.md | 16 ++++++++++++---- yt_dlp/version.py | 6 +++--- 4 files changed, 64 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 7376b1801..0b90c0e6c 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -736,3 +736,9 @@ NecroRomnt pjrobertson subsense test20140 +arantius +entourage8 +lfavole +mp3butcher +slipinthedove +YoshiTabletopGamer diff --git a/Changelog.md b/Changelog.md index 3232c158b..9c544feb9 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,49 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.02.19 + +#### Core changes +- **jsinterp** + - [Add `js_number_to_string`](https://github.com/yt-dlp/yt-dlp/commit/0d9f061d38c3a4da61972e2adad317079f2f1c84) ([#12110](https://github.com/yt-dlp/yt-dlp/issues/12110)) by [Grub4K](https://github.com/Grub4K) + - [Improve zeroise](https://github.com/yt-dlp/yt-dlp/commit/4ca8c44a073d5aa3a3e3112c35b2b23d6ce25ac6) ([#12313](https://github.com/yt-dlp/yt-dlp/issues/12313)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- **acast**: [Support shows.acast.com URLs](https://github.com/yt-dlp/yt-dlp/commit/57c717fee4bfbc9309845bbb48901b72e4b69304) ([#12223](https://github.com/yt-dlp/yt-dlp/issues/12223)) by [barsnick](https://github.com/barsnick) +- **cwtv** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/18a28514e306e822eab4f3a79c76d515bf076406) ([#12207](https://github.com/yt-dlp/yt-dlp/issues/12207)) by [arantius](https://github.com/arantius) + - movie: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/03c3d705778c07739e0034b51490877cffdc0983) ([#12227](https://github.com/yt-dlp/yt-dlp/issues/12227)) by [bashonly](https://github.com/bashonly) +- **digiview**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f53553087d3fde9dcd61d6e9f98caf09db1d8ef2) ([#9902](https://github.com/yt-dlp/yt-dlp/issues/9902)) by [lfavole](https://github.com/lfavole) +- **dropbox**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/861aeec449c8f3c062d962945b234ff0341f61f3) ([#12228](https://github.com/yt-dlp/yt-dlp/issues/12228)) by [bashonly](https://github.com/bashonly) +- **francetv** + - site + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/817483ccc68aed6049ed9c4a2ffae44ca82d2b1c) ([#12236](https://github.com/yt-dlp/yt-dlp/issues/12236)) by [bashonly](https://github.com/bashonly) + - [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/1295bbedd45fa8d9bc3f7a194864ae280297848e) ([#12316](https://github.com/yt-dlp/yt-dlp/issues/12316)) by [bashonly](https://github.com/bashonly) +- **francetvinfo.fr**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5c4c2ddfaa47988b4d50c1ad4988badc0b4f30c2) ([#12402](https://github.com/yt-dlp/yt-dlp/issues/12402)) by [bashonly](https://github.com/bashonly) +- **gem.cbc.ca**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/5271ef48c6f61c145e03e18e960995d2e651d205) ([#12404](https://github.com/yt-dlp/yt-dlp/issues/12404)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf) +- **generic**: [Extract `live_status` for DASH manifest URLs](https://github.com/yt-dlp/yt-dlp/commit/19edaa44fcd375f54e63d6227b092f5252d3e889) ([#12256](https://github.com/yt-dlp/yt-dlp/issues/12256)) by [mp3butcher](https://github.com/mp3butcher) +- **globo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f8d0161455f00add65585ca1a476a7b5d56f5f96) ([#11795](https://github.com/yt-dlp/yt-dlp/issues/11795)) by [slipinthedove](https://github.com/slipinthedove), [YoshiTabletopGamer](https://github.com/YoshiTabletopGamer) +- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d59f14a0a7a8b55e6bf468237def62b73ab4a517) ([#12237](https://github.com/yt-dlp/yt-dlp/issues/12237)) by [alard](https://github.com/alard) +- **pbs**: [Support www.thirteen.org URLs](https://github.com/yt-dlp/yt-dlp/commit/9fb8ab2ff67fb699f60cce09163a580976e90c0e) ([#11191](https://github.com/yt-dlp/yt-dlp/issues/11191)) by [rohieb](https://github.com/rohieb) +- **reddit**: [Bypass gated subreddit warning](https://github.com/yt-dlp/yt-dlp/commit/6ca23ffaa4663cb552f937f0b1e9769b66db11bd) ([#12335](https://github.com/yt-dlp/yt-dlp/issues/12335)) by [bashonly](https://github.com/bashonly) +- **twitter**: [Fix syndication token generation](https://github.com/yt-dlp/yt-dlp/commit/14cd7f3443c6da4d49edaefcc12da9dee86e243e) ([#12107](https://github.com/yt-dlp/yt-dlp/issues/12107)) by [Grub4K](https://github.com/Grub4K), [pjrobertson](https://github.com/pjrobertson) +- **youtube** + - [Retry on more critical requests](https://github.com/yt-dlp/yt-dlp/commit/d48e612609d012abbea3785be4d26d78a014abb2) ([#12339](https://github.com/yt-dlp/yt-dlp/issues/12339)) by [coletdjnz](https://github.com/coletdjnz) + - [nsig workaround for `tce` player JS](https://github.com/yt-dlp/yt-dlp/commit/ec17fb16e8d69d4e3e10fb73bf3221be8570dfee) ([#12401](https://github.com/yt-dlp/yt-dlp/issues/12401)) by [bashonly](https://github.com/bashonly) +- **zdf**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/241ace4f104d50fdf7638f9203927aefcf57a1f7) ([#9565](https://github.com/yt-dlp/yt-dlp/issues/9565)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) (With fixes in [e7882b6](https://github.com/yt-dlp/yt-dlp/commit/e7882b682b959e476d8454911655b3e9b14c79b2) by [bashonly](https://github.com/bashonly)) + +#### Downloader changes +- **hls** + - [Fix `BYTERANGE` logic](https://github.com/yt-dlp/yt-dlp/commit/10b7ff68e98f17655e31952f6e17120b2d7dda96) ([#11972](https://github.com/yt-dlp/yt-dlp/issues/11972)) by [entourage8](https://github.com/entourage8) + - [Support `--write-pages` for m3u8 media playlists](https://github.com/yt-dlp/yt-dlp/commit/be69468752ff598cacee57bb80533deab2367a5d) ([#12333](https://github.com/yt-dlp/yt-dlp/issues/12333)) by [bashonly](https://github.com/bashonly) + - [Support `hls_media_playlist_data` format field](https://github.com/yt-dlp/yt-dlp/commit/c987be0acb6872c6561f28aa28171e803393d851) ([#12322](https://github.com/yt-dlp/yt-dlp/issues/12322)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- [Improve Issue/PR templates](https://github.com/yt-dlp/yt-dlp/commit/517ddf3c3f12560ab93e3d36244dc82db9f97818) ([#11499](https://github.com/yt-dlp/yt-dlp/issues/11499)) by [seproDev](https://github.com/seproDev) (With fixes in [4ecb833](https://github.com/yt-dlp/yt-dlp/commit/4ecb833472c90e078567b561fb7c089f1aa9587b) by [bashonly](https://github.com/bashonly)) +- **cleanup**: Miscellaneous: [4985a40](https://github.com/yt-dlp/yt-dlp/commit/4985a4041770eaa0016271809a1fd950dc809a55) by [dirkf](https://github.com/dirkf), [Grub4K](https://github.com/Grub4K), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- **docs**: [Add note to `supportedsites.md`](https://github.com/yt-dlp/yt-dlp/commit/01a63629a21781458dcbd38779898e117678f5ff) ([#12382](https://github.com/yt-dlp/yt-dlp/issues/12382)) by [seproDev](https://github.com/seproDev) +- **test**: download: [Validate and sort info dict fields](https://github.com/yt-dlp/yt-dlp/commit/208163447408c78673b08c172beafe5c310fb167) ([#12299](https://github.com/yt-dlp/yt-dlp/issues/12299)) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612) + ### 2025.01.26 #### Core changes diff --git a/supportedsites.md b/supportedsites.md index 70909ef00..0924c88ff 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1,4 +1,10 @@ # Supported sites + +Below is a list of all extractors that are currently included with yt-dlp. +If a site is not listed here, it might still be supported by yt-dlp's embed extraction or generic extractor. +Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them. +The only reliable way to check if a site is supported is to try it. + - **17live** - **17live:clip** - **1News**: 1news.co.nz article videos @@ -314,7 +320,8 @@ # Supported sites - **curiositystream**: [*curiositystream*](## "netrc machine") - **curiositystream:collections**: [*curiositystream*](## "netrc machine") - **curiositystream:series**: [*curiositystream*](## "netrc machine") - - **CWTV** + - **cwtv** + - **cwtv:movie** - **Cybrary**: [*cybrary*](## "netrc machine") - **CybraryCourse**: [*cybrary*](## "netrc machine") - **DacastPlaylist** @@ -349,6 +356,7 @@ # Supported sites - **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor - **DigitallySpeaking** - **Digiteka** + - **Digiview** - **DiscogsReleasePlaylist** - **DiscoveryLife** - **DiscoveryNetworksDe** @@ -465,9 +473,9 @@ # Supported sites - **fptplay**: fptplay.vn - **FranceCulture** - **FranceInter** - - **FranceTV** + - **francetv** + - **francetv:site** - **francetvinfo.fr** - - **FranceTVSite** - **Freesound** - **freespeech.org** - **freetv:series** @@ -499,7 +507,7 @@ # Supported sites - **GediDigital** - **gem.cbc.ca**: [*cbcgem*](## "netrc machine") - **gem.cbc.ca:live** - - **gem.cbc.ca:playlist** + - **gem.cbc.ca:playlist**: [*cbcgem*](## "netrc machine") - **Genius** - **GeniusLyrics** - **Germanupa**: germanupa.de diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 4f7a2ec90..7346ca49c 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2025.01.26' +__version__ = '2025.02.19' -RELEASE_GIT_HEAD = '3b4531934465580be22937fecbb6e1a3a9e2334f' +RELEASE_GIT_HEAD = '4985a4041770eaa0016271809a1fd950dc809a55' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2025.01.26' +_pkg_version = '2025.02.19' From c2e6e1d5f77f3b720a6266f2869eb750d20e5dc1 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 20 Feb 2025 09:39:06 -0600 Subject: [PATCH 23/25] [ie/niconico:live] Fix thumbnail extraction (#12419) Closes #12417 Authored by: bashonly --- yt_dlp/extractor/niconico.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 2f04de9e2..77a22777b 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -13,11 +13,13 @@ ExtractorError, OnDemandPagedList, clean_html, + determine_ext, float_or_none, int_or_none, join_nonempty, parse_duration, parse_iso8601, + parse_qs, parse_resolution, qualities, remove_start, @@ -1033,6 +1035,7 @@ def _real_extract(self, url): thumbnails.append({ 'id': f'{name}_{width}x{height}', 'url': img_url, + 'ext': traverse_obj(parse_qs(img_url), ('image', 0, {determine_ext(default_ext='jpg')})), **res, }) From 9deed13d7cce6d3647379e50589c92de89227509 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 20 Feb 2025 09:51:08 -0600 Subject: [PATCH 24/25] [ie/soundcloud] Extract tags (#12420) Authored by: bashonly --- yt_dlp/extractor/soundcloud.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index eafa306f2..a524efc21 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -53,6 +53,7 @@ class SoundcloudBaseIE(InfoExtractor): _HEADERS = {} _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' + _TAGS_RE = re.compile(r'"([^"]+)"|([^ ]+)') _ARTWORK_MAP = { 'mini': 16, @@ -372,6 +373,7 @@ def extract_count(key): 'comment_count': extract_count('comment'), 'repost_count': extract_count('reposts'), 'genres': traverse_obj(info, ('genre', {str}, filter, all, filter)), + 'tags': traverse_obj(info, ('tag_list', {self._TAGS_RE.findall}, ..., ..., filter)), 'artists': traverse_obj(info, ('publisher_metadata', 'artist', {str}, filter, all, filter)), 'formats': formats if not extract_flat else None, } @@ -425,6 +427,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'repost_count': int, 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'uploader_url': 'https://soundcloud.com/ethmusic', + 'tags': 'count:14', }, }, # geo-restricted @@ -440,7 +443,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_id': '9615865', 'timestamp': 1337635207, 'upload_date': '20120521', - 'duration': 227.155, + 'duration': 227.103, 'license': 'all-rights-reserved', 'view_count': int, 'like_count': int, @@ -450,6 +453,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg', 'genres': ['Alternative'], 'artists': ['The Royal Concept'], + 'tags': [], }, }, # private link @@ -475,6 +479,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_url': 'https://soundcloud.com/jaimemf', 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png', 'genres': ['youtubedl'], + 'tags': [], }, }, # private link (alt format) @@ -500,15 +505,16 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_url': 'https://soundcloud.com/jaimemf', 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png', 'genres': ['youtubedl'], + 'tags': [], }, }, # downloadable song { 'url': 'https://soundcloud.com/the80m/the-following', - 'md5': '9ffcddb08c87d74fb5808a3c183a1d04', + 'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04' 'info_dict': { 'id': '343609555', - 'ext': 'wav', + 'ext': 'opus', # wav original available with auth 'title': 'The Following', 'track': 'The Following', 'description': '', @@ -526,15 +532,18 @@ class SoundcloudIE(SoundcloudBaseIE): 'view_count': int, 'genres': ['Dance & EDM'], 'artists': ['80M'], + 'tags': ['80M', 'EDM', 'Dance', 'Music'], }, + 'expected_warnings': ['Original download format is only available for registered users'], }, # private link, downloadable format + # tags with spaces (e.g. "Uplifting Trance", "Ori Uplift") { 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', - 'md5': '64a60b16e617d41d0bef032b7f55441e', + 'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e' 'info_dict': { 'id': '340344461', - 'ext': 'wav', + 'ext': 'opus', # wav original available with auth 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', @@ -552,7 +561,9 @@ class SoundcloudIE(SoundcloudBaseIE): 'uploader_url': 'https://soundcloud.com/oriuplift', 'genres': ['Trance'], 'artists': ['Ori Uplift'], + 'tags': ['Orchestral', 'Emotional', 'Uplifting Trance', 'Trance', 'Ori Uplift', 'UpOnly'], }, + 'expected_warnings': ['Original download format is only available for registered users'], }, # no album art, use avatar pic for thumbnail { @@ -577,6 +588,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'repost_count': int, 'uploader_url': 'https://soundcloud.com/garyvee', 'artists': ['MadReal'], + 'tags': [], }, 'params': { 'skip_download': True, @@ -604,6 +616,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'repost_count': int, 'genres': ['Piano'], 'uploader_url': 'https://soundcloud.com/giovannisarani', + 'tags': 'count:10', }, }, { From f7a1f2d8132967a62b0f6d5665c6d2dde2d42c09 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Thu, 20 Feb 2025 20:33:31 +0100 Subject: [PATCH 25/25] [core] Support emitting ConEmu progress codes (#10649) Authored by: Grub4K --- yt_dlp/YoutubeDL.py | 42 ++++++++++++++++++---------------- yt_dlp/downloader/common.py | 14 ++++++++---- yt_dlp/postprocessor/common.py | 3 ++- yt_dlp/utils/_utils.py | 30 ++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 26 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3293a9076..7026822b6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -157,7 +157,7 @@ write_json_file, write_string, ) -from .utils._utils import _UnsafeExtensionError, _YDLLogger +from .utils._utils import _UnsafeExtensionError, _YDLLogger, _ProgressState from .utils.networking import ( HTTPHeaderDict, clean_headers, @@ -642,20 +642,19 @@ def __init__(self, params=None, auto_init=True): self.cache = Cache(self) self.__header_cookies = [] + try: + windows_enable_vt_mode() + except Exception as e: + self.write_debug(f'Failed to enable VT mode: {e}') + stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout self._out_files = Namespace( out=stdout, error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, - console=None if os.name == 'nt' else next( - filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), + console=next(filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) - try: - windows_enable_vt_mode() - except Exception as e: - self.write_debug(f'Failed to enable VT mode: {e}') - if self.params.get('no_color'): if self.params.get('color') is not None: self.params.setdefault('_warnings', []).append( @@ -956,21 +955,22 @@ def to_stderr(self, message, only_once=False): self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) def _send_console_code(self, code): - if os.name == 'nt' or not self._out_files.console: - return + if not supports_terminal_sequences(self._out_files.console): + return False self._write_string(code, self._out_files.console) + return True - def to_console_title(self, message): - if not self.params.get('consoletitle', False): + def to_console_title(self, message=None, progress_state=None, percent=None): + if not self.params.get('consoletitle'): return - message = remove_terminal_sequences(message) - if os.name == 'nt': - if ctypes.windll.kernel32.GetConsoleWindow(): - # c_wchar_p() might not be necessary if `message` is - # already of type unicode() - ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) - else: - self._send_console_code(f'\033]0;{message}\007') + + if message: + success = self._send_console_code(f'\033]0;{remove_terminal_sequences(message)}\007') + if not success and os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow(): + ctypes.windll.kernel32.SetConsoleTitleW(message) + + if isinstance(progress_state, _ProgressState): + self._send_console_code(progress_state.get_ansi_escape(percent)) def save_console_title(self): if not self.params.get('consoletitle') or self.params.get('simulate'): @@ -984,6 +984,7 @@ def restore_console_title(self): def __enter__(self): self.save_console_title() + self.to_console_title(progress_state=_ProgressState.INDETERMINATE) return self def save_cookies(self): @@ -992,6 +993,7 @@ def save_cookies(self): def __exit__(self, *args): self.restore_console_title() + self.to_console_title(progress_state=_ProgressState.HIDDEN) self.close() def close(self): diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index e8dcb37cc..bb9303f8a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -31,6 +31,7 @@ timetuple_from_msec, try_call, ) +from ..utils._utils import _ProgressState class FileDownloader: @@ -333,7 +334,7 @@ def _report_progress_status(self, s, default_template): progress_dict), s.get('progress_idx') or 0) self.to_console_title(self.ydl.evaluate_outtmpl( progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', - progress_dict)) + progress_dict), _ProgressState.from_dict(s), s.get('_percent')) def _format_progress(self, *args, **kwargs): return self.ydl._format_text( @@ -357,6 +358,7 @@ def with_fields(*tups, default=''): '_speed_str': self.format_speed(speed).strip(), '_total_bytes_str': _format_bytes('total_bytes'), '_elapsed_str': self.format_seconds(s.get('elapsed')), + '_percent': 100.0, '_percent_str': self.format_percent(100), }) self._report_progress_status(s, join_nonempty( @@ -375,13 +377,15 @@ def with_fields(*tups, default=''): return self._progress_delta_time += update_delta + progress = try_call( + lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], + lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], + lambda: s['downloaded_bytes'] == 0 and 0) s.update({ '_eta_str': self.format_eta(s.get('eta')).strip(), '_speed_str': self.format_speed(s.get('speed')), - '_percent_str': self.format_percent(try_call( - lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], - lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], - lambda: s['downloaded_bytes'] == 0 and 0)), + '_percent': progress, + '_percent_str': self.format_percent(progress), '_total_bytes_str': _format_bytes('total_bytes'), '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index be2bb33f6..f0a71c1ff 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -10,6 +10,7 @@ _configuration_args, deprecation_warning, ) +from ..utils._utils import _ProgressState class PostProcessorMetaClass(type): @@ -189,7 +190,7 @@ def report_progress(self, s): self._downloader.to_console_title(self._downloader.evaluate_outtmpl( progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s', - progress_dict)) + progress_dict), _ProgressState.from_dict(s), s.get('_percent')) def _retry_download(self, err, count, retries): # While this is not an extractor, it behaves similar to one and diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index c6a90a0dd..3e7a375ee 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -8,6 +8,7 @@ import datetime as dt import email.header import email.utils +import enum import errno import functools import hashlib @@ -5677,3 +5678,32 @@ def stdout(self, message): def stderr(self, message): if self._ydl: self._ydl.to_stderr(message) + + +class _ProgressState(enum.Enum): + """ + Represents a state for a progress bar. + + See: https://conemu.github.io/en/AnsiEscapeCodes.html#ConEmu_specific_OSC + """ + + HIDDEN = 0 + INDETERMINATE = 3 + VISIBLE = 1 + WARNING = 4 + ERROR = 2 + + @classmethod + def from_dict(cls, s, /): + if s['status'] == 'finished': + return cls.INDETERMINATE + + # Not currently used + if s['status'] == 'error': + return cls.ERROR + + return cls.INDETERMINATE if s.get('_percent') is None else cls.VISIBLE + + def get_ansi_escape(self, /, percent=None): + percent = 0 if percent is None else int(percent) + return f'\033]9;4;{self.value};{percent}\007'